src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 0:a61af66fc99e (jdk7-b24)

Initial load

author: duke
date: Sat, 01 Dec 2007 00:00:00 +0000
parents: (none)
children: f8236e79048a
1 /*
2 * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25 #include "incls/_precompiled.incl"
26 #include "incls/_stubGenerator_sparc.cpp.incl"
27
28 // Declaration and definition of StubGenerator (no .hpp file).
29 // For a more detailed description of the stub routine structure
30 // see the comment in stubRoutines.hpp.
31
32 #define __ _masm->
33
34 #ifdef PRODUCT
35 #define BLOCK_COMMENT(str) /* nothing */
36 #else
37 #define BLOCK_COMMENT(str) __ block_comment(str)
38 #endif
39
40 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
41
42 // Note: The register L7 is used as L7_thread_cache, and may not be used
43 // any other way within this module.
44
45
46 static const Register& Lstub_temp = L2;
47
48 // -------------------------------------------------------------------------------------------------------------------------
49 // Stub Code definitions
50
51 static address handle_unsafe_access() {
52 JavaThread* thread = JavaThread::current();
53 address pc = thread->saved_exception_pc();
54 address npc = thread->saved_exception_npc();
55 // pc is the instruction which we must emulate
56 // doing a no-op is fine: return garbage from the load
57
58 // request an async exception
59 thread->set_pending_unsafe_access_error();
60
61 // return address of next instruction to execute
62 return npc;
63 }
64
65 class StubGenerator: public StubCodeGenerator {
66 private:
67
68 #ifdef PRODUCT
69 #define inc_counter_np(a,b,c) (0)
70 #else
71 void inc_counter_np_(int& counter, Register t1, Register t2) {
72 Address counter_addr(t2, (address) &counter);
73 __ sethi(counter_addr);
74 __ ld(counter_addr, t1);
75 __ inc(t1);
76 __ st(t1, counter_addr);
77 }
78 #define inc_counter_np(counter, t1, t2) \
79 BLOCK_COMMENT("inc_counter " #counter); \
80 inc_counter_np_(counter, t1, t2);
81 #endif
82
83 //----------------------------------------------------------------------------------------------------
84 // Call stubs are used to call Java from C
85
86 address generate_call_stub(address& return_pc) {
87 StubCodeMark mark(this, "StubRoutines", "call_stub");
88 address start = __ pc();
89
90 // Incoming arguments:
91 //
92 // o0 : call wrapper address
93 // o1 : result (address)
94 // o2 : result type
95 // o3 : method
96 // o4 : (interpreter) entry point
97 // o5 : parameters (address)
98 // [sp + 0x5c]: parameter size (in words)
99 // [sp + 0x60]: thread
100 //
101 // +---------------+ <--- sp + 0
102 // | |
103 // . reg save area .
104 // | |
105 // +---------------+ <--- sp + 0x40
106 // | |
107 // . extra 7 slots .
108 // | |
109 // +---------------+ <--- sp + 0x5c
110 // | param. size |
111 // +---------------+ <--- sp + 0x60
112 // | thread |
113 // +---------------+
114 // | |
115
116 // note: if the link argument position changes, adjust
117 // the code in frame::entry_frame_call_wrapper()
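// Illustrative sketch (comment only, not generated code): seen from the C++
// side, the call stub behaves roughly like a function with the signature
// below; the first six arguments arrive in %o0-%o5 and the last two on the
// stack, matching the layout documented above. Parameter names here are for
// exposition only; the authoritative typedef is the CallStub declaration in
// stubRoutines.hpp.
//
//   void call_stub(address   call_wrapper,     // o0
//                  intptr_t* result,           // o1
//                  BasicType result_type,      // o2
//                  methodOop method,           // o3
//                  address   entry_point,      // o4
//                  intptr_t* parameters,       // o5
//                  int       parameter_size,   // [sp + 0x5c], in words
//                  Thread*   thread);          // [sp + 0x60]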
118
119 const Argument link = Argument(0, false); // used only for GC
120 const Argument result = Argument(1, false);
121 const Argument result_type = Argument(2, false);
122 const Argument method = Argument(3, false);
123 const Argument entry_point = Argument(4, false);
124 const Argument parameters = Argument(5, false);
125 const Argument parameter_size = Argument(6, false);
126 const Argument thread = Argument(7, false);
127
128 // setup thread register
129 __ ld_ptr(thread.as_address(), G2_thread);
130
131 #ifdef ASSERT
132 // make sure we have no pending exceptions
133 { const Register t = G3_scratch;
134 Label L;
135 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
136 __ br_null(t, false, Assembler::pt, L);
137 __ delayed()->nop();
138 __ stop("StubRoutines::call_stub: entered with pending exception");
139 __ bind(L);
140 }
141 #endif
142
143 // create activation frame & allocate space for parameters
144 { const Register t = G3_scratch;
145 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
146 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
147 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words)
148 __ sll(t, Interpreter::logStackElementSize(), t); // compute number of bytes
149 __ neg(t); // negate so it can be used with save
150 __ save(SP, t, SP); // setup new frame
151 }
152
153 // +---------------+ <--- sp + 0
154 // | |
155 // . reg save area .
156 // | |
157 // +---------------+ <--- sp + 0x40
158 // | |
159 // . extra 7 slots .
160 // | |
161 // +---------------+ <--- sp + 0x5c
162 // | empty slot | (only if parameter size is even)
163 // +---------------+
164 // | |
165 // . parameters .
166 // | |
167 // +---------------+ <--- fp + 0
168 // | |
169 // . reg save area .
170 // | |
171 // +---------------+ <--- fp + 0x40
172 // | |
173 // . extra 7 slots .
174 // | |
175 // +---------------+ <--- fp + 0x5c
176 // | param. size |
177 // +---------------+ <--- fp + 0x60
178 // | thread |
179 // +---------------+
180 // | |
181
182 // pass parameters if any
183 BLOCK_COMMENT("pass parameters if any");
184 { const Register src = parameters.as_in().as_register();
185 const Register dst = Lentry_args;
186 const Register tmp = G3_scratch;
187 const Register cnt = G4_scratch;
188
189 // test if any parameters & setup of Lentry_args
190 Label exit;
191 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
192 __ add( FP, STACK_BIAS, dst );
193 __ tst(cnt);
194 __ br(Assembler::zero, false, Assembler::pn, exit);
195 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
196
197 // copy parameters if any
198 Label loop;
199 __ BIND(loop);
200 // Store tag first.
201 if (TaggedStackInterpreter) {
202 __ ld_ptr(src, 0, tmp);
203 __ add(src, BytesPerWord, src); // get next
204 __ st_ptr(tmp, dst, Interpreter::tag_offset_in_bytes());
205 }
206 // Store parameter value
207 __ ld_ptr(src, 0, tmp);
208 __ add(src, BytesPerWord, src);
209 __ st_ptr(tmp, dst, Interpreter::value_offset_in_bytes());
210 __ deccc(cnt);
211 __ br(Assembler::greater, false, Assembler::pt, loop);
212 __ delayed()->sub(dst, Interpreter::stackElementSize(), dst);
213
214 // done
215 __ BIND(exit);
216 }
217
218 // setup parameters, method & call Java function
219 #ifdef ASSERT
220 // layout_activation_impl checks its notion of saved SP against
221 // this register, so if this changes, update it as well.
222 const Register saved_SP = Lscratch;
223 __ mov(SP, saved_SP); // keep track of SP before call
224 #endif
225
226 // setup parameters
227 const Register t = G3_scratch;
228 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
229 __ sll(t, Interpreter::logStackElementSize(), t); // compute number of bytes
230 __ sub(FP, t, Gargs); // setup parameter pointer
231 #ifdef _LP64
232 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias
233 #endif
234 __ mov(SP, O5_savedSP);
235
236
237 // do the call
238 //
239 // the following registers must be set up:
240 //
241 // G2_thread
242 // G5_method
243 // Gargs
244 BLOCK_COMMENT("call Java function");
245 __ jmpl(entry_point.as_in().as_register(), G0, O7);
246 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method
247
248 BLOCK_COMMENT("call_stub_return_address:");
249 return_pc = __ pc();
250
251 // The callee, if it wasn't interpreted, can return with SP changed so
252 // we can no longer assert anything about the change of SP.
253
254 // store result depending on type
255 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
256 // is treated as T_INT)
257 { const Register addr = result .as_in().as_register();
258 const Register type = result_type.as_in().as_register();
259 Label is_long, is_float, is_double, is_object, exit;
260 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
261 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
262 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
263 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
264 __ delayed()->nop();
265
266 // store int result
267 __ st(O0, addr, G0);
268
269 __ BIND(exit);
270 __ ret();
271 __ delayed()->restore();
272
273 __ BIND(is_object);
274 __ ba(false, exit);
275 __ delayed()->st_ptr(O0, addr, G0);
276
277 __ BIND(is_float);
278 __ ba(false, exit);
279 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
280
281 __ BIND(is_double);
282 __ ba(false, exit);
283 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
284
285 __ BIND(is_long);
286 #ifdef _LP64
287 __ ba(false, exit);
288 __ delayed()->st_long(O0, addr, G0); // store entire long
289 #else
290 #if defined(COMPILER2)
291 // All return values are where we want them, except for Longs. C2 returns
292 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
293 // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit
294 // build, we simply always use G1.
295 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
296 // do this here. Unfortunately, if we did a rethrow we'd see a machepilog node
297 // first, which would move g1 -> O0/O1 and destroy the exception we were throwing.
298
299 __ ba(false, exit);
300 __ delayed()->stx(G1, addr, G0); // store entire long
301 #else
302 __ st(O1, addr, BytesPerInt);
303 __ ba(false, exit);
304 __ delayed()->st(O0, addr, G0);
305 #endif /* COMPILER2 */
306 #endif /* _LP64 */
307 }
308 return start;
309 }
310
311
312 //----------------------------------------------------------------------------------------------------
313 // Return point for a Java call if there's an exception thrown in Java code.
314 // The exception is caught and transformed into a pending exception stored in
315 // JavaThread that can be tested from within the VM.
316 //
317 // Oexception: exception oop
318
319 address generate_catch_exception() {
320 StubCodeMark mark(this, "StubRoutines", "catch_exception");
321
322 address start = __ pc();
323 // verify that thread corresponds
324 __ verify_thread();
325
326 const Register& temp_reg = Gtemp;
327 Address pending_exception_addr (G2_thread, 0, in_bytes(Thread::pending_exception_offset()));
328 Address exception_file_offset_addr(G2_thread, 0, in_bytes(Thread::exception_file_offset ()));
329 Address exception_line_offset_addr(G2_thread, 0, in_bytes(Thread::exception_line_offset ()));
330
331 // set pending exception
332 __ verify_oop(Oexception);
333 __ st_ptr(Oexception, pending_exception_addr);
334 __ set((intptr_t)__FILE__, temp_reg);
335 __ st_ptr(temp_reg, exception_file_offset_addr);
336 __ set((intptr_t)__LINE__, temp_reg);
337 __ st(temp_reg, exception_line_offset_addr);
338
339 // complete return to VM
340 assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
341
342 Address stub_ret(temp_reg, StubRoutines::_call_stub_return_address);
343 __ jump_to(stub_ret);
344 __ delayed()->nop();
345
346 return start;
347 }
348
349
350 //----------------------------------------------------------------------------------------------------
351 // Continuation point for runtime calls returning with a pending exception
352 // The pending exception check happened in the runtime or native call stub
353 // The pending exception in Thread is converted into a Java-level exception
354 //
355 // Contract with Java-level exception handler: O0 = exception
356 // O1 = throwing pc
357
358 address generate_forward_exception() {
359 StubCodeMark mark(this, "StubRoutines", "forward_exception");
360 address start = __ pc();
361
362 // Upon entry, O7 has the return address returning into Java
363 // (interpreted or compiled) code; i.e. the return address
364 // becomes the throwing pc.
365
366 const Register& handler_reg = Gtemp;
367
368 Address exception_addr (G2_thread, 0, in_bytes(Thread::pending_exception_offset()));
369
370 #ifdef ASSERT
371 // make sure that this code is only executed if there is a pending exception
372 { Label L;
373 __ ld_ptr(exception_addr, Gtemp);
374 __ br_notnull(Gtemp, false, Assembler::pt, L);
375 __ delayed()->nop();
376 __ stop("StubRoutines::forward exception: no pending exception (1)");
377 __ bind(L);
378 }
379 #endif
380
381 // compute exception handler into handler_reg
382 __ get_thread();
383 __ ld_ptr(exception_addr, Oexception);
384 __ verify_oop(Oexception);
385 __ save_frame(0); // compensates for compiler weakness
386 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
387 BLOCK_COMMENT("call exception_handler_for_return_address");
388 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), Lscratch);
389 __ mov(O0, handler_reg);
390 __ restore(); // compensates for compiler weakness
391
392 __ ld_ptr(exception_addr, Oexception);
393 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
394
395 #ifdef ASSERT
396 // make sure exception is set
397 { Label L;
398 __ br_notnull(Oexception, false, Assembler::pt, L);
399 __ delayed()->nop();
400 __ stop("StubRoutines::forward exception: no pending exception (2)");
401 __ bind(L);
402 }
403 #endif
404 // jump to exception handler
405 __ jmp(handler_reg, 0);
406 // clear pending exception
407 __ delayed()->st_ptr(G0, exception_addr);
408
409 return start;
410 }
411
412
413 //------------------------------------------------------------------------------------------------------------------------
414 // Continuation point for throwing of implicit exceptions that are not handled in
415 // the current activation. Fabricates an exception oop and initiates normal
416 // exception dispatching in this frame. Only callee-saved registers are preserved
417 // (through the normal register window / RegisterMap handling).
418 // If the compiler needs all registers to be preserved between the fault
419 // point and the exception handler then it must assume responsibility for that in
420 // AbstractCompiler::continuation_for_implicit_null_exception or
421 // continuation_for_implicit_division_by_zero_exception. All other implicit
422 // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
423 // either at call sites or otherwise assume that stack unwinding will be initiated,
424 // so caller saved registers were assumed volatile in the compiler.
425
426 // Note that we generate only this stub into a RuntimeStub, because it needs to be
427 // properly traversed and ignored during GC, so we change the meaning of the "__"
428 // macro within this method.
429 #undef __
430 #define __ masm->
431
432 address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) {
433 #ifdef ASSERT
434 int insts_size = VerifyThread ? 1 * K : 600;
435 #else
436 int insts_size = VerifyThread ? 1 * K : 256;
437 #endif /* ASSERT */
438 int locs_size = 32;
439
440 CodeBuffer code(name, insts_size, locs_size);
441 MacroAssembler* masm = new MacroAssembler(&code);
442
443 __ verify_thread();
444
445 // This is an inlined and slightly modified version of call_VM
446 // which has the ability to fetch the return PC out of thread-local storage
447 __ assert_not_delayed();
448
449 // Note that we always push a frame because on the SPARC
450 // architecture, for all of our implicit exception kinds at call
451 // sites, the implicit exception is taken before the callee frame
452 // is pushed.
453 __ save_frame(0);
454
455 int frame_complete = __ offset();
456
457 if (restore_saved_exception_pc) {
458 Address saved_exception_pc(G2_thread, 0, in_bytes(JavaThread::saved_exception_pc_offset()));
459 __ ld_ptr(saved_exception_pc, I7);
460 __ sub(I7, frame::pc_return_offset, I7);
461 }
462
463 // Note that we always have a runtime stub frame on the top of stack by this point
464 Register last_java_sp = SP;
465 // 64-bit last_java_sp is biased!
466 __ set_last_Java_frame(last_java_sp, G0);
467 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early
468 __ save_thread(noreg);
469 // do the call
470 BLOCK_COMMENT("call runtime_entry");
471 __ call(runtime_entry, relocInfo::runtime_call_type);
472 if (!VerifyThread)
473 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
474 else
475 __ delayed()->nop(); // (thread already passed)
476 __ restore_thread(noreg);
477 __ reset_last_Java_frame();
478
479 // check for pending exceptions. use Gtemp as scratch register.
480 #ifdef ASSERT
481 Label L;
482
483 Address exception_addr(G2_thread, 0, in_bytes(Thread::pending_exception_offset()));
484 Register scratch_reg = Gtemp;
485 __ ld_ptr(exception_addr, scratch_reg);
486 __ br_notnull(scratch_reg, false, Assembler::pt, L);
487 __ delayed()->nop();
488 __ should_not_reach_here();
489 __ bind(L);
490 #endif // ASSERT
491 BLOCK_COMMENT("call forward_exception_entry");
492 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
493 // we use O7 linkage so that forward_exception_entry has the issuing PC
494 __ delayed()->restore();
495
496 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
497 return stub->entry_point();
498 }
499
500 #undef __
501 #define __ _masm->
502
503
504 // Generate a routine that sets all the registers so we
505 // can tell if the stop routine prints them correctly.
506 address generate_test_stop() {
507 StubCodeMark mark(this, "StubRoutines", "test_stop");
508 address start = __ pc();
509
510 int i;
511
512 __ save_frame(0);
513
514 static jfloat zero = 0.0, one = 1.0;
515
516 // put addr in L0, then load through L0 to F0
517 __ set((intptr_t)&zero, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F0);
518 __ set((intptr_t)&one, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1
519
520 // use add to put 2..18 in F2..F18
521 for ( i = 2; i <= 18; ++i ) {
522 __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i));
523 }
524
525 // Now put double 2 in F16, double 18 in F18
526 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2, F16 );
527 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );
528
529 // use add to put 20..32 in F20..F32
530 for (i = 20; i < 32; i += 2) {
531 __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i));
532 }
533
534 // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
535 for ( i = 0; i < 8; ++i ) {
536 if (i < 6) {
537 __ set( i, as_iRegister(i));
538 __ set(16 + i, as_oRegister(i));
539 __ set(24 + i, as_gRegister(i));
540 }
541 __ set( 8 + i, as_lRegister(i));
542 }
543
544 __ stop("testing stop");
545
546
547 __ ret();
548 __ delayed()->restore();
549
550 return start;
551 }
552
553
554 address generate_stop_subroutine() {
555 StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
556 address start = __ pc();
557
558 __ stop_subroutine();
559
560 return start;
561 }
562
563 address generate_flush_callers_register_windows() {
564 StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
565 address start = __ pc();
566
567 __ flush_windows();
568 __ retl(false);
569 __ delayed()->add( FP, STACK_BIAS, O0 );
570 // The returned value must be a stack pointer whose register save area
571 // is flushed, and will stay flushed while the caller executes.
572
573 return start;
574 }
575
576 // Helper functions for v8 atomic operations.
577 //
578 void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
579 if (mark_oop_reg == noreg) {
580 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
581 __ set((intptr_t)lock_ptr, lock_ptr_reg);
582 } else {
583 assert(scratch_reg != noreg, "just checking");
584 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
585 __ set((intptr_t)lock_ptr, lock_ptr_reg);
586 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
587 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
588 }
589 }
590
591 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
592
593 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
594 __ set(StubRoutines::Sparc::locked, lock_reg);
595 // Initialize yield counter
596 __ mov(G0,yield_reg);
597
598 __ BIND(retry);
599 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
600 __ br(Assembler::less, false, Assembler::pt, dontyield);
601 __ delayed()->nop();
602
603 // This code can only be called from inside the VM; this
604 // stub is only invoked from Atomic::add(). We do not
605 // want to use call_VM, because _last_java_sp and such
606 // must already be set.
607 //
608 // Save the regs and make space for a C call
609 __ save(SP, -96, SP);
610 __ save_all_globals_into_locals();
611 BLOCK_COMMENT("call os::naked_sleep");
612 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
613 __ delayed()->nop();
614 __ restore_globals_from_locals();
615 __ restore();
616 // reset the counter
617 __ mov(G0,yield_reg);
618
619 __ BIND(dontyield);
620
621 // try to get lock
622 __ swap(lock_ptr_reg, 0, lock_reg);
623
624 // did we get the lock?
625 __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
626 __ br(Assembler::notEqual, true, Assembler::pn, retry);
627 __ delayed()->add(yield_reg,1,yield_reg);
628
629 // yes, got lock. do the operation here.
630 }
631
632 void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
633 __ st(lock_reg, lock_ptr_reg, 0); // unlock
634 }
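// Sketch (comment only): the prologue/epilogue pair above amounts to a
// test-and-set spin lock with a sleep back-off, roughly:
//
//   int yields = 0;
//   for (;;) {
//     if (yields >= V8AtomicOperationUnderLockSpinCount) {
//       os::naked_sleep();                       // back off briefly
//       yields = 0;
//     }
//     if (swap(lock_ptr, locked) == unlocked)    // atomic SWAP instruction
//       break;                                   // lock acquired
//     ++yields;
//   }
//   ... guarded operation ...
//   *lock_ptr = unlocked;                        // epilogue releases the lock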
635
636 // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
637 //
638 // Arguments :
639 //
640 // exchange_value: O0
641 // dest: O1
642 //
643 // Results:
644 //
645 // O0: the value previously stored in dest
646 //
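// Sketch (comment only): with UseCASForSwap the stub below implements the
// exchange as a compare-and-swap retry loop, roughly:
//
//   jint atomic_xchg(jint exchange_value, volatile jint* dest) {
//     for (;;) {
//       jint observed = *dest;
//       // cas: if (*dest == observed) store exchange_value; yields old *dest
//       if (cas(dest, observed, exchange_value) == observed)
//         return observed;
//     }
//   }
//
// Otherwise a single SWAP instruction (V9) or the V8 lock path is used.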
647 address generate_atomic_xchg() {
648 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
649 address start = __ pc();
650
651 if (UseCASForSwap) {
652 // Use CAS instead of swap, just in case the MP hardware
653 // prefers to work with just one kind of synch. instruction.
654 Label retry;
655 __ BIND(retry);
656 __ mov(O0, O3); // scratch copy of exchange value
657 __ ld(O1, 0, O2); // observe the previous value
658 // try to replace O2 with O3
659 __ cas_under_lock(O1, O2, O3,
660 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
661 __ cmp(O2, O3);
662 __ br(Assembler::notEqual, false, Assembler::pn, retry);
663 __ delayed()->nop();
664
665 __ retl(false);
666 __ delayed()->mov(O2, O0); // report previous value to caller
667
668 } else {
669 if (VM_Version::v9_instructions_work()) {
670 __ retl(false);
671 __ delayed()->swap(O1, 0, O0);
672 } else {
673 const Register& lock_reg = O2;
674 const Register& lock_ptr_reg = O3;
675 const Register& yield_reg = O4;
676
677 Label retry;
678 Label dontyield;
679
680 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
681 // got the lock, do the swap
682 __ swap(O1, 0, O0);
683
684 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
685 __ retl(false);
686 __ delayed()->nop();
687 }
688 }
689
690 return start;
691 }
692
693
694 // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
695 //
696 // Arguments :
697 //
698 // exchange_value: O0
699 // dest: O1
700 // compare_value: O2
701 //
702 // Results:
703 //
704 // O0: the value previously stored in dest
705 //
706 // Overwrites (v8): O3,O4,O5
707 //
708 address generate_atomic_cmpxchg() {
709 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
710 address start = __ pc();
711
712 // cmpxchg(dest, compare_value, exchange_value)
713 __ cas_under_lock(O1, O2, O0,
714 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
715 __ retl(false);
716 __ delayed()->nop();
717
718 return start;
719 }
720
721 // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
722 //
723 // Arguments :
724 //
725 // exchange_value: O1:O0
726 // dest: O2
727 // compare_value: O4:O3
728 //
729 // Results:
730 //
731 // O1:O0: the value previously stored in dest
732 //
733 // This only works on V9; on V8 we don't generate any
734 // code and just return NULL.
735 //
736 // Overwrites: G1,G2,G3
737 //
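// Sketch (comment only): on the 32-bit ABI each jlong argument arrives as a
// register pair (high word in the lower-numbered register), so the stub
// first packs each pair into a single 64-bit register before issuing casx:
//
//   jlong packed = ((jlong)high_word << 32) | ((julong)low_word & 0xFFFFFFFF);
//
// and the 64-bit result of casx is unpacked the same way for the return.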
738 address generate_atomic_cmpxchg_long() {
739 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
740 address start = __ pc();
741
742 if (!VM_Version::supports_cx8())
743 return NULL;
744 __ sllx(O0, 32, O0);
745 __ srl(O1, 0, O1);
746 __ or3(O0,O1,O0); // O0 holds 64-bit value from exchange_value
747 __ sllx(O3, 32, O3);
748 __ srl(O4, 0, O4);
749 __ or3(O3,O4,O3); // O3 holds 64-bit value from compare_value
750 __ casx(O2, O3, O0);
751 __ srl(O0, 0, O1); // unpacked return value in O1:O0
752 __ retl(false);
753 __ delayed()->srlx(O0, 32, O0);
754
755 return start;
756 }
757
758
759 // Support for jint Atomic::add(jint add_value, volatile jint* dest).
760 //
761 // Arguments :
762 //
763 // add_value: O0 (e.g., +1 or -1)
764 // dest: O1
765 //
766 // Results:
767 //
768 // O0: the new value stored in dest
769 //
770 // Overwrites (v9): O3
771 // Overwrites (v8): O3,O4,O5
772 //
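// Sketch (comment only): on V9 the stub below is a classic CAS retry loop
// that returns the updated value, roughly:
//
//   jint atomic_add(jint add_value, volatile jint* dest) {
//     for (;;) {
//       jint old = *dest;
//       if (cas(dest, old, old + add_value) == old)
//         return old + add_value;
//     }
//   }
//
// On V8 the addition is instead performed under the global operation lock.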
773 address generate_atomic_add() {
774 StubCodeMark mark(this, "StubRoutines", "atomic_add");
775 address start = __ pc();
776 __ BIND(_atomic_add_stub);
777
778 if (VM_Version::v9_instructions_work()) {
779 Label(retry);
780 __ BIND(retry);
781
782 __ lduw(O1, 0, O2);
783 __ add(O0, O2, O3);
784 __ cas(O1, O2, O3);
785 __ cmp( O2, O3);
786 __ br(Assembler::notEqual, false, Assembler::pn, retry);
787 __ delayed()->nop();
788 __ retl(false);
789 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
790 } else {
791 const Register& lock_reg = O2;
792 const Register& lock_ptr_reg = O3;
793 const Register& value_reg = O4;
794 const Register& yield_reg = O5;
795
796 Label(retry);
797 Label(dontyield);
798
799 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
800 // got lock, do the increment
801 __ ld(O1, 0, value_reg);
802 __ add(O0, value_reg, value_reg);
803 __ st(value_reg, O1, 0);
804
805 // %%% only for RMO and PSO
806 __ membar(Assembler::StoreStore);
807
808 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
809
810 __ retl(false);
811 __ delayed()->mov(value_reg, O0);
812 }
813
814 return start;
815 }
816 Label _atomic_add_stub; // called from other stubs
817
818
819 // Support for void OrderAccess::fence().
820 //
821 address generate_fence() {
822 StubCodeMark mark(this, "StubRoutines", "fence");
823 address start = __ pc();
824
825 __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore |
826 Assembler::StoreLoad | Assembler::StoreStore));
827 __ retl(false);
828 __ delayed()->nop();
829
830 return start;
831 }
832
833
834 //------------------------------------------------------------------------------------------------------------------------
835 // The following routine generates a subroutine to throw an asynchronous
836 // UnknownError when an unsafe access gets a fault that could not be
837 // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.)
838 //
839 // Arguments :
840 //
841 // trapping PC: O7
842 //
843 // Results:
844 // posts an asynchronous exception, skips the trapping instruction
845 //
846
847 address generate_handler_for_unsafe_access() {
848 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
849 address start = __ pc();
850
851 const int preserve_register_words = (64 * 2);
852 Address preserve_addr(FP, 0, (-preserve_register_words * wordSize) + STACK_BIAS);
853
854 Register Lthread = L7_thread_cache;
855 int i;
856
857 __ save_frame(0);
858 __ mov(G1, L1);
859 __ mov(G2, L2);
860 __ mov(G3, L3);
861 __ mov(G4, L4);
862 __ mov(G5, L5);
863 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
864 __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
865 }
866
867 address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
868 BLOCK_COMMENT("call handle_unsafe_access");
869 __ call(entry_point, relocInfo::runtime_call_type);
870 __ delayed()->nop();
871
872 __ mov(L1, G1);
873 __ mov(L2, G2);
874 __ mov(L3, G3);
875 __ mov(L4, G4);
876 __ mov(L5, G5);
877 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
878 __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
879 }
880
881 __ verify_thread();
882
883 __ jmp(O0, 0);
884 __ delayed()->restore();
885
886 return start;
887 }
888
889
890 // Support for uint StubRoutines::Sparc::partial_subtype_check( Klass sub, Klass super );
891 // Arguments :
892 //
893 // ret : O0, returned
894 // icc/xcc: set as O0 (depending on wordSize)
895 // sub : O1, argument, not changed
896 // super: O2, argument, not changed
897 // raddr: O7, blown by call
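// Sketch (comment only, field/accessor names approximate): the stub scans the
// receiver's secondary supers array and caches a hit, roughly:
//
//   uint partial_subtype_check(Klass* sub, Klass* super) {
//     if (sub == super) return 0;                    // folded fast path
//     objArrayOop supers = sub->secondary_supers();
//     for (int i = 0; i < supers->length(); i++) {
//       if (supers->obj_at(i) == (oop)super) {
//         sub->set_secondary_super_cache(super);     // remember the hit
//         return 0;                                  // hit: zero result, Z flag set
//       }
//     }
//     return 1;                                      // miss: non-zero result, NZ flags
//   }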
898 address generate_partial_subtype_check() {
899 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
900 address start = __ pc();
901 Label loop, miss;
902
903 // Compare super with sub directly, since super is not in its own SSA.
904 // The compiler used to emit this test, but we fold it in here,
905 // to increase overall code density, with no real loss of speed.
906 { Label L;
907 __ cmp(O1, O2);
908 __ brx(Assembler::notEqual, false, Assembler::pt, L);
909 __ delayed()->nop();
910 __ retl();
911 __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
912 __ bind(L);
913 }
914
915 #if defined(COMPILER2) && !defined(_LP64)
916 // Do not use a 'save' because it blows the 64-bit O registers.
917 __ add(SP,-4*wordSize,SP); // Make space for 4 temps
918 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
919 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
920 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
921 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
922 Register Rret = O0;
923 Register Rsub = O1;
924 Register Rsuper = O2;
925 #else
926 __ save_frame(0);
927 Register Rret = I0;
928 Register Rsub = I1;
929 Register Rsuper = I2;
930 #endif
931
932 Register L0_ary_len = L0;
933 Register L1_ary_ptr = L1;
934 Register L2_super = L2;
935 Register L3_index = L3;
936
937 inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
938
939 __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
940 __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
941 __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
942 __ clr(L3_index); // zero index
943 // Load a little early; will load 1 off the end of the array.
944 // Ok for now; revisit if we have other uses of this routine.
945 __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
946 __ align(CodeEntryAlignment);
947
948 // The scan loop
949 __ BIND(loop);
950 __ add(L1_ary_ptr,wordSize,L1_ary_ptr); // Bump by OOP size
951 __ cmp(L3_index,L0_ary_len);
952 __ br(Assembler::equal,false,Assembler::pn,miss);
953 __ delayed()->inc(L3_index); // Bump index
954 __ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit
955 __ brx( Assembler::notEqual, false, Assembler::pt, loop );
956 __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super); // Will load a little early
957
958 // Got a hit; report success; set cache. Cache load doesn't
959 // happen here; for speed it is directly emitted by the compiler.
960 __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
961
962 #if defined(COMPILER2) && !defined(_LP64)
963 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
964 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
965 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
966 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
967 __ retl(); // Result in Rret is zero; flags set to Z
968 __ delayed()->add(SP,4*wordSize,SP);
969 #else
970 __ ret(); // Result in Rret is zero; flags set to Z
971 __ delayed()->restore();
972 #endif
973
974 // Hit or miss falls through here
975 __ BIND(miss);
976 __ addcc(G0,1,Rret); // set NZ flags, NZ result
977
978 #if defined(COMPILER2) && !defined(_LP64)
979 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
980 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
981 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
982 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
983 __ retl(); // Result in Rret is != 0; flags set to NZ
984 __ delayed()->add(SP,4*wordSize,SP);
985 #else
986 __ ret(); // Result in Rret is != 0; flags set to NZ
987 __ delayed()->restore();
988 #endif
989
990 return start;
991 }
992
993
994 // Called from MacroAssembler::verify_oop
995 //
996 address generate_verify_oop_subroutine() {
997 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
998
999 address start = __ pc();
1000
1001 __ verify_oop_subroutine();
1002
1003 return start;
1004 }
1005
1006 static address disjoint_byte_copy_entry;
1007 static address disjoint_short_copy_entry;
1008 static address disjoint_int_copy_entry;
1009 static address disjoint_long_copy_entry;
1010 static address disjoint_oop_copy_entry;
1011
1012 static address byte_copy_entry;
1013 static address short_copy_entry;
1014 static address int_copy_entry;
1015 static address long_copy_entry;
1016 static address oop_copy_entry;
1017
1018 static address checkcast_copy_entry;
1019
1020 //
1021 // Verify that a register contains a clean 32-bit positive value
1022 // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
1023 //
1024 // Input:
1025 // Rint - 32-bits value
1026 // Rtmp - scratch
1027 //
1028 void assert_clean_int(Register Rint, Register Rtmp) {
1029 #if defined(ASSERT) && defined(_LP64)
1030 __ signx(Rint, Rtmp);
1031 __ cmp(Rint, Rtmp);
1032 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
1033 #endif
1034 }
1035
1036 //
1037 // Generate overlap test for array copy stubs
1038 //
1039 // Input:
1040 // O0 - array1
1041 // O1 - array2
1042 // O2 - element count
1043 //
1044 // Kills temps: O3, O4
1045 //
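// Sketch (comment only): the two branches below implement, in effect,
//
//   bool forward_copy_is_safe(char* from, char* to, size_t byte_count) {
//     return (uintptr_t)to <= (uintptr_t)from          // dst at or before src
//         || (uintptr_t)(to - from) >= byte_count;     // dst starts past src end
//   }
//
// where byte_count == count << log2_elem_size; only if neither condition
// holds does control fall through to the (backward) conjoint copy code.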
1046 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
1047 assert(no_overlap_target != NULL, "must be generated");
1048 array_overlap_test(no_overlap_target, NULL, log2_elem_size);
1049 }
1050 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
1051 array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
1052 }
1053 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
1054 const Register from = O0;
1055 const Register to = O1;
1056 const Register count = O2;
1057 const Register to_from = O3; // to - from
1058 const Register byte_count = O4; // count << log2_elem_size
1059
1060 __ subcc(to, from, to_from);
1061 __ sll_ptr(count, log2_elem_size, byte_count);
1062 if (NOLp == NULL)
1063 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
1064 else
1065 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
1066 __ delayed()->cmp(to_from, byte_count);
1067 if (NOLp == NULL)
1068 __ brx(Assembler::greaterEqual, false, Assembler::pt, no_overlap_target);
1069 else
1070 __ brx(Assembler::greaterEqual, false, Assembler::pt, (*NOLp));
1071 __ delayed()->nop();
1072 }
1073
1074 //
1075 // Generate pre-write barrier for array.
1076 //
1077 // Input:
1078 // addr - register containing starting address
1079 // count - register containing element count
1080 // tmp - scratch register
1081 //
1082 // The input registers are overwritten.
1083 //
1084 void gen_write_ref_array_pre_barrier(Register addr, Register count) {
1085 #if 0 // G1 only
1086 BarrierSet* bs = Universe::heap()->barrier_set();
1087 if (bs->has_write_ref_pre_barrier()) {
1088 assert(bs->has_write_ref_array_pre_opt(),
1089 "Else unsupported barrier set.");
1090
1091 assert(addr->is_global() && count->is_global(),
1092 "If not, then we have to fix this code to handle more "
1093 "general cases.");
1094 // Get some new fresh output registers.
1095 __ save_frame(0);
1096 // Save the necessary global regs... will be used after.
1097 __ mov(addr, L0);
1098 __ mov(count, L1);
1099
1100 __ mov(addr, O0);
1101 // Get the count into O1
1102 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
1103 __ delayed()->mov(count, O1);
1104 __ mov(L0, addr);
1105 __ mov(L1, count);
1106 __ restore();
1107 }
1108 #endif // 0
1109 }
1110
1111 //
1112 // Generate post-write barrier for array.
1113 //
1114 // Input:
1115 // addr - register containing starting address
1116 // count - register containing element count
1117 // tmp - scratch register
1118 //
1119 // The input registers are overwritten.
1120 //
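// Sketch (comment only, card-table cases): the generated loop dirties every
// card spanned by the range of oops just written, roughly:
//
//   void post_barrier(oop* addr, size_t count, jbyte* byte_map_base) {
//     uintptr_t first = (uintptr_t)addr;
//     uintptr_t last  = first + count * BytesPerOop - BytesPerOop;
//     for (uintptr_t c = first >> CardTableModRefBS::card_shift;
//          c <= (last >> CardTableModRefBS::card_shift); c++) {
//       byte_map_base[c] = 0;   // 0 is the dirty value (the code stores G0)
//     }
//   }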
1121 void gen_write_ref_array_post_barrier(Register addr, Register count,
1122 Register tmp) {
1123 BarrierSet* bs = Universe::heap()->barrier_set();
1124
1125 switch (bs->kind()) {
1126 #if 0 // G1 - only
1127 case BarrierSet::G1SATBCT:
1128 case BarrierSet::G1SATBCTLogging:
1129 {
1130 assert(addr->is_global() && count->is_global(),
1131 "If not, then we have to fix this code to handle more "
1132 "general cases.");
1133 // Get some new fresh output registers.
1134 __ save_frame(0);
1135 __ mov(addr, O0);
1136 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
1137 __ delayed()->mov(count, O1);
1138 __ restore();
1139 }
1140 break;
1141 #endif // 0 G1 - only
1142 case BarrierSet::CardTableModRef:
1143 case BarrierSet::CardTableExtension:
1144 {
1145 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1146 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1147 assert_different_registers(addr, count, tmp);
1148
1149 Label L_loop;
1150
1151 __ sll_ptr(count, LogBytesPerOop, count);
1152 __ sub(count, BytesPerOop, count);
1153 __ add(count, addr, count);
1154 // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
1155 __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
1156 __ srl_ptr(count, CardTableModRefBS::card_shift, count);
1157 __ sub(count, addr, count);
1158 Address rs(tmp, (address)ct->byte_map_base);
1159 __ load_address(rs);
1160 __ BIND(L_loop);
1161 __ stb(G0, rs.base(), addr);
1162 __ subcc(count, 1, count);
1163 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
1164 __ delayed()->add(addr, 1, addr);
1165
1166 }
1167 break;
1168 case BarrierSet::ModRef:
1169 break;
1170 default :
1171 ShouldNotReachHere();
1172
1173 }
1174
1175 }
1176
1177
1178 // Copy big chunks forward with shift
1179 //
1180 // Inputs:
1181 // from - source array address
1182 // to - destination array address, aligned to 8 bytes
1183 // count - element count to copy, at least the number of elements equivalent to 16 bytes
1184 // count_dec - decrement of 'count' equivalent to 16 bytes
1185 // L_copy_bytes - copy exit label
1186 //
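// Sketch (comment only): when 'from' and 'to' disagree in their alignment
// mod 8, the loop below keeps every load and store 8-byte aligned by reading
// aligned words and re-aligning them in registers. With a misalignment of m
// bytes (left_shift = 8*m bits, right_shift = 64 - left_shift), each stored
// word is assembled from two consecutive aligned source words a and b:
//
//   uint64_t merged = (a << left_shift) | (b >> right_shift);
//
// (big-endian, as on SPARC), with b carried over as the 'a' of the next
// iteration so every aligned word is loaded only once.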
1187 void copy_16_bytes_forward_with_shift(Register from, Register to,
1188 Register count, int count_dec, Label& L_copy_bytes) {
1189 Label L_loop, L_aligned_copy, L_copy_last_bytes;
1190
1191 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
1192 __ andcc(from, 7, G1); // misaligned bytes
1193 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
1194 __ delayed()->nop();
1195
1196 const Register left_shift = G1; // left shift bit counter
1197 const Register right_shift = G5; // right shift bit counter
1198
1199 __ sll(G1, LogBitsPerByte, left_shift);
1200 __ mov(64, right_shift);
1201 __ sub(right_shift, left_shift, right_shift);
1202
1203 //
1204 // Load 2 aligned 8-bytes chunks and use one from previous iteration
1205 // to form 2 aligned 8-bytes chunks to store.
1206 //
1207 __ deccc(count, count_dec); // Pre-decrement 'count'
1208 __ andn(from, 7, from); // Align address
1209 __ ldx(from, 0, O3);
1210 __ inc(from, 8);
1211 __ align(16);
1212 __ BIND(L_loop);
1213 __ ldx(from, 0, O4);
1214 __ deccc(count, count_dec); // Can we do next iteration after this one?
1215 __ ldx(from, 8, G4);
1216 __ inc(to, 16);
1217 __ inc(from, 16);
1218 __ sllx(O3, left_shift, O3);
1219 __ srlx(O4, right_shift, G3);
1220 __ bset(G3, O3);
1221 __ stx(O3, to, -16);
1222 __ sllx(O4, left_shift, O4);
1223 __ srlx(G4, right_shift, G3);
1224 __ bset(G3, O4);
1225 __ stx(O4, to, -8);
1226 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
1227 __ delayed()->mov(G4, O3);
1228
1229 __ inccc(count, count_dec>>1 ); // + 8 bytes
1230 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1231 __ delayed()->inc(count, count_dec>>1); // restore 'count'
1232
1233 // copy 8 bytes, part of them already loaded in O3
1234 __ ldx(from, 0, O4);
1235 __ inc(to, 8);
1236 __ inc(from, 8);
1237 __ sllx(O3, left_shift, O3);
1238 __ srlx(O4, right_shift, G3);
1239 __ bset(O3, G3);
1240 __ stx(G3, to, -8);
1241
1242 __ BIND(L_copy_last_bytes);
1243 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
1244 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
1245 __ delayed()->sub(from, right_shift, from); // restore address
1246
1247 __ BIND(L_aligned_copy);
1248 }
1249
1250 // Copy big chunks backward with shift
1251 //
1252 // Inputs:
1253 // end_from - source array end address
1254 // end_to - destination array end address, aligned to 8 bytes
1255 // count - element count to copy, at least the number of elements equivalent to 16 bytes
1256 // count_dec - decrement of 'count' equivalent to 16 bytes
1257 // L_aligned_copy - aligned copy exit label
1258 // L_copy_bytes - copy exit label
1259 //
1260 void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
1261 Register count, int count_dec,
1262 Label& L_aligned_copy, Label& L_copy_bytes) {
1263 Label L_loop, L_copy_last_bytes;
1264
1265 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
1266 __ andcc(end_from, 7, G1); // misaligned bytes
1267 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
1268 __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'
1269
1270 const Register left_shift = G1; // left shift bit counter
1271 const Register right_shift = G5; // right shift bit counter
1272
1273 __ sll(G1, LogBitsPerByte, left_shift);
1274 __ mov(64, right_shift);
1275 __ sub(right_shift, left_shift, right_shift);
1276
1277 //
1278 // Load 2 aligned 8-bytes chunks and use one from previous iteration
1279 // to form 2 aligned 8-bytes chunks to store.
1280 //
1281 __ andn(end_from, 7, end_from); // Align address
1282 __ ldx(end_from, 0, O3);
1283 __ align(16);
1284 __ BIND(L_loop);
1285 __ ldx(end_from, -8, O4);
1286 __ deccc(count, count_dec); // Can we do next iteration after this one?
1287 __ ldx(end_from, -16, G4);
1288 __ dec(end_to, 16);
1289 __ dec(end_from, 16);
1290 __ srlx(O3, right_shift, O3);
1291 __ sllx(O4, left_shift, G3);
1292 __ bset(G3, O3);
1293 __ stx(O3, end_to, 8);
1294 __ srlx(O4, right_shift, O4);
1295 __ sllx(G4, left_shift, G3);
1296 __ bset(G3, O4);
1297 __ stx(O4, end_to, 0);
1298 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
1299 __ delayed()->mov(G4, O3);
1300
1301 __ inccc(count, count_dec>>1 ); // + 8 bytes
1302 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1303 __ delayed()->inc(count, count_dec>>1); // restore 'count'
1304
1305 // copy 8 bytes, part of them already loaded in O3
1306 __ ldx(end_from, -8, O4);
1307 __ dec(end_to, 8);
1308 __ dec(end_from, 8);
1309 __ srlx(O3, right_shift, O3);
1310 __ sllx(O4, left_shift, G3);
1311 __ bset(O3, G3);
1312 __ stx(G3, end_to, 0);
1313
1314 __ BIND(L_copy_last_bytes);
1315 __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes
1316 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
1317 __ delayed()->add(end_from, left_shift, end_from); // restore address
1318 }
1319
1320 //
1321 // Generate stub for disjoint byte copy. If "aligned" is true, the
1322 // "from" and "to" addresses are assumed to be heapword aligned.
1323 //
1324 // Arguments for generated stub:
1325 // from: O0
1326 // to: O1
1327 // count: O2 treated as signed
1328 //
1329 address generate_disjoint_byte_copy(bool aligned, const char * name) {
1330 __ align(CodeEntryAlignment);
1331 StubCodeMark mark(this, "StubRoutines", name);
1332 address start = __ pc();
1333
1334 Label L_skip_alignment, L_align;
1335 Label L_copy_byte, L_copy_byte_loop, L_exit;
1336
1337 const Register from = O0; // source array address
1338 const Register to = O1; // destination array address
1339 const Register count = O2; // elements count
1340 const Register offset = O5; // offset from start of arrays
1341 // O3, O4, G3, G4 are used as temp registers
1342
1343 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1344
1345 if (!aligned) disjoint_byte_copy_entry = __ pc();
1346 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1347 if (!aligned) BLOCK_COMMENT("Entry:");
1348
1349 // for short arrays, just do single element copy
1350 __ cmp(count, 23); // 16 + 7
1351 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1352 __ delayed()->mov(G0, offset);
1353
1354 if (aligned) {
1355 // 'aligned' == true when it is known statically during compilation
1356 // of this arraycopy call site that both 'from' and 'to' addresses
1357 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1358 //
1359 // Aligned arrays have 4 bytes alignment in 32-bits VM
1360 // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM
1361 //
1362 #ifndef _LP64
1363 // copy a 4-bytes word if necessary to align 'to' to 8 bytes
1364 __ andcc(to, 7, G0);
1365 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
1366 __ delayed()->ld(from, 0, O3);
1367 __ inc(from, 4);
1368 __ inc(to, 4);
1369 __ dec(count, 4);
1370 __ st(O3, to, -4);
1371 __ BIND(L_skip_alignment);
1372 #endif
1373 } else {
1374 // copy bytes to align 'to' on 8 byte boundary
1375 __ andcc(to, 7, G1); // misaligned bytes
1376 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1377 __ delayed()->neg(G1);
1378 __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment
1379 __ sub(count, G1, count);
1380 __ BIND(L_align);
1381 __ ldub(from, 0, O3);
1382 __ deccc(G1);
1383 __ inc(from);
1384 __ stb(O3, to, 0);
1385 __ br(Assembler::notZero, false, Assembler::pt, L_align);
1386 __ delayed()->inc(to);
1387 __ BIND(L_skip_alignment);
1388 }
1389 #ifdef _LP64
1390 if (!aligned)
1391 #endif
1392 {
1393 // Copy with shift 16 bytes per iteration if arrays do not have
1394 // the same alignment mod 8, otherwise fall through to the next
1395 // code for aligned copy.
1396 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1397 // Also jump over aligned copy after the copy with shift completed.
1398
1399 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1400 }
1401
1402 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1403 __ and3(count, 7, G4); // Save count
1404 __ srl(count, 3, count);
1405 generate_disjoint_long_copy_core(aligned);
1406 __ mov(G4, count); // Restore count
1407
1408 // copy trailing bytes
1409 __ BIND(L_copy_byte);
1410 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1411 __ delayed()->nop();
1412 __ align(16);
1413 __ BIND(L_copy_byte_loop);
1414 __ ldub(from, offset, O3);
1415 __ deccc(count);
1416 __ stb(O3, to, offset);
1417 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1418 __ delayed()->inc(offset);
1419
1420 __ BIND(L_exit);
1421 // O3, O4 are used as temp registers
1422 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1423 __ retl();
1424 __ delayed()->mov(G0, O0); // return 0
1425 return start;
1426 }
1427
1428 //
1429 // Generate stub for conjoint byte copy. If "aligned" is true, the
1430 // "from" and "to" addresses are assumed to be heapword aligned.
1431 //
1432 // Arguments for generated stub:
1433 // from: O0
1434 // to: O1
1435 // count: O2 treated as signed
1436 //
1437 address generate_conjoint_byte_copy(bool aligned, const char * name) {
1438 // Do reverse copy.
1439
1440 __ align(CodeEntryAlignment);
1441 StubCodeMark mark(this, "StubRoutines", name);
1442 address start = __ pc();
1443 address nooverlap_target = aligned ?
1444 StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
1445 disjoint_byte_copy_entry;
1446
1447 Label L_skip_alignment, L_align, L_aligned_copy;
1448 Label L_copy_byte, L_copy_byte_loop, L_exit;
1449
1450 const Register from = O0; // source array address
1451 const Register to = O1; // destination array address
1452 const Register count = O2; // elements count
1453 const Register end_from = from; // source array end address
1454 const Register end_to = to; // destination array end address
1455
1456 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1457
1458 if (!aligned) byte_copy_entry = __ pc();
1459 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1460 if (!aligned) BLOCK_COMMENT("Entry:");
1461
1462 array_overlap_test(nooverlap_target, 0);
1463
1464 __ add(to, count, end_to); // offset after last copied element
1465
1466 // for short arrays, just do single element copy
1467 __ cmp(count, 23); // 16 + 7
1468 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1469 __ delayed()->add(from, count, end_from);
1470
1471 {
1472 // Align the ends of the arrays since they may not be aligned even
1473 // when the arrays themselves are aligned.
1474
1475 // copy bytes to align 'end_to' on 8 byte boundary
1476 __ andcc(end_to, 7, G1); // misaligned bytes
1477 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1478 __ delayed()->nop();
1479 __ sub(count, G1, count);
1480 __ BIND(L_align);
1481 __ dec(end_from);
1482 __ dec(end_to);
1483 __ ldub(end_from, 0, O3);
1484 __ deccc(G1);
1485 __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1486 __ delayed()->stb(O3, end_to, 0);
1487 __ BIND(L_skip_alignment);
1488 }
1489 #ifdef _LP64
1490 if (aligned) {
1491 // Both arrays are aligned to 8-bytes in 64-bits VM.
1492 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1493 // in unaligned case.
1494 __ dec(count, 16);
1495 } else
1496 #endif
1497 {
1498 // Copy with shift 16 bytes per iteration if arrays do not have
1499 // the same alignment mod 8, otherwise jump to the next
1500 // code for aligned copy (and subtracting 16 from 'count' before the jump).
1501 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1502 // Also jump over aligned copy after the copy with shift completed.
1503
1504 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1505 L_aligned_copy, L_copy_byte);
1506 }
1507 // copy 16 elements (16 bytes) at a time
1508 __ align(16);
1509 __ BIND(L_aligned_copy);
1510 __ dec(end_from, 16);
1511 __ ldx(end_from, 8, O3);
1512 __ ldx(end_from, 0, O4);
1513 __ dec(end_to, 16);
1514 __ deccc(count, 16);
1515 __ stx(O3, end_to, 8);
1516 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1517 __ delayed()->stx(O4, end_to, 0);
1518 __ inc(count, 16);
1519
1520 // copy 1 element (1 byte) at a time
1521 __ BIND(L_copy_byte);
1522 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1523 __ delayed()->nop();
1524 __ align(16);
1525 __ BIND(L_copy_byte_loop);
1526 __ dec(end_from);
1527 __ dec(end_to);
1528 __ ldub(end_from, 0, O4);
1529 __ deccc(count);
1530 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1531 __ delayed()->stb(O4, end_to, 0);
1532
1533 __ BIND(L_exit);
1534 // O3, O4 are used as temp registers
1535 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1536 __ retl();
1537 __ delayed()->mov(G0, O0); // return 0
1538 return start;
1539 }
1540
1541 //
1542 // Generate stub for disjoint short copy. If "aligned" is true, the
1543 // "from" and "to" addresses are assumed to be heapword aligned.
1544 //
1545 // Arguments for generated stub:
1546 // from: O0
1547 // to: O1
1548 // count: O2 treated as signed
1549 //
1550 address generate_disjoint_short_copy(bool aligned, const char * name) {
1551 __ align(CodeEntryAlignment);
1552 StubCodeMark mark(this, "StubRoutines", name);
1553 address start = __ pc();
1554
1555 Label L_skip_alignment, L_skip_alignment2;
1556 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
1557
1558 const Register from = O0; // source array address
1559 const Register to = O1; // destination array address
1560 const Register count = O2; // elements count
1561 const Register offset = O5; // offset from start of arrays
1562 // O3, O4, G3, G4 are used as temp registers
1563
1564 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1565
1566 if (!aligned) disjoint_short_copy_entry = __ pc();
1567 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1568 if (!aligned) BLOCK_COMMENT("Entry:");
1569
1570 // for short arrays, just do single element copy
1571 __ cmp(count, 11); // 8 + 3 (22 bytes)
1572 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1573 __ delayed()->mov(G0, offset);
1574
1575 if (aligned) {
1576 // 'aligned' == true when it is known statically during compilation
1577 // of this arraycopy call site that both 'from' and 'to' addresses
1578 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1579 //
1580 // Aligned arrays have 4 bytes alignment in 32-bits VM
1581 // and 8 bytes - in 64-bits VM.
1582 //
1583 #ifndef _LP64
1584 // copy a 2-elements word if necessary to align 'to' to 8 bytes
1585 __ andcc(to, 7, G0);
1586 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1587 __ delayed()->ld(from, 0, O3);
1588 __ inc(from, 4);
1589 __ inc(to, 4);
1590 __ dec(count, 2);
1591 __ st(O3, to, -4);
1592 __ BIND(L_skip_alignment);
1593 #endif
1594 } else {
1595 // copy 1 element if necessary to align 'to' on a 4-byte boundary
1596 __ andcc(to, 3, G0);
1597 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1598 __ delayed()->lduh(from, 0, O3);
1599 __ inc(from, 2);
1600 __ inc(to, 2);
1601 __ dec(count);
1602 __ sth(O3, to, -2);
1603 __ BIND(L_skip_alignment);
1604
1605 // copy 2 elements to align 'to' on an 8 byte boundary
1606 __ andcc(to, 7, G0);
1607 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1608 __ delayed()->lduh(from, 0, O3);
1609 __ dec(count, 2);
1610 __ lduh(from, 2, O4);
1611 __ inc(from, 4);
1612 __ inc(to, 4);
1613 __ sth(O3, to, -4);
1614 __ sth(O4, to, -2);
1615 __ BIND(L_skip_alignment2);
1616 }
1617 #ifdef _LP64
1618 if (!aligned)
1619 #endif
1620 {
1621 // Copy with shift 16 bytes per iteration if arrays do not have
1622 // the same alignment mod 8, otherwise fall through to the next
1623 // code for aligned copy.
1624 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1625 // Also jump over aligned copy after the copy with shift completed.
1626
1627 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1628 }
1629
1630 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1631 __ and3(count, 3, G4); // Save
1632 __ srl(count, 2, count);
1633 generate_disjoint_long_copy_core(aligned);
1634 __ mov(G4, count); // restore
1635
1636 // copy 1 element at a time
1637 __ BIND(L_copy_2_bytes);
1638 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1639 __ delayed()->nop();
1640 __ align(16);
1641 __ BIND(L_copy_2_bytes_loop);
1642 __ lduh(from, offset, O3);
1643 __ deccc(count);
1644 __ sth(O3, to, offset);
1645 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1646 __ delayed()->inc(offset, 2);
1647
1648 __ BIND(L_exit);
1649 // O3, O4 are used as temp registers
1650 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1651 __ retl();
1652 __ delayed()->mov(G0, O0); // return 0
1653 return start;
1654 }
1655
1656 //
1657 // Generate stub for conjoint short copy. If "aligned" is true, the
1658 // "from" and "to" addresses are assumed to be heapword aligned.
1659 //
1660 // Arguments for generated stub:
1661 // from: O0
1662 // to: O1
1663 // count: O2 treated as signed
1664 //
1665 address generate_conjoint_short_copy(bool aligned, const char * name) {
1666 // Do reverse copy.
1667
1668 __ align(CodeEntryAlignment);
1669 StubCodeMark mark(this, "StubRoutines", name);
1670 address start = __ pc();
1671 address nooverlap_target = aligned ?
1672 StubRoutines::arrayof_jshort_disjoint_arraycopy() :
1673 disjoint_short_copy_entry;
1674
1675 Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
1676 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
1677
1678 const Register from = O0; // source array address
1679 const Register to = O1; // destination array address
1680 const Register count = O2; // elements count
1681 const Register end_from = from; // source array end address
1682 const Register end_to = to; // destination array end address
1683
1684 const Register byte_count = O3; // bytes count to copy
1685
1686 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1687
1688 if (!aligned) short_copy_entry = __ pc();
1689 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1690 if (!aligned) BLOCK_COMMENT("Entry:");
1691
1692 array_overlap_test(nooverlap_target, 1);
1693
1694 __ sllx(count, LogBytesPerShort, byte_count);
1695 __ add(to, byte_count, end_to); // offset after last copied element
1696
1697 // for short arrays, just do single element copy
1698 __ cmp(count, 11); // 8 + 3 (22 bytes)
1699 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1700 __ delayed()->add(from, byte_count, end_from);
1701
1702 {
1703 // Align the ends of the arrays since they may not be aligned
1704 // even when the arrays themselves are aligned.
1705
1706 // copy 1 element if necessary to align 'end_to' on a 4 byte boundary
1707 __ andcc(end_to, 3, G0);
1708 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1709 __ delayed()->lduh(end_from, -2, O3);
1710 __ dec(end_from, 2);
1711 __ dec(end_to, 2);
1712 __ dec(count);
1713 __ sth(O3, end_to, 0);
1714 __ BIND(L_skip_alignment);
1715
1716 // copy 2 elements to align 'end_to' on an 8 byte boundary
1717 __ andcc(end_to, 7, G0);
1718 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1719 __ delayed()->lduh(end_from, -2, O3);
1720 __ dec(count, 2);
1721 __ lduh(end_from, -4, O4);
1722 __ dec(end_from, 4);
1723 __ dec(end_to, 4);
1724 __ sth(O3, end_to, 2);
1725 __ sth(O4, end_to, 0);
1726 __ BIND(L_skip_alignment2);
1727 }
1728 #ifdef _LP64
1729 if (aligned) {
1730 // Both arrays are aligned to 8-bytes in 64-bits VM.
1731 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1732 // in unaligned case.
1733 __ dec(count, 8);
1734 } else
1735 #endif
1736 {
1737 // Copy 16 bytes per iteration with shift if the arrays do not have
1738 // the same alignment mod 8; otherwise jump to the aligned copy
1739 // code below (subtracting 8 from 'count' before the jump).
1740 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1741 // Also jump over the aligned copy once the copy with shift completes.
1742
1743 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1744 L_aligned_copy, L_copy_2_bytes);
1745 }
1746 // copy 4 elements (16 bytes) at a time
1747 __ align(16);
1748 __ BIND(L_aligned_copy);
1749 __ dec(end_from, 16);
1750 __ ldx(end_from, 8, O3);
1751 __ ldx(end_from, 0, O4);
1752 __ dec(end_to, 16);
1753 __ deccc(count, 8);
1754 __ stx(O3, end_to, 8);
1755 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1756 __ delayed()->stx(O4, end_to, 0);
1757 __ inc(count, 8);
1758
1759 // copy 1 element (2 bytes) at a time
1760 __ BIND(L_copy_2_bytes);
1761 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1762 __ delayed()->nop();
1763 __ BIND(L_copy_2_bytes_loop);
1764 __ dec(end_from, 2);
1765 __ dec(end_to, 2);
1766 __ lduh(end_from, 0, O4);
1767 __ deccc(count);
1768 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1769 __ delayed()->sth(O4, end_to, 0);
1770
1771 __ BIND(L_exit);
1772 // O3, O4 are used as temp registers
1773 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1774 __ retl();
1775 __ delayed()->mov(G0, O0); // return 0
1776 return start;
1777 }
1778
1779 //
1780 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1781 // If "aligned" is true, the "from" and "to" addresses are assumed
1782 // to be heapword aligned.
1783 //
1784 // Arguments:
1785 // from: O0
1786 // to: O1
1787 // count: O2 treated as signed
1788 //
1789 void generate_disjoint_int_copy_core(bool aligned) {
1790
1791 Label L_skip_alignment, L_aligned_copy;
1792 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1793
1794 const Register from = O0; // source array address
1795 const Register to = O1; // destination array address
1796 const Register count = O2; // elements count
1797 const Register offset = O5; // offset from start of arrays
1798 // O3, O4, G3, G4 are used as temp registers
1799
1800 // 'aligned' == true when it is known statically during compilation
1801 // of this arraycopy call site that both 'from' and 'to' addresses
1802 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1803 //
1804 // Aligned arrays have 4 bytes alignment in 32-bits VM
1805 // and 8 bytes - in 64-bits VM.
1806 //
1807 #ifdef _LP64
1808 if (!aligned)
1809 #endif
1810 {
1811 // The next check could be put under 'ifndef' since the code in
1812 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
1813
1814 // for short arrays, just do single element copy
1815 __ cmp(count, 5); // 4 + 1 (20 bytes)
1816 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1817 __ delayed()->mov(G0, offset);
1818
1819 // copy 1 element to align 'to' on an 8 byte boundary
1820 __ andcc(to, 7, G0);
1821 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1822 __ delayed()->ld(from, 0, O3);
1823 __ inc(from, 4);
1824 __ inc(to, 4);
1825 __ dec(count);
1826 __ st(O3, to, -4);
1827 __ BIND(L_skip_alignment);
1828
1829 // if arrays have same alignment mod 8, do 4 elements copy
1830 __ andcc(from, 7, G0);
1831 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
1832 __ delayed()->ld(from, 0, O3);
1833
1834 //
1835 // Load 2 aligned 8-bytes chunks and use one from previous iteration
1836 // to form 2 aligned 8-bytes chunks to store.
1837 //
1838 // copy_16_bytes_forward_with_shift() is not used here since this
1839 // code is more optimal.
1840
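// Roughly, each iteration below combines the carried-over word w0 with two
// new aligned 8-byte loads w1 (at from+4) and w2 (at from+12) to form two
// aligned 8-byte stores; SPARC is big-endian, so the earlier word supplies
// the high half. Illustrative C sketch, not generated code:
//   store64(to,     (w0 << 32) | (w1 >> 32));
//   store64(to + 8, (w1 << 32) | (w2 >> 32));
//   w0 = w2;   // carry into the next iteration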
1841 // copy with shift 4 elements (16 bytes) at a time
1842 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4
1843
1844 __ align(16);
1845 __ BIND(L_copy_16_bytes);
1846 __ ldx(from, 4, O4);
1847 __ deccc(count, 4); // Can we do next iteration after this one?
1848 __ ldx(from, 12, G4);
1849 __ inc(to, 16);
1850 __ inc(from, 16);
1851 __ sllx(O3, 32, O3);
1852 __ srlx(O4, 32, G3);
1853 __ bset(G3, O3);
1854 __ stx(O3, to, -16);
1855 __ sllx(O4, 32, O4);
1856 __ srlx(G4, 32, G3);
1857 __ bset(G3, O4);
1858 __ stx(O4, to, -8);
1859 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
1860 __ delayed()->mov(G4, O3);
1861
1862 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
1863 __ delayed()->inc(count, 4); // restore 'count'
1864
1865 __ BIND(L_aligned_copy);
1866 }
1867 // copy 4 elements (16 bytes) at a time
1868 __ and3(count, 1, G4); // Save
1869 __ srl(count, 1, count);
1870 generate_disjoint_long_copy_core(aligned);
1871 __ mov(G4, count); // Restore
1872
1873 // copy 1 element at a time
1874 __ BIND(L_copy_4_bytes);
1875 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1876 __ delayed()->nop();
1877 __ BIND(L_copy_4_bytes_loop);
1878 __ ld(from, offset, O3);
1879 __ deccc(count);
1880 __ st(O3, to, offset);
1881 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
1882 __ delayed()->inc(offset, 4);
1883 __ BIND(L_exit);
1884 }
1885
1886 //
1887 // Generate stub for disjoint int copy. If "aligned" is true, the
1888 // "from" and "to" addresses are assumed to be heapword aligned.
1889 //
1890 // Arguments for generated stub:
1891 // from: O0
1892 // to: O1
1893 // count: O2 treated as signed
1894 //
1895 address generate_disjoint_int_copy(bool aligned, const char * name) {
1896 __ align(CodeEntryAlignment);
1897 StubCodeMark mark(this, "StubRoutines", name);
1898 address start = __ pc();
1899
1900 const Register count = O2;
1901 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1902
1903 if (!aligned) disjoint_int_copy_entry = __ pc();
1904 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1905 if (!aligned) BLOCK_COMMENT("Entry:");
1906
1907 generate_disjoint_int_copy_core(aligned);
1908
1909 // O3, O4 are used as temp registers
1910 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
1911 __ retl();
1912 __ delayed()->mov(G0, O0); // return 0
1913 return start;
1914 }
1915
1916 //
1917 // Generate core code for conjoint int copy (and oop copy on 32-bit).
1918 // If "aligned" is true, the "from" and "to" addresses are assumed
1919 // to be heapword aligned.
1920 //
1921 // Arguments:
1922 // from: O0
1923 // to: O1
1924 // count: O2 treated as signed
1925 //
1926 void generate_conjoint_int_copy_core(bool aligned) {
1927 // Do reverse copy.
1928
1929 Label L_skip_alignment, L_aligned_copy;
1930 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1931
1932 const Register from = O0; // source array address
1933 const Register to = O1; // destination array address
1934 const Register count = O2; // elements count
1935 const Register end_from = from; // source array end address
1936 const Register end_to = to; // destination array end address
1937 // O3, O4, O5, G3 are used as temp registers
1938
1939 const Register byte_count = O3; // bytes count to copy
1940
1941 __ sllx(count, LogBytesPerInt, byte_count);
1942 __ add(to, byte_count, end_to); // offset after last copied element
1943
1944 __ cmp(count, 5); // for short arrays, just do single element copy
1945 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1946 __ delayed()->add(from, byte_count, end_from);
1947
1948 // copy 1 element to align 'to' on an 8 byte boundary
1949 __ andcc(end_to, 7, G0);
1950 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1951 __ delayed()->nop();
1952 __ dec(count);
1953 __ dec(end_from, 4);
1954 __ dec(end_to, 4);
1955 __ ld(end_from, 0, O4);
1956 __ st(O4, end_to, 0);
1957 __ BIND(L_skip_alignment);
1958
1959 // Check if 'end_from' and 'end_to' have the same alignment.
1960 __ andcc(end_from, 7, G0);
1961 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
1962 __ delayed()->dec(count, 4); // The cmp at the start guarantees count >= 4
1963
1964 // copy with shift 4 elements (16 bytes) at a time
1965 //
1966 // Load 2 aligned 8-bytes chunks and use one from previous iteration
1967 // to form 2 aligned 8-bytes chunks to store.
1968 //
1969 __ ldx(end_from, -4, O3);
1970 __ align(16);
1971 __ BIND(L_copy_16_bytes);
1972 __ ldx(end_from, -12, O4);
1973 __ deccc(count, 4);
1974 __ ldx(end_from, -20, O5);
1975 __ dec(end_to, 16);
1976 __ dec(end_from, 16);
1977 __ srlx(O3, 32, O3);
1978 __ sllx(O4, 32, G3);
1979 __ bset(G3, O3);
1980 __ stx(O3, end_to, 8);
1981 __ srlx(O4, 32, O4);
1982 __ sllx(O5, 32, G3);
1983 __ bset(O4, G3);
1984 __ stx(G3, end_to, 0);
1985 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
1986 __ delayed()->mov(O5, O3);
1987
1988 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
1989 __ delayed()->inc(count, 4);
1990
1991 // copy 4 elements (16 bytes) at a time
1992 __ align(16);
1993 __ BIND(L_aligned_copy);
1994 __ dec(end_from, 16);
1995 __ ldx(end_from, 8, O3);
1996 __ ldx(end_from, 0, O4);
1997 __ dec(end_to, 16);
1998 __ deccc(count, 4);
1999 __ stx(O3, end_to, 8);
2000 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2001 __ delayed()->stx(O4, end_to, 0);
2002 __ inc(count, 4);
2003
2004 // copy 1 element (4 bytes) at a time
2005 __ BIND(L_copy_4_bytes);
2006 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
2007 __ delayed()->nop();
2008 __ BIND(L_copy_4_bytes_loop);
2009 __ dec(end_from, 4);
2010 __ dec(end_to, 4);
2011 __ ld(end_from, 0, O4);
2012 __ deccc(count);
2013 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2014 __ delayed()->st(O4, end_to, 0);
2015 __ BIND(L_exit);
2016 }
2017
2018 //
2019 // Generate stub for conjoint int copy. If "aligned" is true, the
2020 // "from" and "to" addresses are assumed to be heapword aligned.
2021 //
2022 // Arguments for generated stub:
2023 // from: O0
2024 // to: O1
2025 // count: O2 treated as signed
2026 //
2027 address generate_conjoint_int_copy(bool aligned, const char * name) {
2028 __ align(CodeEntryAlignment);
2029 StubCodeMark mark(this, "StubRoutines", name);
2030 address start = __ pc();
2031
2032 address nooverlap_target = aligned ?
2033 StubRoutines::arrayof_jint_disjoint_arraycopy() :
2034 disjoint_int_copy_entry;
2035
2036 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
2037
2038 if (!aligned) int_copy_entry = __ pc();
2039 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2040 if (!aligned) BLOCK_COMMENT("Entry:");
2041
2042 array_overlap_test(nooverlap_target, 2);
2043
2044 generate_conjoint_int_copy_core(aligned);
2045
2046 // O3, O4 are used as temp registers
2047 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
2048 __ retl();
2049 __ delayed()->mov(G0, O0); // return 0
2050 return start;
2051 }
2052
2053 //
2054 // Generate core code for disjoint long copy (and oop copy on 64-bit).
2055 // "aligned" is ignored, because we must make the stronger
2056 // assumption that both addresses are always 64-bit aligned.
2057 //
2058 // Arguments:
2059 // from: O0
2060 // to: O1
2061 // count: O2 treated as signed
2062 //
2063 void generate_disjoint_long_copy_core(bool aligned) {
2064 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
2065 const Register from = O0; // source array address
2066 const Register to = O1; // destination array address
2067 const Register count = O2; // elements count
2068 const Register offset0 = O4; // element offset
2069 const Register offset8 = O5; // next element offset
2070
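// The loop below is unrolled by two: each iteration copies the 8-byte
// elements at 'offset0' and 'offset8', then advances both offsets by 16.
// Roughly (illustrative C sketch, not generated code; copy8() is shorthand
// for an 8-byte load/store pair):
//   while (count >= 2) { copy8(to + off, from + off);
//                        copy8(to + off + 8, from + off + 8);
//                        off += 16;  count -= 2; }
//   if (count == 1)      copy8(to + off, from + off);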
2071 __ deccc(count, 2);
2072 __ mov(G0, offset0); // offset from start of arrays (0)
2073 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
2074 __ delayed()->add(offset0, 8, offset8);
2075 __ align(16);
2076 __ BIND(L_copy_16_bytes);
2077 __ ldx(from, offset0, O3);
2078 __ ldx(from, offset8, G3);
2079 __ deccc(count, 2);
2080 __ stx(O3, to, offset0);
2081 __ inc(offset0, 16);
2082 __ stx(G3, to, offset8);
2083 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2084 __ delayed()->inc(offset8, 16);
2085
2086 __ BIND(L_copy_8_bytes);
2087 __ inccc(count, 2);
2088 __ brx(Assembler::zero, true, Assembler::pn, L_exit );
2089 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
2090 __ ldx(from, offset0, O3);
2091 __ stx(O3, to, offset0);
2092 __ BIND(L_exit);
2093 }
2094
2095 //
2096 // Generate stub for disjoint long copy.
2097 // "aligned" is ignored, because we must make the stronger
2098 // assumption that both addresses are always 64-bit aligned.
2099 //
2100 // Arguments for generated stub:
2101 // from: O0
2102 // to: O1
2103 // count: O2 treated as signed
2104 //
2105 address generate_disjoint_long_copy(bool aligned, const char * name) {
2106 __ align(CodeEntryAlignment);
2107 StubCodeMark mark(this, "StubRoutines", name);
2108 address start = __ pc();
2109
2110 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
2111
2112 if (!aligned) disjoint_long_copy_entry = __ pc();
2113 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2114 if (!aligned) BLOCK_COMMENT("Entry:");
2115
2116 generate_disjoint_long_copy_core(aligned);
2117
2118 // O3, O4 are used as temp registers
2119 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
2120 __ retl();
2121 __ delayed()->mov(G0, O0); // return 0
2122 return start;
2123 }
2124
2125 //
2126 // Generate core code for conjoint long copy (and oop copy on 64-bit).
2127 // "aligned" is ignored, because we must make the stronger
2128 // assumption that both addresses are always 64-bit aligned.
2129 //
2130 // Arguments:
2131 // from: O0
2132 // to: O1
2133 // count: O2 treated as signed
2134 //
2135 void generate_conjoint_long_copy_core(bool aligned) {
2136 // Do reverse copy.
2137 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
2138 const Register from = O0; // source array address
2139 const Register to = O1; // destination array address
2140 const Register count = O2; // elements count
2141 const Register offset8 = O4; // element offset
2142 const Register offset0 = O5; // previous element offset
2143
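// The loop below copies the two highest remaining elements per iteration
// (at 'offset8' and 'offset0') and steps both offsets down by 16 bytes;
// a final single-element copy at offset 0 handles an odd element count.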
2144 __ subcc(count, 1, count);
2145 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
2146 __ delayed()->sllx(count, LogBytesPerLong, offset8);
2147 __ sub(offset8, 8, offset0);
2148 __ align(16);
2149 __ BIND(L_copy_16_bytes);
2150 __ ldx(from, offset8, O2);
2151 __ ldx(from, offset0, O3);
2152 __ stx(O2, to, offset8);
2153 __ deccc(offset8, 16); // use offset8 as counter
2154 __ stx(O3, to, offset0);
2155 __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes);
2156 __ delayed()->dec(offset0, 16);
2157
2158 __ BIND(L_copy_8_bytes);
2159 __ brx(Assembler::negative, false, Assembler::pn, L_exit );
2160 __ delayed()->nop();
2161 __ ldx(from, 0, O3);
2162 __ stx(O3, to, 0);
2163 __ BIND(L_exit);
2164 }
2165
2166 // Generate stub for conjoint long copy.
2167 // "aligned" is ignored, because we must make the stronger
2168 // assumption that both addresses are always 64-bit aligned.
2169 //
2170 // Arguments for generated stub:
2171 // from: O0
2172 // to: O1
2173 // count: O2 treated as signed
2174 //
2175 address generate_conjoint_long_copy(bool aligned, const char * name) {
2176 __ align(CodeEntryAlignment);
2177 StubCodeMark mark(this, "StubRoutines", name);
2178 address start = __ pc();
2179
2180 assert(!aligned, "usage");
2181 address nooverlap_target = disjoint_long_copy_entry;
2182
2183 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
2184
2185 if (!aligned) long_copy_entry = __ pc();
2186 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2187 if (!aligned) BLOCK_COMMENT("Entry:");
2188
2189 array_overlap_test(nooverlap_target, 3);
2190
2191 generate_conjoint_long_copy_core(aligned);
2192
2193 // O3, O4 are used as temp registers
2194 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
2195 __ retl();
2196 __ delayed()->mov(G0, O0); // return 0
2197 return start;
2198 }
2199
2200 // Generate stub for disjoint oop copy. If "aligned" is true, the
2201 // "from" and "to" addresses are assumed to be heapword aligned.
2202 //
2203 // Arguments for generated stub:
2204 // from: O0
2205 // to: O1
2206 // count: O2 treated as signed
2207 //
2208 address generate_disjoint_oop_copy(bool aligned, const char * name) {
2209
2210 const Register from = O0; // source array address
2211 const Register to = O1; // destination array address
2212 const Register count = O2; // elements count
2213
2214 __ align(CodeEntryAlignment);
2215 StubCodeMark mark(this, "StubRoutines", name);
2216 address start = __ pc();
2217
2218 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2219
2220 if (!aligned) disjoint_oop_copy_entry = __ pc();
2221 // caller can pass a 64-bit byte count here
2222 if (!aligned) BLOCK_COMMENT("Entry:");
2223
2224 // save arguments for barrier generation
2225 __ mov(to, G1);
2226 __ mov(count, G5);
2227 gen_write_ref_array_pre_barrier(G1, G5);
2228 #ifdef _LP64
2229 generate_disjoint_long_copy_core(aligned);
2230 #else
2231 generate_disjoint_int_copy_core(aligned);
2232 #endif
2233 // O0 is used as temp register
2234 gen_write_ref_array_post_barrier(G1, G5, O0);
2235
2236 // O3, O4 are used as temp registers
2237 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2238 __ retl();
2239 __ delayed()->mov(G0, O0); // return 0
2240 return start;
2241 }
2242
2243 // Generate stub for conjoint oop copy. If "aligned" is true, the
2244 // "from" and "to" addresses are assumed to be heapword aligned.
2245 //
2246 // Arguments for generated stub:
2247 // from: O0
2248 // to: O1
2249 // count: O2 treated as signed
2250 //
2251 address generate_conjoint_oop_copy(bool aligned, const char * name) {
2252
2253 const Register from = O0; // source array address
2254 const Register to = O1; // destination array address
2255 const Register count = O2; // elements count
2256
2257 __ align(CodeEntryAlignment);
2258 StubCodeMark mark(this, "StubRoutines", name);
2259 address start = __ pc();
2260
2261 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2262
2263 if (!aligned) oop_copy_entry = __ pc();
2264 // caller can pass a 64-bit byte count here
2265 if (!aligned) BLOCK_COMMENT("Entry:");
2266
2267 // save arguments for barrier generation
2268 __ mov(to, G1);
2269 __ mov(count, G5);
2270
2271 gen_write_ref_array_pre_barrier(G1, G5);
2272
2273 address nooverlap_target = aligned ?
2274 StubRoutines::arrayof_oop_disjoint_arraycopy() :
2275 disjoint_oop_copy_entry;
2276
2277 array_overlap_test(nooverlap_target, LogBytesPerWord);
2278
2279 #ifdef _LP64
2280 generate_conjoint_long_copy_core(aligned);
2281 #else
2282 generate_conjoint_int_copy_core(aligned);
2283 #endif
2284
2285 // O0 is used as temp register
2286 gen_write_ref_array_post_barrier(G1, G5, O0);
2287
2288 // O3, O4 are used as temp registers
2289 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2290 __ retl();
2291 __ delayed()->mov(G0, O0); // return 0
2292 return start;
2293 }
2294
2295
2296 // Helper for generating a dynamic type check.
2297 // Smashes only the given temp registers.
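// In outline (illustrative sketch, not generated code): pointer-equal
// klasses hit immediately; next, the supertype display entry at
// 'super_check_offset' is compared against 'super_klass'; failing that,
// either the _partial_subtype_check stub decides or control falls through
// to the local L_miss label.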
2298 void generate_type_check(Register sub_klass,
2299 Register super_check_offset,
2300 Register super_klass,
2301 Register temp,
2302 Label& L_success,
2303 Register deccc_hack = noreg) {
2304 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2305
2306 BLOCK_COMMENT("type_check:");
2307
2308 Label L_miss;
2309
2310 assert_clean_int(super_check_offset, temp);
2311
2312 // maybe decrement caller's trip count:
2313 #define DELAY_SLOT delayed(); \
2314 { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }
2315
2316 // if the pointers are equal, we are done (e.g., String[] elements)
2317 __ cmp(sub_klass, super_klass);
2318 __ brx(Assembler::equal, true, Assembler::pt, L_success);
2319 __ DELAY_SLOT;
2320
2321 // check the supertype display:
2322 __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
2323 __ cmp(super_klass, temp); // test the super type
2324 __ brx(Assembler::equal, true, Assembler::pt, L_success);
2325 __ DELAY_SLOT;
2326
2327 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
2328 Klass::secondary_super_cache_offset_in_bytes());
2329 __ cmp(super_klass, sc_offset);
2330 __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
2331 __ delayed()->nop();
2332
2333 __ save_frame(0);
2334 __ mov(sub_klass->after_save(), O1);
2335 // mov(super_klass->after_save(), O2); //fill delay slot
2336 assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
2337 __ call(StubRoutines::Sparc::_partial_subtype_check);
2338 __ delayed()->mov(super_klass->after_save(), O2);
2339 __ restore();
2340
2341 // Upon return, the condition codes are already set.
2342 __ brx(Assembler::equal, true, Assembler::pt, L_success);
2343 __ DELAY_SLOT;
2344
2345 #undef DELAY_SLOT
2346
2347 // Fall through on failure!
2348 __ BIND(L_miss);
2349 }
2350
2351
2352 // Generate stub for checked oop copy.
2353 //
2354 // Arguments for generated stub:
2355 // from: O0
2356 // to: O1
2357 // count: O2 treated as signed
2358 // ckoff: O3 (super_check_offset)
2359 // ckval: O4 (super_klass)
2360 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2361 //
2362 address generate_checkcast_copy(const char* name) {
2363
2364 const Register O0_from = O0; // source array address
2365 const Register O1_to = O1; // destination array address
2366 const Register O2_count = O2; // elements count
2367 const Register O3_ckoff = O3; // super_check_offset
2368 const Register O4_ckval = O4; // super_klass
2369
2370 const Register O5_offset = O5; // loop var, with stride wordSize
2371 const Register G1_remain = G1; // loop var, with stride -1
2372 const Register G3_oop = G3; // actual oop copied
2373 const Register G4_klass = G4; // oop._klass
2374 const Register G5_super = G5; // oop._klass._primary_supers[ckval]
2375
2376 __ align(CodeEntryAlignment);
2377 StubCodeMark mark(this, "StubRoutines", name);
2378 address start = __ pc();
2379
2380 int klass_off = oopDesc::klass_offset_in_bytes();
2381
2382 gen_write_ref_array_pre_barrier(G1, G5);
2383
2384
2385 #ifdef ASSERT
2386 // We sometimes save a frame (see partial_subtype_check below).
2387 // If this will cause trouble, let's fail now instead of later.
2388 __ save_frame(0);
2389 __ restore();
2390 #endif
2391
2392 #ifdef ASSERT
2393 // caller guarantees that the arrays really are different
2394 // otherwise, we would have to make conjoint checks
2395 { Label L;
2396 __ mov(O3, G1); // spill: overlap test smashes O3
2397 __ mov(O4, G4); // spill: overlap test smashes O4
2398 array_overlap_test(L, LogBytesPerWord);
2399 __ stop("checkcast_copy within a single array");
2400 __ bind(L);
2401 __ mov(G1, O3);
2402 __ mov(G4, O4);
2403 }
2404 #endif //ASSERT
2405
2406 assert_clean_int(O2_count, G1); // Make sure 'count' is clean int.
2407
2408 checkcast_copy_entry = __ pc();
2409 // caller can pass a 64-bit byte count here (from generic stub)
2410 BLOCK_COMMENT("Entry:");
2411
2412 Label load_element, store_element, do_card_marks, fail, done;
2413 __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it
2414 __ brx(Assembler::notZero, false, Assembler::pt, load_element);
2415 __ delayed()->mov(G0, O5_offset); // offset from start of arrays
2416
2417 // Empty array: Nothing to do.
2418 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
2419 __ retl();
2420 __ delayed()->set(0, O0); // return 0 on (trivial) success
2421
2422 // ======== begin loop ========
2423 // (Loop is rotated; its entry is load_element.)
2424 // Loop variables:
2425 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2426 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2427 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
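// Roughly (illustrative C sketch, not generated code; 'is_assignable' and
// 'element_klass' stand for the generate_type_check() test against O4_ckval):
//   for (i = 0; remain != 0; remain--, i++) {
//     oop o = from[i];
//     if (o != NULL && !is_assignable(element_klass, o->klass()))
//       break;                      // caller is told (-1 ^ number copied)
//     to[i] = o;                    // NULL elements are stored unchecked
//   }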
2428 __ align(16);
2429
2430 __ bind(store_element);
2431 // deccc(G1_remain); // decrement the count (hoisted)
2432 __ st_ptr(G3_oop, O1_to, O5_offset); // store the oop
2433 __ inc(O5_offset, wordSize); // step to next offset
2434 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2435 __ delayed()->set(0, O0); // return 0 on success
2436
2437 // ======== loop entry is here ========
2438 __ bind(load_element);
2439 __ ld_ptr(O0_from, O5_offset, G3_oop); // load the oop
2440 __ br_null(G3_oop, true, Assembler::pt, store_element);
2441 __ delayed()->deccc(G1_remain); // decrement the count
2442
2443 __ ld_ptr(G3_oop, klass_off, G4_klass); // query the object klass
2444
2445 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2446 // branch to this on success:
2447 store_element,
2448 // decrement this on success:
2449 G1_remain);
2450 // ======== end loop ========
2451
2452 // It was a real error; we must depend on the caller to finish the job.
2453 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2454 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2455 // and report their number to the caller.
2456 __ bind(fail);
2457 __ subcc(O2_count, G1_remain, O2_count);
2458 __ brx(Assembler::zero, false, Assembler::pt, done);
2459 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2460
2461 __ bind(do_card_marks);
2462 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2463
2464 __ bind(done);
2465 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
2466 __ retl();
2467 __ delayed()->nop(); // return value in O0
2468
2469 return start;
2470 }
2471
2472
2473 // Generate 'unsafe' array copy stub
2474 // Though just as safe as the other stubs, it takes an unscaled
2475 // size_t argument instead of an element count.
2476 //
2477 // Arguments for generated stub:
2478 // from: O0
2479 // to: O1
2480 // count: O2 byte count, treated as ssize_t, can be zero
2481 //
2482 // Examines the alignment of the operands and dispatches
2483 // to a long, int, short, or byte copy loop.
2484 //
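// The code below examines the combined low bits of the operands and
// tail-calls one of the element-copy entry points. Roughly (illustrative
// C sketch, not generated code; long_copy etc. stand for the *_copy_entry
// stubs):
//   uintptr_t bits = (uintptr_t)from | (uintptr_t)to | (uintptr_t)count;
//   if      ((bits & (BytesPerLong  - 1)) == 0) long_copy (from, to, count >> LogBytesPerLong);
//   else if ((bits & (BytesPerInt   - 1)) == 0) int_copy  (from, to, count >> LogBytesPerInt);
//   else if ((bits & (BytesPerShort - 1)) == 0) short_copy(from, to, count >> LogBytesPerShort);
//   else                                        byte_copy (from, to, count);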
2485 address generate_unsafe_copy(const char* name) {
2486
2487 const Register O0_from = O0; // source array address
2488 const Register O1_to = O1; // destination array address
2489 const Register O2_count = O2; // elements count
2490
2491 const Register G1_bits = G1; // test copy of low bits
2492
2493 __ align(CodeEntryAlignment);
2494 StubCodeMark mark(this, "StubRoutines", name);
2495 address start = __ pc();
2496
2497 // bump this on entry, not on exit:
2498 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3);
2499
2500 __ or3(O0_from, O1_to, G1_bits);
2501 __ or3(O2_count, G1_bits, G1_bits);
2502
2503 __ btst(BytesPerLong-1, G1_bits);
2504 __ br(Assembler::zero, true, Assembler::pt,
2505 long_copy_entry, relocInfo::runtime_call_type);
2506 // scale the count on the way out:
2507 __ delayed()->srax(O2_count, LogBytesPerLong, O2_count);
2508
2509 __ btst(BytesPerInt-1, G1_bits);
2510 __ br(Assembler::zero, true, Assembler::pt,
2511 int_copy_entry, relocInfo::runtime_call_type);
2512 // scale the count on the way out:
2513 __ delayed()->srax(O2_count, LogBytesPerInt, O2_count);
2514
2515 __ btst(BytesPerShort-1, G1_bits);
2516 __ br(Assembler::zero, true, Assembler::pt,
2517 short_copy_entry, relocInfo::runtime_call_type);
2518 // scale the count on the way out:
2519 __ delayed()->srax(O2_count, LogBytesPerShort, O2_count);
2520
2521 __ br(Assembler::always, false, Assembler::pt,
2522 byte_copy_entry, relocInfo::runtime_call_type);
2523 __ delayed()->nop();
2524
2525 return start;
2526 }
2527
2528
2529 // Perform range checks on the proposed arraycopy.
2530 // Kills the two temps, but nothing else.
2531 // Also, clean the sign bits of src_pos and dst_pos.
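// Roughly equivalent checks (illustrative sketch, not generated code):
//   if (src_pos + length > src->length()) goto L_failed;
//   if (dst_pos + length > dst->length()) goto L_failed;
//   src_pos = (int)src_pos;  dst_pos = (int)dst_pos;  // re-sign-extend, clearing high bits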
2532 void arraycopy_range_checks(Register src, // source array oop (O0)
2533 Register src_pos, // source position (O1)
2534 Register dst, // destination array oop (O2)
2535 Register dst_pos, // destination position (O3)
2536 Register length, // length of copy (O4)
2537 Register temp1, Register temp2,
2538 Label& L_failed) {
2539 BLOCK_COMMENT("arraycopy_range_checks:");
2540
2541 // if (src_pos + length > arrayOop(src)->length() ) FAIL;
2542
2543 const Register array_length = temp1; // scratch
2544 const Register end_pos = temp2; // scratch
2545
2546 // Note: This next instruction may be in the delay slot of a branch:
2547 __ add(length, src_pos, end_pos); // src_pos + length
2548 __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length);
2549 __ cmp(end_pos, array_length);
2550 __ br(Assembler::greater, false, Assembler::pn, L_failed);
2551
2552 // if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
2553 __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length
2554 __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length);
2555 __ cmp(end_pos, array_length);
2556 __ br(Assembler::greater, false, Assembler::pn, L_failed);
2557
2558 // The high 32 bits of 'src_pos' and 'dst_pos' have to be cleaned up.
2559 // A move with sign extension can be used since both values are positive.
2560 __ delayed()->signx(src_pos, src_pos);
2561 __ signx(dst_pos, dst_pos);
2562
2563 BLOCK_COMMENT("arraycopy_range_checks done");
2564 }
2565
2566
2567 //
2568 // Generate generic array copy stubs
2569 //
2570 // Input:
2571 // O0 - src oop
2572 // O1 - src_pos
2573 // O2 - dst oop
2574 // O3 - dst_pos
2575 // O4 - element count
2576 //
2577 // Output:
2578 // O0 == 0 - success
2579 // O0 == -1 - need to call System.arraycopy
2580 //
2581 address generate_generic_copy(const char *name) {
2582
2583 Label L_failed, L_objArray;
2584
2585 // Input registers
2586 const Register src = O0; // source array oop
2587 const Register src_pos = O1; // source position
2588 const Register dst = O2; // destination array oop
2589 const Register dst_pos = O3; // destination position
2590 const Register length = O4; // elements count
2591
2592 // registers used as temp
2593 const Register G3_src_klass = G3; // source array klass
2594 const Register G4_dst_klass = G4; // destination array klass
2595 const Register G5_lh = G5; // layout helper
2596 const Register O5_temp = O5;
2597
2598 __ align(CodeEntryAlignment);
2599 StubCodeMark mark(this, "StubRoutines", name);
2600 address start = __ pc();
2601
2602 // bump this on entry, not on exit:
2603 inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3);
2604
2605 // In principle, the int arguments could be dirty.
2606 //assert_clean_int(src_pos, G1);
2607 //assert_clean_int(dst_pos, G1);
2608 //assert_clean_int(length, G1);
2609
2610 //-----------------------------------------------------------------------
2611 // Assembler stubs will be used for this call to arraycopy
2612 // if the following conditions are met:
2613 //
2614 // (1) src and dst must not be null.
2615 // (2) src_pos must not be negative.
2616 // (3) dst_pos must not be negative.
2617 // (4) length must not be negative.
2618 // (5) src klass and dst klass should be the same and not NULL.
2619 // (6) src and dst should be arrays.
2620 // (7) src_pos + length must not exceed length of src.
2621 // (8) dst_pos + length must not exceed length of dst.
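// Any check that fails branches to L_failed, which returns -1 so the
// caller falls back to the System.arraycopy path (see "Output" above).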
2622 BLOCK_COMMENT("arraycopy initial argument checks");
2623
2624 // if (src == NULL) return -1;
2625 __ br_null(src, false, Assembler::pn, L_failed);
2626
2627 // if (src_pos < 0) return -1;
2628 __ delayed()->tst(src_pos);
2629 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2630 __ delayed()->nop();
2631
2632 // if (dst == NULL) return -1;
2633 __ br_null(dst, false, Assembler::pn, L_failed);
2634
2635 // if (dst_pos < 0) return -1;
2636 __ delayed()->tst(dst_pos);
2637 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2638
2639 // if (length < 0) return -1;
2640 __ delayed()->tst(length);
2641 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2642
2643 BLOCK_COMMENT("arraycopy argument klass checks");
2644 // get src->klass()
2645 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2646
2647 #ifdef ASSERT
2648 // assert(src->klass() != NULL);
2649 BLOCK_COMMENT("assert klasses not null");
2650 { Label L_a, L_b;
2651 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
2652 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2653 __ bind(L_a);
2654 __ stop("broken null klass");
2655 __ bind(L_b);
2656 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
2657 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
2658 BLOCK_COMMENT("assert done");
2659 }
2660 #endif
2661
2662 // Load layout helper
2663 //
2664 // |array_tag| | header_size | element_type | |log2_element_size|
2665 // 32 30 24 16 8 2 0
2666 //
2667 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2668 //
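// Roughly, the checks and fields used below are (illustrative sketch, not
// generated code):
//   is_array     =  lh < Klass::_lh_neutral_value;  // array tag bits make lh negative
//   array_offset = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
//   log2_elsize  =  lh & Klass::_lh_log2_element_size_mask;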
2669
2670 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2671 Klass::layout_helper_offset_in_bytes();
2672
2673 // Load 32-bits signed value. Use br() instruction with it to check icc.
2674 __ lduw(G3_src_klass, lh_offset, G5_lh);
2675
2676 // Handle objArrays completely differently...
2677 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2678 __ set(objArray_lh, O5_temp);
2679 __ cmp(G5_lh, O5_temp);
2680 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2681 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2682
2683 // if (src->klass() != dst->klass()) return -1;
2684 __ cmp(G3_src_klass, G4_dst_klass);
2685 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
2686 __ delayed()->nop();
2687
2688 // if (!src->is_Array()) return -1;
2689 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2690 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2691
2692 // At this point, it is known to be a typeArray (array_tag 0x3).
2693 #ifdef ASSERT
2694 __ delayed()->nop();
2695 { Label L;
2696 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2697 __ set(lh_prim_tag_in_place, O5_temp);
2698 __ cmp(G5_lh, O5_temp);
2699 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2700 __ delayed()->nop();
2701 __ stop("must be a primitive array");
2702 __ bind(L);
2703 }
2704 #else
2705 __ delayed(); // match next insn to prev branch
2706 #endif
2707
2708 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
2709 O5_temp, G4_dst_klass, L_failed);
2710
2711 // typeArrayKlass
2712 //
2713 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
2714 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
2715 //
2716
2717 const Register G4_offset = G4_dst_klass; // array offset
2718 const Register G3_elsize = G3_src_klass; // log2 element size
2719
2720 __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset);
2721 __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset
2722 __ add(src, G4_offset, src); // src array offset
2723 __ add(dst, G4_offset, dst); // dst array offset
2724 __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size
2725
2726 // next registers should be set before the jump to corresponding stub
2727 const Register from = O0; // source array address
2728 const Register to = O1; // destination array address
2729 const Register count = O2; // elements count
2730
2731 // 'from', 'to', 'count' registers should be set in this order
2732 // since they are the same as 'src', 'src_pos', 'dst'.
2733
2734 BLOCK_COMMENT("scale indexes to element size");
2735 __ sll_ptr(src_pos, G3_elsize, src_pos);
2736 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2737 __ add(src, src_pos, from); // src_addr
2738 __ add(dst, dst_pos, to); // dst_addr
2739
2740 BLOCK_COMMENT("choose copy loop based on element size");
2741 __ cmp(G3_elsize, 0);
2742 __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jbyte_arraycopy);
2743 __ delayed()->signx(length, count); // length
2744
2745 __ cmp(G3_elsize, LogBytesPerShort);
2746 __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jshort_arraycopy);
2747 __ delayed()->signx(length, count); // length
2748
2749 __ cmp(G3_elsize, LogBytesPerInt);
2750 __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jint_arraycopy);
2751 __ delayed()->signx(length, count); // length
2752 #ifdef ASSERT
2753 { Label L;
2754 __ cmp(G3_elsize, LogBytesPerLong);
2755 __ br(Assembler::equal, false, Assembler::pt, L);
2756 __ delayed()->nop();
2757 __ stop("must be long copy, but elsize is wrong");
2758 __ bind(L);
2759 }
2760 #endif
2761 __ br(Assembler::always,false,Assembler::pt,StubRoutines::_jlong_arraycopy);
2762 __ delayed()->signx(length, count); // length
2763
2764 // objArrayKlass
2765 __ BIND(L_objArray);
2766 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
2767
2768 Label L_plain_copy, L_checkcast_copy;
2769 // test array classes for subtyping
2770 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
2771 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
2772 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
2773
2774 // Identically typed arrays can be copied without element-wise checks.
2775 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
2776 O5_temp, G5_lh, L_failed);
2777
2778 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
2779 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
2780 __ sll_ptr(src_pos, LogBytesPerOop, src_pos);
2781 __ sll_ptr(dst_pos, LogBytesPerOop, dst_pos);
2782 __ add(src, src_pos, from); // src_addr
2783 __ add(dst, dst_pos, to); // dst_addr
2784 __ BIND(L_plain_copy);
2785 __ br(Assembler::always, false, Assembler::pt,StubRoutines::_oop_arraycopy);
2786 __ delayed()->signx(length, count); // length
2787
2788 __ BIND(L_checkcast_copy);
2789 // live at this point: G3_src_klass, G4_dst_klass
2790 {
2791 // Before looking at dst.length, make sure dst is also an objArray.
2792 // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
2793 __ cmp(G5_lh, O5_temp);
2794 __ br(Assembler::notEqual, false, Assembler::pn, L_failed);
2795
2796 // It is safe to examine both src.length and dst.length.
2797 __ delayed(); // match next insn to prev branch
2798 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
2799 O5_temp, G5_lh, L_failed);
2800
2801 // Marshal the base address arguments now, freeing registers.
2802 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
2803 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
2804 __ sll_ptr(src_pos, LogBytesPerOop, src_pos);
2805 __ sll_ptr(dst_pos, LogBytesPerOop, dst_pos);
2806 __ add(src, src_pos, from); // src_addr
2807 __ add(dst, dst_pos, to); // dst_addr
2808 __ signx(length, count); // length (reloaded)
2809
2810 Register sco_temp = O3; // this register is free now
2811 assert_different_registers(from, to, count, sco_temp,
2812 G4_dst_klass, G3_src_klass);
2813
2814 // Generate the type check.
2815 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2816 Klass::super_check_offset_offset_in_bytes());
2817 __ lduw(G4_dst_klass, sco_offset, sco_temp);
2818 generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
2819 O5_temp, L_plain_copy);
2820
2821 // Fetch destination element klass from the objArrayKlass header.
2822 int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
2823 objArrayKlass::element_klass_offset_in_bytes());
2824
2825 // the checkcast_copy loop needs two extra arguments:
2826 __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass
2827 // lduw(O4, sco_offset, O3); // sco of elem klass
2828
2829 __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry);
2830 __ delayed()->lduw(O4, sco_offset, O3);
2831 }
2832
2833 __ BIND(L_failed);
2834 __ retl();
2835 __ delayed()->sub(G0, 1, O0); // return -1
2836 return start;
2837 }
2838
2839 void generate_arraycopy_stubs() {
2840
2841 // Note: the disjoint stubs must be generated first, some of
2842 // the conjoint stubs use them.
2843 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
2844 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
2845 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
2846 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
2847 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy");
2848 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
2849 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
2850 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
2851 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
2852 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy");
2853
2854 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
2855 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
2856 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy");
2857 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
2858 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, "oop_arraycopy");
2859 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
2860 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
2861 #ifdef _LP64
2862 // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
2863 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
2864 #else
2865 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
2866 #endif
2867 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
2868 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
2869
2870 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
2871 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
2872 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
2873 }
2874
2875 void generate_initial() {
2876 // Generates all stubs and initializes the entry points
2877
2878 //------------------------------------------------------------------------------------------------------------------------
2879 // entry points that exist in all platforms
2880 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
2881 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
2882 StubRoutines::_forward_exception_entry = generate_forward_exception();
2883
2884 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
2885 StubRoutines::_catch_exception_entry = generate_catch_exception();
2886
2887 //------------------------------------------------------------------------------------------------------------------------
2888 // entry points that are platform specific
2889 StubRoutines::Sparc::_test_stop_entry = generate_test_stop();
2890
2891 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
2892 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
2893
2894 #if !defined(COMPILER2) && !defined(_LP64)
2895 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
2896 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
2897 StubRoutines::_atomic_add_entry = generate_atomic_add();
2898 StubRoutines::_atomic_xchg_ptr_entry = StubRoutines::_atomic_xchg_entry;
2899 StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry;
2900 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
2901 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry;
2902 StubRoutines::_fence_entry = generate_fence();
2903 #endif // COMPILER2 !=> _LP64
2904
2905 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
2906 }
2907
2908
2909 void generate_all() {
2910 // Generates all stubs and initializes the entry points
2911
2912 // These entry points require SharedInfo::stack0 to be set up in non-core builds
2913 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
2914 StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true);
2915 StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
2916 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
2917 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
2918
2919 StubRoutines::_handler_for_unsafe_access_entry =
2920 generate_handler_for_unsafe_access();
2921
2922 // support for verify_oop (must happen after universe_init)
2923 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();
2924
2925 // arraycopy stubs used by compilers
2926 generate_arraycopy_stubs();
2927 }
2928
2929
2930 public:
2931 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
2932 // replace the standard masm with a special one:
2933 _masm = new MacroAssembler(code);
2934
2935 _stub_count = !all ? 0x100 : 0x200;
2936 if (all) {
2937 generate_all();
2938 } else {
2939 generate_initial();
2940 }
2941
2942 // make sure this stub is available for all local calls
2943 if (_atomic_add_stub.is_unbound()) {
2944 // generate a second time, if necessary
2945 (void) generate_atomic_add();
2946 }
2947 }
2948
2949
2950 private:
2951 int _stub_count;
2952 void stub_prolog(StubCodeDesc* cdesc) {
2953 # ifdef ASSERT
2954 // put extra information in the stub code, to make it more readable
2955 #ifdef _LP64
2956 // Write the high part of the address
2957 // [RGV] Check if there is a dependency on the size of this prolog
2958 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
2959 #endif
2960 __ emit_data((intptr_t)cdesc, relocInfo::none);
2961 __ emit_data(++_stub_count, relocInfo::none);
2962 # endif
2963 align(true);
2964 }
2965
2966 void align(bool at_header = false) {
2967 // %%%%% move this constant somewhere else
2968 // UltraSPARC cache line size is 8 instructions:
2969 const unsigned int icache_line_size = 32;
2970 const unsigned int icache_half_line_size = 16;
2971
2972 if (at_header) {
2973 while ((intptr_t)(__ pc()) % icache_line_size != 0) {
2974 __ emit_data(0, relocInfo::none);
2975 }
2976 } else {
2977 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
2978 __ nop();
2979 }
2980 }
2981 }
2982
2983 }; // end class declaration
2984
2985
2986 address StubGenerator::disjoint_byte_copy_entry = NULL;
2987 address StubGenerator::disjoint_short_copy_entry = NULL;
2988 address StubGenerator::disjoint_int_copy_entry = NULL;
2989 address StubGenerator::disjoint_long_copy_entry = NULL;
2990 address StubGenerator::disjoint_oop_copy_entry = NULL;
2991
2992 address StubGenerator::byte_copy_entry = NULL;
2993 address StubGenerator::short_copy_entry = NULL;
2994 address StubGenerator::int_copy_entry = NULL;
2995 address StubGenerator::long_copy_entry = NULL;
2996 address StubGenerator::oop_copy_entry = NULL;
2997
2998 address StubGenerator::checkcast_copy_entry = NULL;
2999
3000 void StubGenerator_generate(CodeBuffer* code, bool all) {
3001 StubGenerator g(code, all);
3002 }