Mercurial > hg > graal-compiler
annotate src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 1708:a03ae377b2e8
6930581: G1: assert(ParallelGCThreads > 1 || n_yielded() == _hrrs->occupied(),"Should have yielded all the ..
Summary: During RSet updating, when ParallelGCThreads is zero, references that point into the collection set are added directly to the referenced region's RSet. This can cause the sparse table in the RSet to expand. RSet scanning and the "occupied" routine will then operate on different instances of the sparse table, causing the assert to trip. This may also cause some cards added post-expansion to be missed during RSet scanning. When ParallelGCThreads is non-zero, such references are recorded on the "references to be scanned" queue and the card containing the reference is recorded in a dirty card queue for use in the event of an evacuation failure. Employ the parallel code in the serial case to avoid expanding the RSets of regions in the collection set.
Reviewed-by: iveresov, ysr, tonyp
author | johnc |
---|---|
date | Fri, 06 Aug 2010 10:17:21 -0700 |
parents | e7ec8cd4dd8a |
children | d6f45b55c972 |
rev | line source |
---|---|
0 | 1 /* |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1506
diff
changeset
|
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1506
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1506
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1506
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_stubGenerator_sparc.cpp.incl" | |
27 | |
28 // Declaration and definition of StubGenerator (no .hpp file). | |
29 // For a more detailed description of the stub routine structure | |
30 // see the comment in stubRoutines.hpp. | |
31 | |
32 #define __ _masm-> | |
33 | |
34 #ifdef PRODUCT | |
35 #define BLOCK_COMMENT(str) /* nothing */ | |
36 #else | |
37 #define BLOCK_COMMENT(str) __ block_comment(str) | |
38 #endif | |
39 | |
40 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") | |
41 | |
42 // Note: The register L7 is used as L7_thread_cache, and may not be used | |
43 // any other way within this module. | |
44 | |
45 | |
46 static const Register& Lstub_temp = L2; | |
47 | |
48 // ------------------------------------------------------------------------------------------------------------------------- | |
49 // Stub Code definitions | |
50 | |
51 static address handle_unsafe_access() { | |
52 JavaThread* thread = JavaThread::current(); | |
53 address pc = thread->saved_exception_pc(); | |
54 address npc = thread->saved_exception_npc(); | |
55 // pc is the instruction which we must emulate | |
56 // doing a no-op is fine: return garbage from the load | |
57 | |
58 // request an async exception | |
59 thread->set_pending_unsafe_access_error(); | |
60 | |
61 // return address of next instruction to execute | |
62 return npc; | |
63 } | |
64 | |
65 class StubGenerator: public StubCodeGenerator { | |
66 private: | |
67 | |
// inc_counter_np(counter, t1, t2):
//   non-PRODUCT builds: emits a block comment naming the counter, followed by
//                       __ inc_counter(&counter, t1, t2).
//   PRODUCT builds:     expands to the constant expression (0).
// NOTE: the non-PRODUCT expansion is two statements and already ends in ';'.
#ifdef PRODUCT
#define inc_counter_np(a,b,c) (0)
#else
#define inc_counter_np(counter, t1, t2) \
  BLOCK_COMMENT("inc_counter " #counter); \
  __ inc_counter(&counter, t1, t2);
#endif
75 | |
76 //---------------------------------------------------------------------------------------------------- | |
77 // Call stubs are used to call Java from C | |
78 | |
79 address generate_call_stub(address& return_pc) { | |
80 StubCodeMark mark(this, "StubRoutines", "call_stub"); | |
81 address start = __ pc(); | |
82 | |
83 // Incoming arguments: | |
84 // | |
85 // o0 : call wrapper address | |
86 // o1 : result (address) | |
87 // o2 : result type | |
88 // o3 : method | |
89 // o4 : (interpreter) entry point | |
90 // o5 : parameters (address) | |
91 // [sp + 0x5c]: parameter size (in words) | |
92 // [sp + 0x60]: thread | |
93 // | |
94 // +---------------+ <--- sp + 0 | |
95 // | | | |
96 // . reg save area . | |
97 // | | | |
98 // +---------------+ <--- sp + 0x40 | |
99 // | | | |
100 // . extra 7 slots . | |
101 // | | | |
102 // +---------------+ <--- sp + 0x5c | |
103 // | param. size | | |
104 // +---------------+ <--- sp + 0x60 | |
105 // | thread | | |
106 // +---------------+ | |
107 // | | | |
108 | |
109 // note: if the link argument position changes, adjust | |
110 // the code in frame::entry_frame_call_wrapper() | |
111 | |
112 const Argument link = Argument(0, false); // used only for GC | |
113 const Argument result = Argument(1, false); | |
114 const Argument result_type = Argument(2, false); | |
115 const Argument method = Argument(3, false); | |
116 const Argument entry_point = Argument(4, false); | |
117 const Argument parameters = Argument(5, false); | |
118 const Argument parameter_size = Argument(6, false); | |
119 const Argument thread = Argument(7, false); | |
120 | |
121 // setup thread register | |
122 __ ld_ptr(thread.as_address(), G2_thread); | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
123 __ reinit_heapbase(); |
0 | 124 |
125 #ifdef ASSERT | |
126 // make sure we have no pending exceptions | |
127 { const Register t = G3_scratch; | |
128 Label L; | |
129 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t); | |
130 __ br_null(t, false, Assembler::pt, L); | |
131 __ delayed()->nop(); | |
132 __ stop("StubRoutines::call_stub: entered with pending exception"); | |
133 __ bind(L); | |
134 } | |
135 #endif | |
136 | |
137 // create activation frame & allocate space for parameters | |
138 { const Register t = G3_scratch; | |
139 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words) | |
140 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words) | |
141 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words) | |
1506 | 142 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes |
0 | 143 __ neg(t); // negate so it can be used with save |
144 __ save(SP, t, SP); // setup new frame | |
145 } | |
146 | |
147 // +---------------+ <--- sp + 0 | |
148 // | | | |
149 // . reg save area . | |
150 // | | | |
151 // +---------------+ <--- sp + 0x40 | |
152 // | | | |
153 // . extra 7 slots . | |
154 // | | | |
155 // +---------------+ <--- sp + 0x5c | |
156 // | empty slot | (only if parameter size is even) | |
157 // +---------------+ | |
158 // | | | |
159 // . parameters . | |
160 // | | | |
161 // +---------------+ <--- fp + 0 | |
162 // | | | |
163 // . reg save area . | |
164 // | | | |
165 // +---------------+ <--- fp + 0x40 | |
166 // | | | |
167 // . extra 7 slots . | |
168 // | | | |
169 // +---------------+ <--- fp + 0x5c | |
170 // | param. size | | |
171 // +---------------+ <--- fp + 0x60 | |
172 // | thread | | |
173 // +---------------+ | |
174 // | | | |
175 | |
176 // pass parameters if any | |
177 BLOCK_COMMENT("pass parameters if any"); | |
178 { const Register src = parameters.as_in().as_register(); | |
179 const Register dst = Lentry_args; | |
180 const Register tmp = G3_scratch; | |
181 const Register cnt = G4_scratch; | |
182 | |
183 // test if any parameters & setup of Lentry_args | |
184 Label exit; | |
185 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter | |
186 __ add( FP, STACK_BIAS, dst ); | |
187 __ tst(cnt); | |
188 __ br(Assembler::zero, false, Assembler::pn, exit); | |
189 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args | |
190 | |
191 // copy parameters if any | |
192 Label loop; | |
193 __ BIND(loop); | |
194 // Store parameter value | |
195 __ ld_ptr(src, 0, tmp); | |
196 __ add(src, BytesPerWord, src); | |
1506 | 197 __ st_ptr(tmp, dst, 0); |
0 | 198 __ deccc(cnt); |
199 __ br(Assembler::greater, false, Assembler::pt, loop); | |
1506 | 200 __ delayed()->sub(dst, Interpreter::stackElementSize, dst); |
0 | 201 |
202 // done | |
203 __ BIND(exit); | |
204 } | |
205 | |
206 // setup parameters, method & call Java function | |
207 #ifdef ASSERT | |
208 // layout_activation_impl checks it's notion of saved SP against | |
209 // this register, so if this changes update it as well. | |
210 const Register saved_SP = Lscratch; | |
211 __ mov(SP, saved_SP); // keep track of SP before call | |
212 #endif | |
213 | |
214 // setup parameters | |
215 const Register t = G3_scratch; | |
216 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words) | |
1506 | 217 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes |
0 | 218 __ sub(FP, t, Gargs); // setup parameter pointer |
219 #ifdef _LP64 | |
220 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias | |
221 #endif | |
222 __ mov(SP, O5_savedSP); | |
223 | |
224 | |
225 // do the call | |
226 // | |
227 // the following register must be setup: | |
228 // | |
229 // G2_thread | |
230 // G5_method | |
231 // Gargs | |
232 BLOCK_COMMENT("call Java function"); | |
233 __ jmpl(entry_point.as_in().as_register(), G0, O7); | |
234 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method | |
235 | |
236 BLOCK_COMMENT("call_stub_return_address:"); | |
237 return_pc = __ pc(); | |
238 | |
239 // The callee, if it wasn't interpreted, can return with SP changed so | |
240 // we can no longer assert of change of SP. | |
241 | |
242 // store result depending on type | |
243 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE | |
244 // is treated as T_INT) | |
245 { const Register addr = result .as_in().as_register(); | |
246 const Register type = result_type.as_in().as_register(); | |
247 Label is_long, is_float, is_double, is_object, exit; | |
248 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object); | |
249 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float); | |
250 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double); | |
251 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long); | |
252 __ delayed()->nop(); | |
253 | |
254 // store int result | |
255 __ st(O0, addr, G0); | |
256 | |
257 __ BIND(exit); | |
258 __ ret(); | |
259 __ delayed()->restore(); | |
260 | |
261 __ BIND(is_object); | |
262 __ ba(false, exit); | |
263 __ delayed()->st_ptr(O0, addr, G0); | |
264 | |
265 __ BIND(is_float); | |
266 __ ba(false, exit); | |
267 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0); | |
268 | |
269 __ BIND(is_double); | |
270 __ ba(false, exit); | |
271 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0); | |
272 | |
273 __ BIND(is_long); | |
274 #ifdef _LP64 | |
275 __ ba(false, exit); | |
276 __ delayed()->st_long(O0, addr, G0); // store entire long | |
277 #else | |
278 #if defined(COMPILER2) | |
279 // All return values are where we want them, except for Longs. C2 returns | |
280 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. | |
281 // Since the interpreter will return longs in G1 and O0/O1 in the 32bit | |
282 // build we simply always use G1. | |
283 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to | |
284 // do this here. Unfortunately if we did a rethrow we'd see an machepilog node | |
285 // first which would move g1 -> O0/O1 and destroy the exception we were throwing. | |
286 | |
287 __ ba(false, exit); | |
288 __ delayed()->stx(G1, addr, G0); // store entire long | |
289 #else | |
290 __ st(O1, addr, BytesPerInt); | |
291 __ ba(false, exit); | |
292 __ delayed()->st(O0, addr, G0); | |
293 #endif /* COMPILER2 */ | |
294 #endif /* _LP64 */ | |
295 } | |
296 return start; | |
297 } | |
298 | |
299 | |
300 //---------------------------------------------------------------------------------------------------- | |
301 // Return point for a Java call if there's an exception thrown in Java code. | |
302 // The exception is caught and transformed into a pending exception stored in | |
303 // JavaThread that can be tested from within the VM. | |
304 // | |
305 // Oexception: exception oop | |
306 | |
307 address generate_catch_exception() { | |
308 StubCodeMark mark(this, "StubRoutines", "catch_exception"); | |
309 | |
310 address start = __ pc(); | |
311 // verify that thread corresponds | |
312 __ verify_thread(); | |
313 | |
314 const Register& temp_reg = Gtemp; | |
727 | 315 Address pending_exception_addr (G2_thread, Thread::pending_exception_offset()); |
316 Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset ()); | |
317 Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset ()); | |
0 | 318 |
319 // set pending exception | |
320 __ verify_oop(Oexception); | |
321 __ st_ptr(Oexception, pending_exception_addr); | |
322 __ set((intptr_t)__FILE__, temp_reg); | |
323 __ st_ptr(temp_reg, exception_file_offset_addr); | |
324 __ set((intptr_t)__LINE__, temp_reg); | |
325 __ st(temp_reg, exception_line_offset_addr); | |
326 | |
327 // complete return to VM | |
328 assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before"); | |
329 | |
727 | 330 AddressLiteral stub_ret(StubRoutines::_call_stub_return_address); |
331 __ jump_to(stub_ret, temp_reg); | |
0 | 332 __ delayed()->nop(); |
333 | |
334 return start; | |
335 } | |
336 | |
337 | |
338 //---------------------------------------------------------------------------------------------------- | |
339 // Continuation point for runtime calls returning with a pending exception | |
340 // The pending exception check happened in the runtime or native call stub | |
341 // The pending exception in Thread is converted into a Java-level exception | |
342 // | |
343 // Contract with Java-level exception handler: O0 = exception | |
344 // O1 = throwing pc | |
345 | |
346 address generate_forward_exception() { | |
347 StubCodeMark mark(this, "StubRoutines", "forward_exception"); | |
348 address start = __ pc(); | |
349 | |
350 // Upon entry, O7 has the return address returning into Java | |
351 // (interpreted or compiled) code; i.e. the return address | |
352 // becomes the throwing pc. | |
353 | |
354 const Register& handler_reg = Gtemp; | |
355 | |
727 | 356 Address exception_addr(G2_thread, Thread::pending_exception_offset()); |
0 | 357 |
358 #ifdef ASSERT | |
359 // make sure that this code is only executed if there is a pending exception | |
360 { Label L; | |
361 __ ld_ptr(exception_addr, Gtemp); | |
362 __ br_notnull(Gtemp, false, Assembler::pt, L); | |
363 __ delayed()->nop(); | |
364 __ stop("StubRoutines::forward exception: no pending exception (1)"); | |
365 __ bind(L); | |
366 } | |
367 #endif | |
368 | |
369 // compute exception handler into handler_reg | |
370 __ get_thread(); | |
371 __ ld_ptr(exception_addr, Oexception); | |
372 __ verify_oop(Oexception); | |
373 __ save_frame(0); // compensates for compiler weakness | |
374 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC | |
375 BLOCK_COMMENT("call exception_handler_for_return_address"); | |
1295 | 376 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch); |
0 | 377 __ mov(O0, handler_reg); |
378 __ restore(); // compensates for compiler weakness | |
379 | |
380 __ ld_ptr(exception_addr, Oexception); | |
381 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC | |
382 | |
383 #ifdef ASSERT | |
384 // make sure exception is set | |
385 { Label L; | |
386 __ br_notnull(Oexception, false, Assembler::pt, L); | |
387 __ delayed()->nop(); | |
388 __ stop("StubRoutines::forward exception: no pending exception (2)"); | |
389 __ bind(L); | |
390 } | |
391 #endif | |
392 // jump to exception handler | |
393 __ jmp(handler_reg, 0); | |
394 // clear pending exception | |
395 __ delayed()->st_ptr(G0, exception_addr); | |
396 | |
397 return start; | |
398 } | |
399 | |
400 | |
401 //------------------------------------------------------------------------------------------------------------------------ | |
402 // Continuation point for throwing of implicit exceptions that are not handled in | |
403 // the current activation. Fabricates an exception oop and initiates normal | |
404 // exception dispatching in this frame. Only callee-saved registers are preserved | |
405 // (through the normal register window / RegisterMap handling). | |
406 // If the compiler needs all registers to be preserved between the fault | |
407 // point and the exception handler then it must assume responsibility for that in | |
408 // AbstractCompiler::continuation_for_implicit_null_exception or | |
409 // continuation_for_implicit_division_by_zero_exception. All other implicit | |
410 // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are | |
411 // either at call sites or otherwise assume that stack unwinding will be initiated, | |
412 // so caller saved registers were assumed volatile in the compiler. | |
413 | |
414 // Note that we generate only this stub into a RuntimeStub, because it needs to be | |
415 // properly traversed and ignored during GC, so we change the meaning of the "__" | |
416 // macro within this method. | |
417 #undef __ | |
418 #define __ masm-> | |
419 | |
420 address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) { | |
421 #ifdef ASSERT | |
422 int insts_size = VerifyThread ? 1 * K : 600; | |
423 #else | |
424 int insts_size = VerifyThread ? 1 * K : 256; | |
425 #endif /* ASSERT */ | |
426 int locs_size = 32; | |
427 | |
428 CodeBuffer code(name, insts_size, locs_size); | |
429 MacroAssembler* masm = new MacroAssembler(&code); | |
430 | |
431 __ verify_thread(); | |
432 | |
433 // This is an inlined and slightly modified version of call_VM | |
434 // which has the ability to fetch the return PC out of thread-local storage | |
435 __ assert_not_delayed(); | |
436 | |
437 // Note that we always push a frame because on the SPARC | |
438 // architecture, for all of our implicit exception kinds at call | |
439 // sites, the implicit exception is taken before the callee frame | |
440 // is pushed. | |
441 __ save_frame(0); | |
442 | |
443 int frame_complete = __ offset(); | |
444 | |
445 if (restore_saved_exception_pc) { | |
727 | 446 __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7); |
0 | 447 __ sub(I7, frame::pc_return_offset, I7); |
448 } | |
449 | |
450 // Note that we always have a runtime stub frame on the top of stack by this point | |
451 Register last_java_sp = SP; | |
452 // 64-bit last_java_sp is biased! | |
453 __ set_last_Java_frame(last_java_sp, G0); | |
454 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early | |
455 __ save_thread(noreg); | |
456 // do the call | |
457 BLOCK_COMMENT("call runtime_entry"); | |
458 __ call(runtime_entry, relocInfo::runtime_call_type); | |
459 if (!VerifyThread) | |
460 __ delayed()->mov(G2_thread, O0); // pass thread as first argument | |
461 else | |
462 __ delayed()->nop(); // (thread already passed) | |
463 __ restore_thread(noreg); | |
464 __ reset_last_Java_frame(); | |
465 | |
466 // check for pending exceptions. use Gtemp as scratch register. | |
467 #ifdef ASSERT | |
468 Label L; | |
469 | |
727 | 470 Address exception_addr(G2_thread, Thread::pending_exception_offset()); |
0 | 471 Register scratch_reg = Gtemp; |
472 __ ld_ptr(exception_addr, scratch_reg); | |
473 __ br_notnull(scratch_reg, false, Assembler::pt, L); | |
474 __ delayed()->nop(); | |
475 __ should_not_reach_here(); | |
476 __ bind(L); | |
477 #endif // ASSERT | |
478 BLOCK_COMMENT("call forward_exception_entry"); | |
479 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); | |
480 // we use O7 linkage so that forward_exception_entry has the issuing PC | |
481 __ delayed()->restore(); | |
482 | |
483 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false); | |
484 return stub->entry_point(); | |
485 } | |
486 | |
487 #undef __ | |
488 #define __ _masm-> | |
489 | |
490 | |
491 // Generate a routine that sets all the registers so we | |
492 // can tell if the stop routine prints them correctly. | |
493 address generate_test_stop() { | |
494 StubCodeMark mark(this, "StubRoutines", "test_stop"); | |
495 address start = __ pc(); | |
496 | |
497 int i; | |
498 | |
499 __ save_frame(0); | |
500 | |
501 static jfloat zero = 0.0, one = 1.0; | |
502 | |
503 // put addr in L0, then load through L0 to F0 | |
504 __ set((intptr_t)&zero, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F0); | |
505 __ set((intptr_t)&one, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1 | |
506 | |
507 // use add to put 2..18 in F2..F18 | |
508 for ( i = 2; i <= 18; ++i ) { | |
509 __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i)); | |
510 } | |
511 | |
512 // Now put double 2 in F16, double 18 in F18 | |
513 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2, F16 ); | |
514 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 ); | |
515 | |
516 // use add to put 20..32 in F20..F32 | |
517 for (i = 20; i < 32; i += 2) { | |
518 __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i)); | |
519 } | |
520 | |
521 // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's | |
522 for ( i = 0; i < 8; ++i ) { | |
523 if (i < 6) { | |
524 __ set( i, as_iRegister(i)); | |
525 __ set(16 + i, as_oRegister(i)); | |
526 __ set(24 + i, as_gRegister(i)); | |
527 } | |
528 __ set( 8 + i, as_lRegister(i)); | |
529 } | |
530 | |
531 __ stop("testing stop"); | |
532 | |
533 | |
534 __ ret(); | |
535 __ delayed()->restore(); | |
536 | |
537 return start; | |
538 } | |
539 | |
540 | |
541 address generate_stop_subroutine() { | |
542 StubCodeMark mark(this, "StubRoutines", "stop_subroutine"); | |
543 address start = __ pc(); | |
544 | |
545 __ stop_subroutine(); | |
546 | |
547 return start; | |
548 } | |
549 | |
550 address generate_flush_callers_register_windows() { | |
551 StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows"); | |
552 address start = __ pc(); | |
553 | |
554 __ flush_windows(); | |
555 __ retl(false); | |
556 __ delayed()->add( FP, STACK_BIAS, O0 ); | |
557 // The returned value must be a stack pointer whose register save area | |
558 // is flushed, and will stay flushed while the caller executes. | |
559 | |
560 return start; | |
561 } | |
562 | |
563 // Helper functions for v8 atomic operations. | |
564 // | |
565 void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) { | |
566 if (mark_oop_reg == noreg) { | |
567 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(); | |
568 __ set((intptr_t)lock_ptr, lock_ptr_reg); | |
569 } else { | |
570 assert(scratch_reg != noreg, "just checking"); | |
571 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache; | |
572 __ set((intptr_t)lock_ptr, lock_ptr_reg); | |
573 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg); | |
574 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg); | |
575 } | |
576 } | |
577 | |
578 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) { | |
579 | |
580 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg); | |
581 __ set(StubRoutines::Sparc::locked, lock_reg); | |
582 // Initialize yield counter | |
583 __ mov(G0,yield_reg); | |
584 | |
585 __ BIND(retry); | |
586 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount); | |
587 __ br(Assembler::less, false, Assembler::pt, dontyield); | |
588 __ delayed()->nop(); | |
589 | |
590 // This code can only be called from inside the VM, this | |
591 // stub is only invoked from Atomic::add(). We do not | |
592 // want to use call_VM, because _last_java_sp and such | |
593 // must already be set. | |
594 // | |
595 // Save the regs and make space for a C call | |
596 __ save(SP, -96, SP); | |
597 __ save_all_globals_into_locals(); | |
598 BLOCK_COMMENT("call os::naked_sleep"); | |
599 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep)); | |
600 __ delayed()->nop(); | |
601 __ restore_globals_from_locals(); | |
602 __ restore(); | |
603 // reset the counter | |
604 __ mov(G0,yield_reg); | |
605 | |
606 __ BIND(dontyield); | |
607 | |
608 // try to get lock | |
609 __ swap(lock_ptr_reg, 0, lock_reg); | |
610 | |
611 // did we get the lock? | |
612 __ cmp(lock_reg, StubRoutines::Sparc::unlocked); | |
613 __ br(Assembler::notEqual, true, Assembler::pn, retry); | |
614 __ delayed()->add(yield_reg,1,yield_reg); | |
615 | |
616 // yes, got lock. do the operation here. | |
617 } | |
618 | |
619 void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) { | |
620 __ st(lock_reg, lock_ptr_reg, 0); // unlock | |
621 } | |
622 | |
623 // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest). | |
624 // | |
625 // Arguments : | |
626 // | |
627 // exchange_value: O0 | |
628 // dest: O1 | |
629 // | |
630 // Results: | |
631 // | |
632 // O0: the value previously stored in dest | |
633 // | |
634 address generate_atomic_xchg() { | |
635 StubCodeMark mark(this, "StubRoutines", "atomic_xchg"); | |
636 address start = __ pc(); | |
637 | |
638 if (UseCASForSwap) { | |
639 // Use CAS instead of swap, just in case the MP hardware | |
640 // prefers to work with just one kind of synch. instruction. | |
641 Label retry; | |
642 __ BIND(retry); | |
643 __ mov(O0, O3); // scratch copy of exchange value | |
644 __ ld(O1, 0, O2); // observe the previous value | |
645 // try to replace O2 with O3 | |
646 __ cas_under_lock(O1, O2, O3, | |
647 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false); | |
648 __ cmp(O2, O3); | |
649 __ br(Assembler::notEqual, false, Assembler::pn, retry); | |
650 __ delayed()->nop(); | |
651 | |
652 __ retl(false); | |
653 __ delayed()->mov(O2, O0); // report previous value to caller | |
654 | |
655 } else { | |
656 if (VM_Version::v9_instructions_work()) { | |
657 __ retl(false); | |
658 __ delayed()->swap(O1, 0, O0); | |
659 } else { | |
660 const Register& lock_reg = O2; | |
661 const Register& lock_ptr_reg = O3; | |
662 const Register& yield_reg = O4; | |
663 | |
664 Label retry; | |
665 Label dontyield; | |
666 | |
667 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield); | |
668 // got the lock, do the swap | |
669 __ swap(O1, 0, O0); | |
670 | |
671 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield); | |
672 __ retl(false); | |
673 __ delayed()->nop(); | |
674 } | |
675 } | |
676 | |
677 return start; | |
678 } | |
679 | |
680 | |
681 // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value) | |
682 // | |
683 // Arguments : | |
684 // | |
685 // exchange_value: O0 | |
686 // dest: O1 | |
687 // compare_value: O2 | |
688 // | |
689 // Results: | |
690 // | |
691 // O0: the value previously stored in dest | |
692 // | |
693 // Overwrites (v8): O3,O4,O5 | |
694 // | |
695 address generate_atomic_cmpxchg() { | |
696 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg"); | |
697 address start = __ pc(); | |
698 | |
699 // cmpxchg(dest, compare_value, exchange_value) | |
700 __ cas_under_lock(O1, O2, O0, | |
701 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false); | |
702 __ retl(false); | |
703 __ delayed()->nop(); | |
704 | |
705 return start; | |
706 } | |
707 | |
708 // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value) | |
709 // | |
710 // Arguments : | |
711 // | |
712 // exchange_value: O1:O0 | |
713 // dest: O2 | |
714 // compare_value: O4:O3 | |
715 // | |
716 // Results: | |
717 // | |
718 // O1:O0: the value previously stored in dest | |
719 // | |
720 // This only works on V9, on V8 we don't generate any | |
721 // code and just return NULL. | |
722 // | |
723 // Overwrites: G1,G2,G3 | |
724 // | |
725 address generate_atomic_cmpxchg_long() { | |
726 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long"); | |
727 address start = __ pc(); | |
728 | |
729 if (!VM_Version::supports_cx8()) | |
730 return NULL;; | |
731 __ sllx(O0, 32, O0); | |
732 __ srl(O1, 0, O1); | |
733 __ or3(O0,O1,O0); // O0 holds 64-bit value from compare_value | |
734 __ sllx(O3, 32, O3); | |
735 __ srl(O4, 0, O4); | |
736 __ or3(O3,O4,O3); // O3 holds 64-bit value from exchange_value | |
737 __ casx(O2, O3, O0); | |
738 __ srl(O0, 0, O1); // unpacked return value in O1:O0 | |
739 __ retl(false); | |
740 __ delayed()->srlx(O0, 32, O0); | |
741 | |
742 return start; | |
743 } | |
744 | |
745 | |
746 // Support for jint Atomic::add(jint add_value, volatile jint* dest). | |
747 // | |
748 // Arguments : | |
749 // | |
750 // add_value: O0 (e.g., +1 or -1) | |
751 // dest: O1 | |
752 // | |
753 // Results: | |
754 // | |
755 // O0: the new value stored in dest | |
756 // | |
757 // Overwrites (v9): O3 | |
758 // Overwrites (v8): O3,O4,O5 | |
759 // | |
760 address generate_atomic_add() { | |
// Generates the "atomic_add" stub and returns its entry address. Which of the
// two instruction sequences below is emitted is decided at generation time by
// VM_Version::v9_instructions_work().
761 StubCodeMark mark(this, "StubRoutines", "atomic_add"); | |
762 address start = __ pc(); | |
// Bind the member label at the entry so other stubs can branch here.
763 __ BIND(_atomic_add_stub); | |
764 | |
765 if (VM_Version::v9_instructions_work()) { | |
// cas-based retry loop. 'Label(retry);' declares a local Label named 'retry'.
766 Label(retry); | |
767 __ BIND(retry); | |
768 | |
// O2 = value currently at dest, O3 = O2 + add_value (the proposed new value).
769 __ lduw(O1, 0, O2); | |
770 __ add(O0, O2, O3); | |
// Try to install O3; loop again if O2 and O3 differ afterwards (presumably
// meaning dest changed in between - see the note on the final add below).
771 __ cas(O1, O2, O3); | |
772 __ cmp( O2, O3); | |
773 __ br(Assembler::notEqual, false, Assembler::pn, retry); | |
774 __ delayed()->nop(); | |
// Return; the delay slot recomputes the new value into O0.
775 __ retl(false); | |
776 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3 | |
777 } else { | |
// Lock-based fallback: names for the registers handed to the lock
// prologue/epilogue helpers and for the value being updated.
778 const Register& lock_reg = O2; | |
779 const Register& lock_ptr_reg = O3; | |
780 const Register& value_reg = O4; | |
781 const Register& yield_reg = O5; | |
782 | |
783 Label(retry); | |
784 Label(dontyield); | |
785 | |
786 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield); | |
787 // got lock, do the increment | |
788 __ ld(O1, 0, value_reg); | |
789 __ add(O0, value_reg, value_reg); | |
790 __ st(value_reg, O1, 0); | |
791 | |
792 // %%% only for RMO and PSO | |
793 __ membar(Assembler::StoreStore); | |
794 | |
795 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield); | |
796 | |
// Return the updated value (moved into O0 in the delay slot).
797 __ retl(false); | |
798 __ delayed()->mov(value_reg, O0); | |
799 } | |
800 | |
801 return start; | |
802 } | |
803 Label _atomic_add_stub; // called from other stubs | |
804 | |
805 | |
806 //------------------------------------------------------------------------------------------------------------------------ | |
807 // The following routine generates a subroutine to throw an asynchronous | |
808 // UnknownError when an unsafe access gets a fault that could not be | |
809 // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.) | |
810 // | |
811 // Arguments : | |
812 // | |
813 // trapping PC: O7 | |
814 // | |
815 // Results: | |
816 // posts an asynchronous exception, skips the trapping instruction | |
817 // | |
818 | |
819 address generate_handler_for_unsafe_access() { | |
// Generates the "handler_for_unsafe_access" stub and returns its entry address.
// The stub preserves G1-G5 and the floating point registers around a call to
// handle_unsafe_access, then jumps to the address left in O0.
820 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); | |
821 address start = __ pc(); | |
822 | |
// FP-relative area used below to spill the floating point registers.
// NOTE(review): save_frame(0) is given no extra words while this area lies
// preserve_register_words below FP - confirm the region is safe to write.
823 const int preserve_register_words = (64 * 2); | |
727 | 824 Address preserve_addr(FP, (-preserve_register_words * wordSize) + STACK_BIAS); |
0 | 825 |
826 Register Lthread = L7_thread_cache; | |
827 int i; | |
828 | |
// New frame; park G1-G5 in local registers across the call.
829 __ save_frame(0); | |
830 __ mov(G1, L1); | |
831 __ mov(G2, L2); | |
832 __ mov(G3, L3); | |
833 __ mov(G4, L4); | |
834 __ mov(G5, L5); | |
// Store the FP registers as doubles (every second register index); the
// register count is 64 or 32 depending on v9_instructions_work().
835 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) { | |
836 __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize); | |
837 } | |
838 | |
839 address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access); | |
840 BLOCK_COMMENT("call handle_unsafe_access"); | |
841 __ call(entry_point, relocInfo::runtime_call_type); | |
842 __ delayed()->nop(); | |
843 | |
// Reload everything that was preserved above, in the same layout.
844 __ mov(L1, G1); | |
845 __ mov(L2, G2); | |
846 __ mov(L3, G3); | |
847 __ mov(L4, G4); | |
848 __ mov(L5, G5); | |
849 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) { | |
850 __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize); | |
851 } | |
852 | |
853 __ verify_thread(); | |
854 | |
// Continue at the address in O0 (presumably the value returned by
// handle_unsafe_access); the frame is popped in the delay slot.
855 __ jmp(O0, 0); | |
856 __ delayed()->restore(); | |
857 | |
858 return start; | |
859 } | |
860 | |
861 | |
862 // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super ); | |
863 // Arguments : | |
864 // | |
865 // ret : O0, returned | |
866 // icc/xcc: set as O0 (depending on wordSize) | |
867 // sub : O1, argument, not changed | |
868 // super: O2, argument, not changed | |
869 // raddr: O7, blown by call | |
870 address generate_partial_subtype_check() { | |
// Generates the "partial_subtype_check" stub and returns its entry address.
// The stub returns 0 (Z flag set) on a match and 1 (NZ) on a miss.
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
871 __ align(CodeEntryAlignment); |
0 | 872 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); |
873 address start = __ pc(); | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
874 Label miss; |
0 | 875 |
// Two prologue variants: the COMPILER2/32-bit build spills L0-L3 by hand and
// keeps using the O registers; all other builds open a register window and
// address the arguments through the I registers.
876 #if defined(COMPILER2) && !defined(_LP64) | |
877 // Do not use a 'save' because it blows the 64-bit O registers. | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
878 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned) |
0 | 879 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize); |
880 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize); | |
881 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize); | |
882 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize); | |
883 Register Rret = O0; | |
884 Register Rsub = O1; | |
885 Register Rsuper = O2; | |
886 #else | |
887 __ save_frame(0); | |
888 Register Rret = I0; | |
889 Register Rsub = I1; | |
890 Register Rsuper = I2; | |
891 #endif | |
892 | |
// Descriptive aliases for the temporaries; the visible code below does not
// reference them and passes L0..L3 to the slow-path helper directly.
893 Register L0_ary_len = L0; | |
894 Register L1_ary_ptr = L1; | |
895 Register L2_super = L2; | |
896 Register L3_index = L3; | |
897 | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
898 __ check_klass_subtype_slow_path(Rsub, Rsuper, |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
899 L0, L1, L2, L3, |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
900 NULL, &miss); |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
901 |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
902 // Match falls through here. |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
903 __ addcc(G0,0,Rret); // set Z flags, Z result |
0 | 904 |
// Epilogue for the match case: undo whichever prologue variant was used.
905 #if defined(COMPILER2) && !defined(_LP64) | |
906 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); | |
907 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); | |
908 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); | |
909 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3); | |
910 __ retl(); // Result in Rret is zero; flags set to Z | |
911 __ delayed()->add(SP,4*wordSize,SP); | |
912 #else | |
913 __ ret(); // Result in Rret is zero; flags set to Z | |
914 __ delayed()->restore(); | |
915 #endif | |
916 | |
// Miss case: same epilogue, but with a non-zero result.
917 __ BIND(miss); | |
918 __ addcc(G0,1,Rret); // set NZ flags, NZ result | |
919 | |
920 #if defined(COMPILER2) && !defined(_LP64) | |
921 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); | |
922 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); | |
923 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); | |
924 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3); | |
925 __ retl(); // Result in Rret is != 0; flags set to NZ | |
926 __ delayed()->add(SP,4*wordSize,SP); | |
927 #else | |
928 __ ret(); // Result in Rret is != 0; flags set to NZ | |
929 __ delayed()->restore(); | |
930 #endif | |
931 | |
932 return start; | |
933 } | |
934 | |
935 | |
936 // Called from MacroAssembler::verify_oop | |
937 // | |
938 address generate_verify_oop_subroutine() { | |
// Generates the "verify_oop_stub" stub and returns its entry address. The
// stub body is emitted entirely by the assembler's verify_oop_subroutine().
939 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub"); | |
940 | |
941 address start = __ pc(); | |
942 | |
943 __ verify_oop_subroutine(); | |
944 | |
945 return start; | |
946 } | |
947 | |
948 static address disjoint_byte_copy_entry; | |
949 static address disjoint_short_copy_entry; | |
950 static address disjoint_int_copy_entry; | |
951 static address disjoint_long_copy_entry; | |
952 static address disjoint_oop_copy_entry; | |
953 | |
954 static address byte_copy_entry; | |
955 static address short_copy_entry; | |
956 static address int_copy_entry; | |
957 static address long_copy_entry; | |
958 static address oop_copy_entry; | |
959 | |
960 static address checkcast_copy_entry; | |
961 | |
962 // | |
963 // Verify that a register contains clean 32-bits positive value | |
964 // (high 32-bits are 0) so it could be used in 64-bits shifts (sllx, srax). | |
965 // | |
966 // Input: | |
967 // Rint - 32-bits value | |
968 // Rtmp - scratch | |
969 // | |
970 void assert_clean_int(Register Rint, Register Rtmp) { | |
// Emits a debug-only check and nothing at all unless both ASSERT and _LP64
// are defined. The check traps when Rint differs from signx(Rint).
// NOTE(review): signx presumably sign-extends the low 32 bits, in which case
// negative ints also pass - confirm against the "positive" wording above.
971 #if defined(ASSERT) && defined(_LP64) | |
972 __ signx(Rint, Rtmp); | |
973 __ cmp(Rint, Rtmp); | |
974 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc); | |
975 #endif | |
976 } | |
977 | |
978 // | |
979 // Generate overlap test for array copy stubs | |
980 // | |
981 // Input: | |
982 // O0 - array1 | |
983 // O1 - array2 | |
984 // O2 - element count | |
985 // | |
986 // Kills temps: O3, O4 | |
987 // | |
988 void array_overlap_test(address no_overlap_target, int log2_elem_size) { | |
// Convenience overload: the no-overlap destination is an already generated
// code address (asserted non-NULL) rather than a Label.
989 assert(no_overlap_target != NULL, "must be generated"); | |
990 array_overlap_test(no_overlap_target, NULL, log2_elem_size); | |
991 } | |
992 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) { | |
// Convenience overload: the no-overlap destination is a Label.
993 array_overlap_test(NULL, &L_no_overlap, log2_elem_size); | |
994 } | |
995 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) { | |
// Worker for the two overloads above. Emits two conditional branches to the
// no-overlap destination: NOLp when it is non-NULL, else no_overlap_target.
// Falls through when neither branch is taken.
996 const Register from = O0; | |
997 const Register to = O1; | |
998 const Register count = O2; | |
999 const Register to_from = O3; // to - from | |
1000 const Register byte_count = O4; // count << log2_elem_size | |
1001 | |
1002 __ subcc(to, from, to_from); | |
1003 __ sll_ptr(count, log2_elem_size, byte_count); | |
// First branch: taken when to <= from (unsigned), using the flags of the
// subcc above. Its delay slot sets up the compare for the second branch.
1004 if (NOLp == NULL) | |
1005 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target); | |
1006 else | |
1007 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp)); | |
1008 __ delayed()->cmp(to_from, byte_count); | |
// Second branch: taken when (to - from) >= byte_count (unsigned).
1009 if (NOLp == NULL) | |
1655
e7ec8cd4dd8a
6962569: assembler_sparc.cpp:1969: assert(false) failed: error
tonyp
parents:
1579
diff
changeset
|
1010 __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, no_overlap_target); |
0 | 1011 else |
1655
e7ec8cd4dd8a
6962569: assembler_sparc.cpp:1969: assert(false) failed: error
tonyp
parents:
1579
diff
changeset
|
1012 __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, (*NOLp)); |
0 | 1013 __ delayed()->nop(); |
1014 } | |
1015 | |
1016 // | |
1017 // Generate pre-write barrier for array. | |
1018 // | |
1019 // Input: | |
1020 // addr - register containing starting address | |
1021 // count - register containing element count | |
1022 // (this routine takes no scratch register argument) | |
1023 // | |
1024 // The input registers are overwritten. | |
1025 // | |
1026 void gen_write_ref_array_pre_barrier(Register addr, Register count) { | |
// Emits a call to BarrierSet::static_write_ref_array_pre(addr, count) when the
// heap's barrier set reports a write-ref pre-barrier; otherwise emits nothing.
1027 BarrierSet* bs = Universe::heap()->barrier_set(); | |
1028 if (bs->has_write_ref_pre_barrier()) { | |
1029 assert(bs->has_write_ref_array_pre_opt(), | |
1030 "Else unsupported barrier set."); | |
1031 | |
// Open a new frame; the inputs are then read through after_save().
1032 __ save_frame(0); | |
1033 // Save the necessary global regs... will be used after. | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1034 if (addr->is_global()) { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1035 __ mov(addr, L0); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1036 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1037 if (count->is_global()) { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1038 __ mov(count, L1); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1039 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1040 __ mov(addr->after_save(), O0); |
0 | 1041 // Get the count into O1 |
1042 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1043 __ delayed()->mov(count->after_save(), O1); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1044 if (addr->is_global()) { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1045 __ mov(L0, addr); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1046 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1047 if (count->is_global()) { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1048 __ mov(L1, count); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1049 } |
0 | 1050 __ restore(); |
1051 } | |
1052 } | |
1053 // | |
1054 // Generate post-write barrier for array. | |
1055 // | |
1056 // Input: | |
1057 // addr - register containing starting address | |
1058 // count - register containing element count | |
1059 // tmp - scratch register | |
1060 // | |
1061 // The input registers are overwritten. | |
1062 // | |
1063 void gen_write_ref_array_post_barrier(Register addr, Register count, | |
1064 Register tmp) { | |
// Emits the array post-write barrier appropriate for the heap's barrier set
// kind: a runtime call for the G1 kinds, an inline card-marking loop for the
// card table kinds, nothing for ModRef. Any other kind is a fatal error.
1065 BarrierSet* bs = Universe::heap()->barrier_set(); | |
1066 | |
1067 switch (bs->kind()) { | |
1068 case BarrierSet::G1SATBCT: | |
1069 case BarrierSet::G1SATBCTLogging: | |
1070 { | |
1071 // Get some new fresh output registers. | |
1072 __ save_frame(0); | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1073 __ mov(addr->after_save(), O0); |
0 | 1074 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
1075 __ delayed()->mov(count->after_save(), O1); |
0 | 1076 __ restore(); |
1077 } | |
1078 break; | |
1079 case BarrierSet::CardTableModRef: | |
1080 case BarrierSet::CardTableExtension: | |
1081 { | |
1082 CardTableModRefBS* ct = (CardTableModRefBS*)bs; | |
1083 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); | |
1084 assert_different_registers(addr, count, tmp); | |
1085 | |
1086 Label L_loop; | |
1087 | |
// count := address of the last element (addr + count*BytesPerHeapOop
// - BytesPerHeapOop).
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
1088 __ sll_ptr(count, LogBytesPerHeapOop, count); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
1089 __ sub(count, BytesPerHeapOop, count); |
0 | 1090 __ add(count, addr, count); |
1091 // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.) | |
// Shift both addresses by card_shift; count then becomes the difference
// between the last and the first shifted value.
1092 __ srl_ptr(addr, CardTableModRefBS::card_shift, addr); | |
1093 __ srl_ptr(count, CardTableModRefBS::card_shift, count); | |
1094 __ sub(count, addr, count); | |
727 | 1095 AddressLiteral rs(ct->byte_map_base); |
1096 __ set(rs, tmp); | |
// Loop: store a zero byte at byte_map_base + addr, then advance addr, while
// count stays >= 0 after the decrement.
0 | 1097 __ BIND(L_loop); |
727 | 1098 __ stb(G0, tmp, addr); |
0 | 1099 __ subcc(count, 1, count); |
1100 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); | |
1101 __ delayed()->add(addr, 1, addr); | |
727 | 1102 } |
0 | 1103 break; |
1104 case BarrierSet::ModRef: | |
1105 break; | |
727 | 1106 default: |
0 | 1107 ShouldNotReachHere(); |
1108 } | |
1109 } | |
1110 | |
1111 | |
1112 // Copy big chunks forward with shift | |
1113 // | |
1114 // Inputs: | |
1115 // from - source arrays | |
1116 // to - destination array aligned to 8-bytes | |
1117 // count - elements count to copy >= the count equivalent to 16 bytes | |
1118 // count_dec - elements count's decrement equivalent to 16 bytes | |
1119 // L_copy_bytes - copy exit label | |
1120 // | |
1121 void copy_16_bytes_forward_with_shift(Register from, Register to, | |
1122 Register count, int count_dec, Label& L_copy_bytes) { | |
// Emits a forward copy loop for the case where 'from' is not 8-byte aligned:
// aligned 8-byte loads are shifted and merged into aligned 8-byte stores.
// When 'from' is already aligned the emitted code skips to the end of this
// sequence (L_aligned_copy), i.e. into whatever the caller emits next.
// Registers written here: G1, G3, G4, G5, O3, O4 plus from/to/count.
1123 Label L_loop, L_aligned_copy, L_copy_last_bytes; | |
1124 | |
1125 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy | |
1126 __ andcc(from, 7, G1); // misaligned bytes | |
1127 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); | |
1128 __ delayed()->nop(); | |
1129 | |
1130 const Register left_shift = G1; // left shift bit counter | |
1131 const Register right_shift = G5; // right shift bit counter | |
1132 | |
// left_shift = misaligned bytes * 8 bits, right_shift = 64 - left_shift.
1133 __ sll(G1, LogBitsPerByte, left_shift); | |
1134 __ mov(64, right_shift); | |
1135 __ sub(right_shift, left_shift, right_shift); | |
1136 | |
1137 // | |
1138 // Load 2 aligned 8-bytes chunks and use one from previous iteration | |
1139 // to form 2 aligned 8-bytes chunks to store. | |
1140 // | |
1141 __ deccc(count, count_dec); // Pre-decrement 'count' | |
1142 __ andn(from, 7, from); // Align address | |
1143 __ ldx(from, 0, O3); | |
1144 __ inc(from, 8); | |
1365 | 1145 __ align(OptoLoopAlignment); |
0 | 1146 __ BIND(L_loop); |
1147 __ ldx(from, 0, O4); | |
1148 __ deccc(count, count_dec); // Can we do next iteration after this one? | |
1149 __ ldx(from, 8, G4); | |
1150 __ inc(to, 16); | |
1151 __ inc(from, 16); | |
1152 __ sllx(O3, left_shift, O3); | |
1153 __ srlx(O4, right_shift, G3); | |
1154 __ bset(G3, O3); | |
1155 __ stx(O3, to, -16); | |
1156 __ sllx(O4, left_shift, O4); | |
1157 __ srlx(G4, right_shift, G3); | |
1158 __ bset(G3, O4); | |
1159 __ stx(O4, to, -8); | |
1160 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); | |
1161 __ delayed()->mov(G4, O3); | |
1162 | |
// After the loop: check whether one more 8-byte chunk remains.
1163 __ inccc(count, count_dec>>1 ); // + 8 bytes | |
1164 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); | |
1165 __ delayed()->inc(count, count_dec>>1); // restore 'count' | |
1166 | |
1167 // copy 8 bytes, part of them already loaded in O3 | |
1168 __ ldx(from, 0, O4); | |
1169 __ inc(to, 8); | |
1170 __ inc(from, 8); | |
1171 __ sllx(O3, left_shift, O3); | |
1172 __ srlx(O4, right_shift, G3); | |
1173 __ bset(O3, G3); | |
1174 __ stx(G3, to, -8); | |
1175 | |
// Undo the address alignment and hand the remaining bytes to the caller's
// tail-copy code at L_copy_bytes.
1176 __ BIND(L_copy_last_bytes); | |
1177 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes | |
1178 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes); | |
1179 __ delayed()->sub(from, right_shift, from); // restore address | |
1180 | |
1181 __ BIND(L_aligned_copy); | |
1182 } | |
1183 | |
1184 // Copy big chunks backward with shift | |
1185 // | |
1186 // Inputs: | |
1187 // end_from - source array end address | |
1188 // end_to - destination array end address aligned to 8-bytes | |
1189 // count - elements count to copy >= the count equivalent to 16 bytes | |
1190 // count_dec - elements count's decrement equivalent to 16 bytes | |
1191 // L_aligned_copy - aligned copy exit label | |
1192 // L_copy_bytes - copy exit label | |
1193 // | |
1194 void copy_16_bytes_backward_with_shift(Register end_from, Register end_to, | |
1195 Register count, int count_dec, | |
1196 Label& L_aligned_copy, Label& L_copy_bytes) { | |
// Backward counterpart of the forward shift copy: walks down from the end
// addresses, merging shifted aligned 8-byte loads into aligned 8-byte stores.
// When 'end_from' is already 8-byte aligned the emitted code branches to the
// caller's L_aligned_copy with 'count' already pre-decremented (delay slot).
// Registers written here: G1, G3, G4, G5, O3, O4 plus end_from/end_to/count.
1197 Label L_loop, L_copy_last_bytes; | |
1198 | |
1199 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy | |
1200 __ andcc(end_from, 7, G1); // misaligned bytes | |
1201 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); | |
1202 __ delayed()->deccc(count, count_dec); // Pre-decrement 'count' | |
1203 | |
1204 const Register left_shift = G1; // left shift bit counter | |
1205 const Register right_shift = G5; // right shift bit counter | |
1206 | |
// left_shift = misaligned bytes * 8 bits, right_shift = 64 - left_shift.
1207 __ sll(G1, LogBitsPerByte, left_shift); | |
1208 __ mov(64, right_shift); | |
1209 __ sub(right_shift, left_shift, right_shift); | |
1210 | |
1211 // | |
1212 // Load 2 aligned 8-bytes chunks and use one from previous iteration | |
1213 // to form 2 aligned 8-bytes chunks to store. | |
1214 // | |
1215 __ andn(end_from, 7, end_from); // Align address | |
1216 __ ldx(end_from, 0, O3); | |
1365 | 1217 __ align(OptoLoopAlignment); |
0 | 1218 __ BIND(L_loop); |
1219 __ ldx(end_from, -8, O4); | |
1220 __ deccc(count, count_dec); // Can we do next iteration after this one? | |
1221 __ ldx(end_from, -16, G4); | |
1222 __ dec(end_to, 16); | |
1223 __ dec(end_from, 16); | |
1224 __ srlx(O3, right_shift, O3); | |
1225 __ sllx(O4, left_shift, G3); | |
1226 __ bset(G3, O3); | |
1227 __ stx(O3, end_to, 8); | |
1228 __ srlx(O4, right_shift, O4); | |
1229 __ sllx(G4, left_shift, G3); | |
1230 __ bset(G3, O4); | |
1231 __ stx(O4, end_to, 0); | |
1232 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); | |
1233 __ delayed()->mov(G4, O3); | |
1234 | |
// After the loop: check whether one more 8-byte chunk remains.
1235 __ inccc(count, count_dec>>1 ); // + 8 bytes | |
1236 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); | |
1237 __ delayed()->inc(count, count_dec>>1); // restore 'count' | |
1238 | |
1239 // copy 8 bytes, part of them already loaded in O3 | |
1240 __ ldx(end_from, -8, O4); | |
1241 __ dec(end_to, 8); | |
1242 __ dec(end_from, 8); | |
1243 __ srlx(O3, right_shift, O3); | |
1244 __ sllx(O4, left_shift, G3); | |
1245 __ bset(O3, G3); | |
1246 __ stx(G3, end_to, 0); | |
1247 | |
// Undo the address alignment and hand the remaining bytes to the caller's
// tail-copy code at L_copy_bytes.
1248 __ BIND(L_copy_last_bytes); | |
1249 __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes | |
1250 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes); | |
1251 __ delayed()->add(end_from, left_shift, end_from); // restore address | |
1252 } | |
1253 | |
1254 // | |
1255 // Generate stub for disjoint byte copy. If "aligned" is true, the | |
1256 // "from" and "to" addresses are assumed to be heapword aligned. | |
1257 // | |
1258 // Arguments for generated stub: | |
1259 // from: O0 | |
1260 // to: O1 | |
1261 // count: O2 treated as signed | |
1262 // | |
1263 address generate_disjoint_byte_copy(bool aligned, const char * name) { | |
// Generates the disjoint (forward) byte copy stub under the given name and
// returns its entry address. For the unaligned variant the address just past
// the count check is also recorded in disjoint_byte_copy_entry.
1264 __ align(CodeEntryAlignment); | |
1265 StubCodeMark mark(this, "StubRoutines", name); | |
1266 address start = __ pc(); | |
1267 | |
1268 Label L_skip_alignment, L_align; | |
1269 Label L_copy_byte, L_copy_byte_loop, L_exit; | |
1270 | |
1271 const Register from = O0; // source array address | |
1272 const Register to = O1; // destination array address | |
1273 const Register count = O2; // elements count | |
1274 const Register offset = O5; // offset from start of arrays | |
1275 // O3, O4, G3, G4 are used as temp registers | |
1276 | |
1277 assert_clean_int(count, O3); // Make sure 'count' is clean int. | |
1278 | |
1279 if (!aligned) disjoint_byte_copy_entry = __ pc(); | |
1280 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) | |
1281 if (!aligned) BLOCK_COMMENT("Entry:"); | |
1282 | |
1283 // for short arrays, just do single element copy | |
1284 __ cmp(count, 23); // 16 + 7 | |
1285 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); | |
1286 __ delayed()->mov(G0, offset); | |
1287 | |
1288 if (aligned) { | |
1289 // 'aligned' == true when it is known statically during compilation | |
1290 // of this arraycopy call site that both 'from' and 'to' addresses | |
1291 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). | |
1292 // | |
1293 // Aligned arrays have 4 bytes alignment in 32-bits VM | |
1294 // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM | |
1295 // | |
1296 #ifndef _LP64 | |
1297 // copy a 4-bytes word if necessary to align 'to' to 8 bytes | |
1298 __ andcc(to, 7, G0); | |
1299 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment); | |
1300 __ delayed()->ld(from, 0, O3); | |
1301 __ inc(from, 4); | |
1302 __ inc(to, 4); | |
1303 __ dec(count, 4); | |
1304 __ st(O3, to, -4); | |
1305 __ BIND(L_skip_alignment); | |
1306 #endif | |
1307 } else { | |
1308 // copy bytes to align 'to' on 8 byte boundary | |
1309 __ andcc(to, 7, G1); // misaligned bytes | |
1310 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); | |
1311 __ delayed()->neg(G1); | |
1312 __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment | |
1313 __ sub(count, G1, count); | |
1314 __ BIND(L_align); | |
1315 __ ldub(from, 0, O3); | |
1316 __ deccc(G1); | |
1317 __ inc(from); | |
1318 __ stb(O3, to, 0); | |
1319 __ br(Assembler::notZero, false, Assembler::pt, L_align); | |
1320 __ delayed()->inc(to); | |
1321 __ BIND(L_skip_alignment); | |
1322 } | |
// On _LP64 the shift copy is emitted only for the unaligned variant; on
// 32-bit builds the braces below are unconditional.
1323 #ifdef _LP64 | |
1324 if (!aligned) | |
1325 #endif | |
1326 { | |
1327 // Copy with shift 16 bytes per iteration if arrays do not have | |
1328 // the same alignment mod 8, otherwise fall through to the next | |
1329 // code for aligned copy. | |
1330 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. | |
1331 // Also jump over aligned copy after the copy with shift completed. | |
1332 | |
1333 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte); | |
1334 } | |
1335 | |
1336 // Both array are 8 bytes aligned, copy 16 bytes at a time | |
// G4 keeps the 0..7 trailing bytes; count becomes the number of 8-byte words
// handed to the long-copy core.
1337 __ and3(count, 7, G4); // Save count | |
1338 __ srl(count, 3, count); | |
1339 generate_disjoint_long_copy_core(aligned); | |
1340 __ mov(G4, count); // Restore count | |
1341 | |
1342 // copy tailing bytes | |
1343 __ BIND(L_copy_byte); | |
1344 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | |
1345 __ delayed()->nop(); | |
1365 | 1346 __ align(OptoLoopAlignment); |
0 | 1347 __ BIND(L_copy_byte_loop); |
1348 __ ldub(from, offset, O3); | |
1349 __ deccc(count); | |
1350 __ stb(O3, to, offset); | |
1351 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); | |
1352 __ delayed()->inc(offset); | |
1353 | |
1354 __ BIND(L_exit); | |
1355 // O3, O4 are used as temp registers | |
1356 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); | |
1357 __ retl(); | |
1358 __ delayed()->mov(G0, O0); // return 0 | |
1359 return start; | |
1360 } | |
1361 | |
1362 // | |
1363 // Generate stub for conjoint byte copy. If "aligned" is true, the | |
1364 // "from" and "to" addresses are assumed to be heapword aligned. | |
1365 // | |
1366 // Arguments for generated stub: | |
1367 // from: O0 | |
1368 // to: O1 | |
1369 // count: O2 treated as signed | |
1370 // | |
1371 address generate_conjoint_byte_copy(bool aligned, const char * name) { | |
// Generates the conjoint (possibly overlapping) byte copy stub under the given
// name and returns its entry address. The emitted code first branches to the
// matching disjoint stub when the overlap test allows it, and otherwise copies
// backward from the end of the arrays. For the unaligned variant the address
// just past the count check is also recorded in byte_copy_entry.
1372 // Do reverse copy. | |
1373 | |
1374 __ align(CodeEntryAlignment); | |
1375 StubCodeMark mark(this, "StubRoutines", name); | |
1376 address start = __ pc(); | |
1377 address nooverlap_target = aligned ? | |
1378 StubRoutines::arrayof_jbyte_disjoint_arraycopy() : | |
1379 disjoint_byte_copy_entry; | |
1380 | |
1381 Label L_skip_alignment, L_align, L_aligned_copy; | |
1382 Label L_copy_byte, L_copy_byte_loop, L_exit; | |
1383 | |
1384 const Register from = O0; // source array address | |
1385 const Register to = O1; // destination array address | |
1386 const Register count = O2; // elements count | |
1387 const Register end_from = from; // source array end address | |
1388 const Register end_to = to; // destination array end address | |
1389 | |
1390 assert_clean_int(count, O3); // Make sure 'count' is clean int. | |
1391 | |
1392 if (!aligned) byte_copy_entry = __ pc(); | |
1393 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) | |
1394 if (!aligned) BLOCK_COMMENT("Entry:"); | |
1395 | |
1396 array_overlap_test(nooverlap_target, 0); | |
1397 | |
1398 __ add(to, count, end_to); // offset after last copied element | |
1399 | |
1400 // for short arrays, just do single element copy | |
1401 __ cmp(count, 23); // 16 + 7 | |
1402 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); | |
1403 __ delayed()->add(from, count, end_from); | |
1404 | |
1405 { | |
1406 // Align end of arrays since they could be not aligned even | |
1407 // when arrays itself are aligned. | |
1408 | |
1409 // copy bytes to align 'end_to' on 8 byte boundary | |
1410 __ andcc(end_to, 7, G1); // misaligned bytes | |
1411 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); | |
1412 __ delayed()->nop(); | |
1413 __ sub(count, G1, count); | |
1414 __ BIND(L_align); | |
1415 __ dec(end_from); | |
1416 __ dec(end_to); | |
1417 __ ldub(end_from, 0, O3); | |
1418 __ deccc(G1); | |
1419 __ brx(Assembler::notZero, false, Assembler::pt, L_align); | |
1420 __ delayed()->stb(O3, end_to, 0); | |
1421 __ BIND(L_skip_alignment); | |
1422 } | |
1423 #ifdef _LP64 | |
1424 if (aligned) { | |
1425 // Both arrays are aligned to 8-bytes in 64-bits VM. | |
1426 // The 'count' is decremented in copy_16_bytes_backward_with_shift() | |
1427 // in unaligned case. | |
1428 __ dec(count, 16); | |
1429 } else | |
1430 #endif | |
1431 { | |
1432 // Copy with shift 16 bytes per iteration if arrays do not have | |
1433 // the same alignment mod 8, otherwise jump to the next | |
1434 // code for aligned copy (and subtracting 16 from 'count' before jump). | |
1435 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. | |
1436 // Also jump over aligned copy after the copy with shift completed. | |
1437 | |
1438 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, | |
1439 L_aligned_copy, L_copy_byte); | |
1440 } | |
1441 // copy 16 elements (16 bytes) at a time | |
1365 | 1442 __ align(OptoLoopAlignment); |
0 | 1443 __ BIND(L_aligned_copy); |
1444 __ dec(end_from, 16); | |
1445 __ ldx(end_from, 8, O3); | |
1446 __ ldx(end_from, 0, O4); | |
1447 __ dec(end_to, 16); | |
1448 __ deccc(count, 16); | |
1449 __ stx(O3, end_to, 8); | |
1450 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); | |
1451 __ delayed()->stx(O4, end_to, 0); | |
1452 __ inc(count, 16); | |
1453 | |
1454 // copy 1 element (1 byte) at a time | |
1455 __ BIND(L_copy_byte); | |
1456 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | |
1457 __ delayed()->nop(); | |
1365 | 1458 __ align(OptoLoopAlignment); |
0 | 1459 __ BIND(L_copy_byte_loop); |
1460 __ dec(end_from); | |
1461 __ dec(end_to); | |
1462 __ ldub(end_from, 0, O4); | |
1463 __ deccc(count); | |
1464 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop); | |
1465 __ delayed()->stb(O4, end_to, 0); | |
1466 | |
1467 __ BIND(L_exit); | |
1468 // O3, O4 are used as temp registers | |
1469 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4); | |
1470 __ retl(); | |
1471 __ delayed()->mov(G0, O0); // return 0 | |
1472 return start; | |
1473 } | |
1474 | |
1475 // | |
1476 // Generate stub for disjoint short copy. If "aligned" is true, the | |
1477 // "from" and "to" addresses are assumed to be heapword aligned. | |
1478 // | |
1479 // Arguments for generated stub: | |
1480 // from: O0 | |
1481 // to: O1 | |
1482 // count: O2 treated as signed | |
1483 // | |
1484 address generate_disjoint_short_copy(bool aligned, const char * name) { | |
// Generates the disjoint (forward) 2-byte element copy stub under the given
// name and returns its entry address. For the unaligned variant the address
// just past the count check is also recorded in disjoint_short_copy_entry.
1485 __ align(CodeEntryAlignment); | |
1486 StubCodeMark mark(this, "StubRoutines", name); | |
1487 address start = __ pc(); | |
1488 | |
1489 Label L_skip_alignment, L_skip_alignment2; | |
1490 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit; | |
1491 | |
1492 const Register from = O0; // source array address | |
1493 const Register to = O1; // destination array address | |
1494 const Register count = O2; // elements count | |
1495 const Register offset = O5; // offset from start of arrays | |
1496 // O3, O4, G3, G4 are used as temp registers | |
1497 | |
1498 assert_clean_int(count, O3); // Make sure 'count' is clean int. | |
1499 | |
1500 if (!aligned) disjoint_short_copy_entry = __ pc(); | |
1501 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) | |
1502 if (!aligned) BLOCK_COMMENT("Entry:"); | |
1503 | |
1504 // for short arrays, just do single element copy | |
1505 __ cmp(count, 11); // 8 + 3 (22 bytes) | |
1506 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); | |
1507 __ delayed()->mov(G0, offset); | |
1508 | |
1509 if (aligned) { | |
1510 // 'aligned' == true when it is known statically during compilation | |
1511 // of this arraycopy call site that both 'from' and 'to' addresses | |
1512 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). | |
1513 // | |
1514 // Aligned arrays have 4 bytes alignment in 32-bits VM | |
1515 // and 8 bytes - in 64-bits VM. | |
1516 // | |
1517 #ifndef _LP64 | |
1518 // copy a 2-elements word if necessary to align 'to' to 8 bytes | |
1519 __ andcc(to, 7, G0); | |
1520 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); | |
1521 __ delayed()->ld(from, 0, O3); | |
1522 __ inc(from, 4); | |
1523 __ inc(to, 4); | |
1524 __ dec(count, 2); | |
1525 __ st(O3, to, -4); | |
1526 __ BIND(L_skip_alignment); | |
1527 #endif | |
1528 } else { | |
1529 // copy 1 element if necessary to align 'to' on a 4 byte boundary | |
1530 __ andcc(to, 3, G0); | |
1531 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); | |
1532 __ delayed()->lduh(from, 0, O3); | |
1533 __ inc(from, 2); | |
1534 __ inc(to, 2); | |
1535 __ dec(count); | |
1536 __ sth(O3, to, -2); | |
1537 __ BIND(L_skip_alignment); | |
1538 | |
1539 // copy 2 elements to align 'to' on an 8 byte boundary | |
1540 __ andcc(to, 7, G0); | |
1541 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); | |
1542 __ delayed()->lduh(from, 0, O3); | |
1543 __ dec(count, 2); | |
1544 __ lduh(from, 2, O4); | |
1545 __ inc(from, 4); | |
1546 __ inc(to, 4); | |
1547 __ sth(O3, to, -4); | |
1548 __ sth(O4, to, -2); | |
1549 __ BIND(L_skip_alignment2); | |
1550 } | |
// On _LP64 the shift copy is emitted only for the unaligned variant; on
// 32-bit builds the braces below are unconditional.
1551 #ifdef _LP64 | |
1552 if (!aligned) | |
1553 #endif | |
1554 { | |
1555 // Copy with shift 16 bytes per iteration if arrays do not have | |
1556 // the same alignment mod 8, otherwise fall through to the next | |
1557 // code for aligned copy. | |
1558 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. | |
1559 // Also jump over aligned copy after the copy with shift completed. | |
1560 | |
1561 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes); | |
1562 } | |
1563 | |
1564 // Both array are 8 bytes aligned, copy 16 bytes at a time | |
// G4 keeps the 0..3 trailing elements; count becomes the number of 8-byte
// words handed to the long-copy core.
1565 __ and3(count, 3, G4); // Save | |
1566 __ srl(count, 2, count); | |
1567 generate_disjoint_long_copy_core(aligned); | |
1568 __ mov(G4, count); // restore | |
1569 | |
1570 // copy 1 element at a time | |
1571 __ BIND(L_copy_2_bytes); | |
1572 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | |
1573 __ delayed()->nop(); | |
1365 | 1574 __ align(OptoLoopAlignment); |
0 | 1575 __ BIND(L_copy_2_bytes_loop); |
1576 __ lduh(from, offset, O3); | |
1577 __ deccc(count); | |
1578 __ sth(O3, to, offset); | |
1579 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); | |
1580 __ delayed()->inc(offset, 2); | |
1581 | |
1582 __ BIND(L_exit); | |
1583 // O3, O4 are used as temp registers | |
1584 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); | |
1585 __ retl(); | |
1586 __ delayed()->mov(G0, O0); // return 0 | |
1587 return start; | |
1588 } | |
1589 | |
1590 // | |
1591 // Generate stub for conjoint short copy. If "aligned" is true, the | |
1592 // "from" and "to" addresses are assumed to be heapword aligned. | |
1593 // | |
1594 // Arguments for generated stub: | |
1595 // from: O0 | |
1596 // to: O1 | |
1597 // count: O2 treated as signed | |
1598 // | |
1599 address generate_conjoint_short_copy(bool aligned, const char * name) { | |
// Emits the jshort copy stub that walks from the array ends towards the
// array starts, and returns the address where the emitted code begins.
1600 // Do reverse copy. | |
1601 | |
1602 __ align(CodeEntryAlignment); | |
1603 StubCodeMark mark(this, "StubRoutines", name); | |
1604 address start = __ pc(); | |
// Entry of the matching disjoint stub, handed to array_overlap_test() below.
// NOTE(review): presumably that helper branches there when a forward copy
// is safe -- confirm against array_overlap_test().
1605 address nooverlap_target = aligned ? | |
1606 StubRoutines::arrayof_jshort_disjoint_arraycopy() : | |
1607 disjoint_short_copy_entry; | |
1608 | |
1609 Label L_skip_alignment, L_skip_alignment2, L_aligned_copy; | |
1610 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit; | |
1611 | |
1612 const Register from = O0; // source array address | |
1613 const Register to = O1; // destination array address | |
1614 const Register count = O2; // elements count | |
// end_from/end_to are aliases of from/to (same registers); they hold the
// end addresses once byte_count has been added below.
1615 const Register end_from = from; // source array end address | |
1616 const Register end_to = to; // destination array end address | |
1617 | |
// byte_count lives in O3, which the copy code below also uses as a temporary.
1618 const Register byte_count = O3; // bytes count to copy | |
1619 | |
1620 assert_clean_int(count, O3); // Make sure 'count' is clean int. | |
1621 | |
// Only the unaligned variant records its entry address; the recorded entry
// lies after the assert_clean_int code emitted above.
1622 if (!aligned) short_copy_entry = __ pc(); | |
1623 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) | |
1624 if (!aligned) BLOCK_COMMENT("Entry:"); | |
1625 | |
1626 array_overlap_test(nooverlap_target, 1); | |
1627 | |
// byte_count = count << LogBytesPerShort; end_to = to + byte_count.
1628 __ sllx(count, LogBytesPerShort, byte_count); | |
1629 __ add(to, byte_count, end_to); // offset after last copied element | |
1630 | |
1631 // for short arrays, just do single element copy | |
1632 __ cmp(count, 11); // 8 + 3 (22 bytes) | |
1633 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); | |
// Branch delay slot: end_from = from + byte_count.
1634 __ delayed()->add(from, byte_count, end_from); | |
1635 | |
1636 { | |
1637 // Align the end of the arrays, since the ends may be unaligned even | |
1638 // when the arrays themselves are aligned. | |
1639 | |
1640 // copy 1 element if necessary to align 'end_to' on an 4 bytes | |
1641 __ andcc(end_to, 3, G0); | |
1642 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); | |
// Delay slot: load the last jshort into O3 (stored below only when the
// branch is not taken).
1643 __ delayed()->lduh(end_from, -2, O3); | |
1644 __ dec(end_from, 2); | |
1645 __ dec(end_to, 2); | |
1646 __ dec(count); | |
1647 __ sth(O3, end_to, 0); | |
1648 __ BIND(L_skip_alignment); | |
1649 | |
1650 // copy 2 elements to align 'end_to' on an 8 byte boundary | |
1651 __ andcc(end_to, 7, G0); | |
1652 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); | |
1653 __ delayed()->lduh(end_from, -2, O3); | |
1654 __ dec(count, 2); | |
1655 __ lduh(end_from, -4, O4); | |
1656 __ dec(end_from, 4); | |
1657 __ dec(end_to, 4); | |
1658 __ sth(O3, end_to, 2); | |
1659 __ sth(O4, end_to, 0); | |
1660 __ BIND(L_skip_alignment2); | |
1661 } | |
1662 #ifdef _LP64 | |
1663 if (aligned) { | |
1664 // Both arrays are aligned to 8-bytes in 64-bits VM. | |
1665 // The 'count' is decremented in copy_16_bytes_backward_with_shift() | |
1666 // in unaligned case. | |
// Pre-bias 'count' for the L_aligned_copy loop below, which subtracts 8 per
// iteration and adds 8 back after its last iteration.
1667 __ dec(count, 8); | |
1668 } else | |
1669 #endif | |
1670 { | |
1671 // Copy with shift 16 bytes per iteration if arrays do not have | |
1672 // the same alignment mod 8, otherwise jump to the next | |
1673 // code for aligned copy (and subtracting 8 from 'count' before jump). | |
1674 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. | |
1675 // Also jump over aligned copy after the copy with shift completed. | |
1676 | |
1677 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, | |
1678 L_aligned_copy, L_copy_2_bytes); | |
1679 } | |
1680 // copy 8 elements (16 bytes) at a time | |
1365 | 1681 __ align(OptoLoopAlignment); |
0 | 1682 __ BIND(L_aligned_copy); |
1683 __ dec(end_from, 16); | |
1684 __ ldx(end_from, 8, O3); | |
1685 __ ldx(end_from, 0, O4); | |
1686 __ dec(end_to, 16); | |
1687 __ deccc(count, 8); | |
1688 __ stx(O3, end_to, 8); | |
1689 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); | |
1690 __ delayed()->stx(O4, end_to, 0); | |
// Undo the bias of 8 so 'count' is the number of elements still to copy.
1691 __ inc(count, 8); | |
1692 | |
1693 // copy 1 element (2 bytes) at a time | |
1694 __ BIND(L_copy_2_bytes); | |
1695 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | |
1696 __ delayed()->nop(); | |
1697 __ BIND(L_copy_2_bytes_loop); | |
1698 __ dec(end_from, 2); | |
1699 __ dec(end_to, 2); | |
1700 __ lduh(end_from, 0, O4); | |
1701 __ deccc(count); | |
1702 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); | |
// Delay slot: the store of the element just loaded.
1703 __ delayed()->sth(O4, end_to, 0); | |
1704 | |
1705 __ BIND(L_exit); | |
1706 // O3, O4 are used as temp registers | |
1707 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); | |
1708 __ retl(); | |
1709 __ delayed()->mov(G0, O0); // return 0 | |
1710 return start; | |
1711 } | |
1712 | |
1713 // | |
1714 // Generate core code for disjoint int copy (and oop copy on 32-bit). | |
1715 // If "aligned" is true, the "from" and "to" addresses are assumed | |
1716 // to be heapword aligned. | |
1717 // | |
1718 // Arguments: | |
1719 // from: O0 | |
1720 // to: O1 | |
1721 // count: O2 treated as signed | |
1722 // | |
1723 void generate_disjoint_int_copy_core(bool aligned) { | |
// Emits inline code for a forward copy of 4-byte elements. No return
// instruction is emitted here; control falls out at L_exit.
1724 | |
1725 Label L_skip_alignment, L_aligned_copy; | |
1726 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; | |
1727 | |
1728 const Register from = O0; // source array address | |
1729 const Register to = O1; // destination array address | |
1730 const Register count = O2; // elements count | |
1731 const Register offset = O5; // offset from start of arrays | |
1732 // O3, O4, G3, G4 are used as temp registers | |
1733 | |
1734 // 'aligned' == true when it is known statically during compilation | |
1735 // of this arraycopy call site that both 'from' and 'to' addresses | |
1736 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). | |
1737 // | |
1738 // Aligned arrays have 4 bytes alignment in 32-bits VM | |
1739 // and 8 bytes - in 64-bits VM. | |
1740 // | |
// With _LP64 defined the braced block below is generated only when
// !aligned; without _LP64 it is always generated.
1741 #ifdef _LP64 | |
1742 if (!aligned) | |
1743 #endif | |
1744 { | |
1745 // The next check could be put under 'ifndef' since the code in | |
1746 // generate_disjoint_long_copy_core() has own checks and set 'offset'. | |
1747 | |
1748 // for short arrays, just do single element copy | |
1749 __ cmp(count, 5); // 4 + 1 (20 bytes) | |
1750 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes); | |
// Delay slot: offset = 0, as needed by the element loop at L_copy_4_bytes.
1751 __ delayed()->mov(G0, offset); | |
1752 | |
1753 // copy 1 element to align 'to' on an 8 byte boundary | |
1754 __ andcc(to, 7, G0); | |
1755 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); | |
1756 __ delayed()->ld(from, 0, O3); | |
1757 __ inc(from, 4); | |
1758 __ inc(to, 4); | |
1759 __ dec(count); | |
1760 __ st(O3, to, -4); | |
1761 __ BIND(L_skip_alignment); | |
1762 | |
1763 // if arrays have same alignment mod 8, do 4 elements copy | |
1764 __ andcc(from, 7, G0); | |
1765 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); | |
// Delay slot: O3 = 4-byte word at 'from'; it seeds the shifted copy loop.
1766 __ delayed()->ld(from, 0, O3); | |
1767 | |
1768 // | |
1769 // Load 2 aligned 8-bytes chunks and use one from previous iteration | |
1770 // to form 2 aligned 8-bytes chunks to store. | |
1771 // | |
1772 // copy_16_bytes_forward_with_shift() is not used here since this | |
1773 // code is more optimal. | |
1774 | |
1775 // copy with shift 4 elements (16 bytes) at a time | |
1776 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4 | |
1777 | |
1365 | 1778 __ align(OptoLoopAlignment); |
0 | 1779 __ BIND(L_copy_16_bytes); |
// Each iteration loads 8 bytes at from+4 and from+12, merges them with the
// carried value in O3 using 32-bit shifts, and stores 16 bytes at 'to'.
1780 __ ldx(from, 4, O4); | |
1781 __ deccc(count, 4); // Can we do next iteration after this one? | |
1782 __ ldx(from, 12, G4); | |
1783 __ inc(to, 16); | |
1784 __ inc(from, 16); | |
1785 __ sllx(O3, 32, O3); | |
1786 __ srlx(O4, 32, G3); | |
1787 __ bset(G3, O3); | |
1788 __ stx(O3, to, -16); | |
1789 __ sllx(O4, 32, O4); | |
1790 __ srlx(G4, 32, G3); | |
1791 __ bset(G3, O4); | |
1792 __ stx(O4, to, -8); | |
1793 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); | |
// Delay slot: carry the last loaded chunk (G4) into O3 for the next pass.
1794 __ delayed()->mov(G4, O3); | |
1795 | |
1796 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); | |
1797 __ delayed()->inc(count, 4); // restore 'count' | |
1798 | |
1799 __ BIND(L_aligned_copy); | |
1800 } | |
1801 // copy 4 elements (16 bytes) at a time | |
// G4 = count & 1 (the odd element); count is halved to the number of
// 8-byte units before the long-copy core is emitted inline.
1802 __ and3(count, 1, G4); // Save | |
1803 __ srl(count, 1, count); | |
1804 generate_disjoint_long_copy_core(aligned); | |
1805 __ mov(G4, count); // Restore | |
1806 | |
1807 // copy 1 element at a time | |
1808 __ BIND(L_copy_4_bytes); | |
1809 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | |
1810 __ delayed()->nop(); | |
1811 __ BIND(L_copy_4_bytes_loop); | |
1812 __ ld(from, offset, O3); | |
1813 __ deccc(count); | |
1814 __ st(O3, to, offset); | |
1815 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop); | |
1816 __ delayed()->inc(offset, 4); | |
1817 __ BIND(L_exit); | |
1818 } | |
1819 | |
1820 // | |
1821 // Generate stub for disjoint int copy. If "aligned" is true, the | |
1822 // "from" and "to" addresses are assumed to be heapword aligned. | |
1823 // | |
1824 // Arguments for generated stub: | |
1825 // from: O0 | |
1826 // to: O1 | |
1827 // count: O2 treated as signed | |
1828 // | |
1829 address generate_disjoint_int_copy(bool aligned, const char * name) { | |
// Emits the disjoint jint copy stub: the copy core followed by a counter
// update and a return with O0 = 0. Returns the address where the emitted
// code begins.
1830 __ align(CodeEntryAlignment); | |
1831 StubCodeMark mark(this, "StubRoutines", name); | |
1832 address start = __ pc(); | |
1833 | |
1834 const Register count = O2; | |
1835 assert_clean_int(count, O3); // Make sure 'count' is clean int. | |
1836 | |
// Only the unaligned variant records its entry address; the recorded entry
// lies after the assert_clean_int code emitted above.
1837 if (!aligned) disjoint_int_copy_entry = __ pc(); | |
1838 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) | |
1839 if (!aligned) BLOCK_COMMENT("Entry:"); | |
1840 | |
1841 generate_disjoint_int_copy_core(aligned); | |
1842 | |
1843 // O3, O4 are used as temp registers | |
1844 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); | |
1845 __ retl(); | |
1846 __ delayed()->mov(G0, O0); // return 0 | |
1847 return start; | |
1848 } | |
1849 | |
1850 // | |
1851 // Generate core code for conjoint int copy (and oop copy on 32-bit). | |
1852 // If "aligned" is true, the "from" and "to" addresses are assumed | |
1853 // to be heapword aligned. | |
1854 // | |
1855 // Arguments: | |
1856 // from: O0 | |
1857 // to: O1 | |
1858 // count: O2 treated as signed | |
1859 // | |
1860 void generate_conjoint_int_copy_core(bool aligned) { | |
// Emits inline code for a backward copy of 4-byte elements (from the array
// ends towards the starts). No return instruction is emitted here; control
// falls out at L_exit. The 'aligned' argument is not consulted in this body.
1861 // Do reverse copy. | |
1862 | |
1863 Label L_skip_alignment, L_aligned_copy; | |
1864 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; | |
1865 | |
1866 const Register from = O0; // source array address | |
1867 const Register to = O1; // destination array address | |
1868 const Register count = O2; // elements count | |
// end_from/end_to are aliases of from/to (same registers).
1869 const Register end_from = from; // source array end address | |
1870 const Register end_to = to; // destination array end address | |
1871 // O3, O4, O5, G3 are used as temp registers | |
1872 | |
1873 const Register byte_count = O3; // bytes count to copy | |
1874 | |
// byte_count = count << LogBytesPerInt; end_to = to + byte_count.
1875 __ sllx(count, LogBytesPerInt, byte_count); | |
1876 __ add(to, byte_count, end_to); // offset after last copied element | |
1877 | |
1878 __ cmp(count, 5); // for short arrays, just do single element copy | |
1879 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes); | |
// Branch delay slot: end_from = from + byte_count.
1880 __ delayed()->add(from, byte_count, end_from); | |
1881 | |
1882 // copy 1 element to align 'to' on an 8 byte boundary | |
1883 __ andcc(end_to, 7, G0); | |
1884 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); | |
1885 __ delayed()->nop(); | |
1886 __ dec(count); | |
1887 __ dec(end_from, 4); | |
1888 __ dec(end_to, 4); | |
1889 __ ld(end_from, 0, O4); | |
1890 __ st(O4, end_to, 0); | |
1891 __ BIND(L_skip_alignment); | |
1892 | |
1893 // Check if 'end_from' and 'end_to' have the same alignment. | |
1894 __ andcc(end_from, 7, G0); | |
1895 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); | |
// The delay-slot decrement biases 'count' by 4 for whichever of the two
// 16-byte loops runs next; both loops add 4 back when they finish.
1896 __ delayed()->dec(count, 4); // The cmp at the start guarantees cnt >= 4 | |
1897 | |
1898 // copy with shift 4 elements (16 bytes) at a time | |
1899 // | |
1900 // Load 2 aligned 8-bytes chunks and use one from previous iteration | |
1901 // to form 2 aligned 8-bytes chunks to store. | |
1902 // | |
// NOTE(review): end_from is presumably 4 mod 8 here, which would make
// end_from-4 an 8-byte aligned address for this ldx -- confirm.
1903 __ ldx(end_from, -4, O3); | |
1365 | 1904 __ align(OptoLoopAlignment); |
0 | 1905 __ BIND(L_copy_16_bytes); |
1906 __ ldx(end_from, -12, O4); | |
1907 __ deccc(count, 4); | |
1908 __ ldx(end_from, -20, O5); | |
1909 __ dec(end_to, 16); | |
1910 __ dec(end_from, 16); | |
1911 __ srlx(O3, 32, O3); | |
1912 __ sllx(O4, 32, G3); | |
1913 __ bset(G3, O3); | |
1914 __ stx(O3, end_to, 8); | |
1915 __ srlx(O4, 32, O4); | |
1916 __ sllx(O5, 32, G3); | |
1917 __ bset(O4, G3); | |
1918 __ stx(G3, end_to, 0); | |
1919 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); | |
// Delay slot: carry the lowest loaded chunk (O5) into O3 for the next pass.
1920 __ delayed()->mov(O5, O3); | |
1921 | |
1922 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); | |
// Delay slot: remove the bias of 4 from 'count'.
1923 __ delayed()->inc(count, 4); | |
1924 | |
1925 // copy 4 elements (16 bytes) at a time | |
1365 | 1926 __ align(OptoLoopAlignment); |
0 | 1927 __ BIND(L_aligned_copy); |
1928 __ dec(end_from, 16); | |
1929 __ ldx(end_from, 8, O3); | |
1930 __ ldx(end_from, 0, O4); | |
1931 __ dec(end_to, 16); | |
1932 __ deccc(count, 4); | |
1933 __ stx(O3, end_to, 8); | |
1934 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); | |
1935 __ delayed()->stx(O4, end_to, 0); | |
// Remove the bias of 4 so 'count' is the number of elements still to copy.
1936 __ inc(count, 4); | |
1937 | |
1938 // copy 1 element (4 bytes) at a time | |
1939 __ BIND(L_copy_4_bytes); | |
1940 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | |
1941 __ delayed()->nop(); | |
1942 __ BIND(L_copy_4_bytes_loop); | |
1943 __ dec(end_from, 4); | |
1944 __ dec(end_to, 4); | |
1945 __ ld(end_from, 0, O4); | |
1946 __ deccc(count); | |
1947 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop); | |
// Delay slot: the store of the element just loaded.
1948 __ delayed()->st(O4, end_to, 0); | |
1949 __ BIND(L_exit); | |
1950 } | |
1951 | |
1952 // | |
1953 // Generate stub for conjoint int copy. If "aligned" is true, the | |
1954 // "from" and "to" addresses are assumed to be heapword aligned. | |
1955 // | |
1956 // Arguments for generated stub: | |
1957 // from: O0 | |
1958 // to: O1 | |
1959 // count: O2 treated as signed | |
1960 // | |
1961 address generate_conjoint_int_copy(bool aligned, const char * name) { | |
// Emits the conjoint jint copy stub: overlap test, backward copy core,
// counter update and a return with O0 = 0. Returns the address where the
// emitted code begins.
1962 __ align(CodeEntryAlignment); | |
1963 StubCodeMark mark(this, "StubRoutines", name); | |
1964 address start = __ pc(); | |
1965 | |
// Entry of the matching disjoint stub, handed to array_overlap_test() below.
// NOTE(review): presumably that helper branches there when a forward copy
// is safe -- confirm against array_overlap_test().
1966 address nooverlap_target = aligned ? | |
1967 StubRoutines::arrayof_jint_disjoint_arraycopy() : | |
1968 disjoint_int_copy_entry; | |
1969 | |
1970 assert_clean_int(O2, O3); // Make sure 'count' is clean int. | |
1971 | |
// Only the unaligned variant records its entry address; the recorded entry
// lies after the assert_clean_int code emitted above.
1972 if (!aligned) int_copy_entry = __ pc(); | |
1973 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) | |
1974 if (!aligned) BLOCK_COMMENT("Entry:"); | |
1975 | |
1976 array_overlap_test(nooverlap_target, 2); | |
1977 | |
1978 generate_conjoint_int_copy_core(aligned); | |
1979 | |
1980 // O3, O4 are used as temp registers | |
1981 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); | |
1982 __ retl(); | |
1983 __ delayed()->mov(G0, O0); // return 0 | |
1984 return start; | |
1985 } | |
1986 | |
1987 // | |
1988 // Generate core code for disjoint long copy (and oop copy on 64-bit). | |
1989 // "aligned" is ignored, because we must make the stronger | |
1990 // assumption that both addresses are always 64-bit aligned. | |
1991 // | |
1992 // Arguments: | |
1993 // from: O0 | |
1994 // to: O1 | |
1995 // count: O2 treated as signed | |
1996 // | |
1364
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
1997 // count -= 2; |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
1998 // if ( count >= 0 ) { // >= 2 elements |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
1999 // if ( count > 6) { // >= 8 elements |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2000 // count -= 6; // original count - 8 |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2001 // do { |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2002 // copy_8_elements; |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2003 // count -= 8; |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2004 // } while ( count >= 0 ); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2005 // count += 6; |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2006 // } |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2007 // if ( count >= 0 ) { // >= 2 elements |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2008 // do { |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2009 // copy_2_elements; |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2010 // } while ( (count=count-2) >= 0 ); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2011 // } |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2012 // } |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2013 // count += 2; |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2014 // if ( count != 0 ) { // 1 element left |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2015 // copy_1_element; |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2016 // } |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2017 // |
0 | 2018 void generate_disjoint_long_copy_core(bool aligned) { |
// Emits inline code for a forward copy of 8-byte elements in three stages:
// 64-byte chunks, 16-byte chunks, then at most one final 8-byte element.
// No return instruction is emitted here; control falls out at L_exit.
// The 'aligned' argument is not consulted in this body.
2019 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; | |
2020 const Register from = O0; // source array address | |
2021 const Register to = O1; // destination array address | |
2022 const Register count = O2; // elements count | |
2023 const Register offset0 = O4; // element offset | |
2024 const Register offset8 = O5; // next element offset | |
2025 | |
// 'count' is biased by -2 from here on; a negative result means fewer than
// two elements, so only the final 8-byte step applies.
2026 __ deccc(count, 2); | |
2027 __ mov(G0, offset0); // offset from start of arrays (0) | |
2028 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); | |
// Delay slot: offset8 = offset0 + 8.
2029 __ delayed()->add(offset0, 8, offset8); | |
1364
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2030 |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2031 // Copy by 64 bytes chunks |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2032 Label L_copy_64_bytes; |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2033 const Register from64 = O3; // source address |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2034 const Register to64 = G3; // destination address |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
// O3 temporarily receives count - 6 (original count - 8) before it is
// reused as from64; a negative value skips the 64-byte loop.
2035 __ subcc(count, 6, O3); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2036 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes ); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2037 __ delayed()->mov(to, to64); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2038 // Now we can use O4(offset0), O5(offset8) as temps |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2039 __ mov(O3, count); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2040 __ mov(from, from64); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2041 |
1365 | 2042 __ align(OptoLoopAlignment); |
1364
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2043 __ BIND(L_copy_64_bytes); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
// This C++ loop runs at stub-generation time: it emits four unrolled
// load/load/store/store groups covering 64 bytes per pass of the emitted loop.
2044 for( int off = 0; off < 64; off += 16 ) { |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2045 __ ldx(from64, off+0, O4); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2046 __ ldx(from64, off+8, O5); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2047 __ stx(O4, to64, off+0); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2048 __ stx(O5, to64, off+8); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2049 } |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2050 __ deccc(count, 8); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2051 __ inc(from64, 64); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2052 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2053 __ delayed()->inc(to64, 64); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2054 |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2055 // Restore O4(offset0), O5(offset8) |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
// offset0 = from64 - from, i.e. the number of bytes copied by the loop above.
2056 __ sub(from64, from, offset0); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
// Adding 6 returns 'count' to the -2 bias used by the 16-byte loop.
2057 __ inccc(count, 6); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2058 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2059 __ delayed()->add(offset0, 8, offset8); |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2060 |
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2061 // Copy by 16 bytes chunks |
1365 | 2062 __ align(OptoLoopAlignment); |
0 | 2063 __ BIND(L_copy_16_bytes); |
// O3 and G3 (from64/to64 above) are reused here as data temporaries.
2064 __ ldx(from, offset0, O3); | |
2065 __ ldx(from, offset8, G3); | |
2066 __ deccc(count, 2); | |
2067 __ stx(O3, to, offset0); | |
2068 __ inc(offset0, 16); | |
2069 __ stx(G3, to, offset8); | |
2070 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); | |
2071 __ delayed()->inc(offset8, 16); | |
2072 | |
1364
0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
kvn
parents:
1295
diff
changeset
|
2073 // Copy last 8 bytes |
0 | 2074 __ BIND(L_copy_8_bytes); |
// Removing the -2 bias leaves 0 or 1 elements; zero branches to L_exit.
// NOTE(review): the 'true' argument is presumably the annul bit, so the
// delay-slot mov would execute only when the branch is taken -- confirm
// against the Assembler::brx declaration.
2075 __ inccc(count, 2); | |
2076 __ brx(Assembler::zero, true, Assembler::pn, L_exit ); | |
2077 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs | |
2078 __ ldx(from, offset0, O3); | |
2079 __ stx(O3, to, offset0); | |
2080 __ BIND(L_exit); | |
2081 } | |
2082 | |
2083 // | |
2084 // Generate stub for disjoint long copy. | |
2085 // "aligned" is ignored, because we must make the stronger | |
2086 // assumption that both addresses are always 64-bit aligned. | |
2087 // | |
2088 // Arguments for generated stub: | |
2089 // from: O0 | |
2090 // to: O1 | |
2091 // count: O2 treated as signed | |
2092 // | |
2093 address generate_disjoint_long_copy(bool aligned, const char * name) { | |
// Emits the disjoint jlong copy stub: the copy core followed by a counter
// update and a return with O0 = 0. Returns the address where the emitted
// code begins.
2094 __ align(CodeEntryAlignment); | |
2095 StubCodeMark mark(this, "StubRoutines", name); | |
2096 address start = __ pc(); | |
2097 | |
2098 assert_clean_int(O2, O3); // Make sure 'count' is clean int. | |
2099 | |
// Only the unaligned variant records its entry address; the recorded entry
// lies after the assert_clean_int code emitted above.
2100 if (!aligned) disjoint_long_copy_entry = __ pc(); | |
2101 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) | |
2102 if (!aligned) BLOCK_COMMENT("Entry:"); | |
2103 | |
2104 generate_disjoint_long_copy_core(aligned); | |
2105 | |
2106 // O3, O4 are used as temp registers | |
2107 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); | |
2108 __ retl(); | |
2109 __ delayed()->mov(G0, O0); // return 0 | |
2110 return start; | |
2111 } | |
2112 | |
2113 // | |
2114 // Generate core code for conjoint long copy (and oop copy on 64-bit). | |
2115 // "aligned" is ignored, because we must make the stronger | |
2116 // assumption that both addresses are always 64-bit aligned. | |
2117 // | |
2118 // Arguments: | |
2119 // from: O0 | |
2120 // to: O1 | |
2121 // count: O2 treated as signed | |
2122 // | |
2123 void generate_conjoint_long_copy_core(bool aligned) { | |
// Emits inline code for a backward copy of 8-byte elements, two per loop
// iteration, using byte offsets from 'from'/'to' that count down towards 0.
// No return instruction is emitted here; control falls out at L_exit.
// The 'aligned' argument is not consulted in this body.
2124 // Do reverse copy. | |
2125 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; | |
2126 const Register from = O0; // source array address | |
2127 const Register to = O1; // destination array address | |
2128 const Register count = O2; // elements count | |
2129 const Register offset8 = O4; // element offset | |
2130 const Register offset0 = O5; // previous element offset | |
2131 | |
2132 __ subcc(count, 1, count); | |
2133 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes ); | |
// Delay slot: offset8 = (count - 1) << LogBytesPerLong, the byte offset of
// the last element.
2134 __ delayed()->sllx(count, LogBytesPerLong, offset8); | |
2135 __ sub(offset8, 8, offset0); | |
1365 | 2136 __ align(OptoLoopAlignment); |
0 | 2137 __ BIND(L_copy_16_bytes); |
// O2 (the 'count' register) is reused as a data temporary from here on.
2138 __ ldx(from, offset8, O2); | |
2139 __ ldx(from, offset0, O3); | |
2140 __ stx(O2, to, offset8); | |
2141 __ deccc(offset8, 16); // use offset8 as counter | |
2142 __ stx(O3, to, offset0); | |
2143 __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes); | |
2144 __ delayed()->dec(offset0, 16); | |
2145 | |
2146 __ BIND(L_copy_8_bytes); | |
// No compare is emitted here: this branch tests the condition codes left by
// the subcc at entry or by the deccc in the loop (assuming the instructions
// emitted in between leave them unchanged -- TODO confirm). Negative means
// nothing is left; otherwise the element at offset 0 is copied.
2147 __ brx(Assembler::negative, false, Assembler::pn, L_exit ); | |
2148 __ delayed()->nop(); | |
2149 __ ldx(from, 0, O3); | |
2150 __ stx(O3, to, 0); | |
2151 __ BIND(L_exit); | |
2152 } | |
2153 | |
2154 // Generate stub for conjoint long copy. | |
2155 // "aligned" is ignored, because we must make the stronger | |
2156 // assumption that both addresses are always 64-bit aligned. | |
2157 // | |
2158 // Arguments for generated stub: | |
2159 // from: O0 | |
2160 // to: O1 | |
2161 // count: O2 treated as signed | |
2162 // | |
2163 address generate_conjoint_long_copy(bool aligned, const char * name) { | |
// Emits the conjoint jlong copy stub: overlap test, backward copy core,
// counter update and a return with O0 = 0. Returns the address where the
// emitted code begins.
2164 __ align(CodeEntryAlignment); | |
2165 StubCodeMark mark(this, "StubRoutines", name); | |
2166 address start = __ pc(); | |
2167 | |
// Only the unaligned variant is expected; when this assert holds, the
// '!aligned' checks below are always true.
2168 assert(!aligned, "usage"); | |
// Entry of the disjoint jlong stub, handed to array_overlap_test() below.
2169 address nooverlap_target = disjoint_long_copy_entry; | |
2170 | |
2171 assert_clean_int(O2, O3); // Make sure 'count' is clean int. | |
2172 | |
2173 if (!aligned) long_copy_entry = __ pc(); | |
2174 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) | |
2175 if (!aligned) BLOCK_COMMENT("Entry:"); | |
2176 | |
2177 array_overlap_test(nooverlap_target, 3); | |
2178 | |
2179 generate_conjoint_long_copy_core(aligned); | |
2180 | |
2181 // O3, O4 are used as temp registers | |
2182 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); | |
2183 __ retl(); | |
2184 __ delayed()->mov(G0, O0); // return 0 | |
2185 return start; | |
2186 } | |
2187 | |
2188 // Generate stub for disjoint oop copy. If "aligned" is true, the | |
2189 // "from" and "to" addresses are assumed to be heapword aligned. | |
2190 // | |
2191 // Arguments for generated stub: | |
2192 // from: O0 | |
2193 // to: O1 | |
2194 // count: O2 treated as signed | |
2195 // | |
2196 address generate_disjoint_oop_copy(bool aligned, const char * name) { | |
// Emits the disjoint oop copy stub: pre-barrier, an element copy core chosen
// by oop width, post-barrier, counter update and a return with O0 = 0.
// Returns the address where the emitted code begins.
2197 | |
2198 const Register from = O0; // source array address | |
2199 const Register to = O1; // destination array address | |
2200 const Register count = O2; // elements count | |
2201 | |
2202 __ align(CodeEntryAlignment); | |
2203 StubCodeMark mark(this, "StubRoutines", name); | |
2204 address start = __ pc(); | |
2205 | |
2206 assert_clean_int(count, O3); // Make sure 'count' is clean int. | |
2207 | |
// Only the unaligned variant records its entry address; the recorded entry
// lies after the assert_clean_int code emitted above.
2208 if (!aligned) disjoint_oop_copy_entry = __ pc(); | |
2209 // caller can pass a 64-bit byte count here | |
2210 if (!aligned) BLOCK_COMMENT("Entry:"); | |
2211 | |
2212 // save arguments for barrier generation | |
// G1 = destination address, G5 = element count; both are passed to the pre-
// and post-barrier generators. The copy cores visible in this file use
// O0-O5, G3 and G4 but not G1 or G5.
2213 __ mov(to, G1); | |
2214 __ mov(count, G5); | |
2215 gen_write_ref_array_pre_barrier(G1, G5); | |
2216 #ifdef _LP64 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2217 assert_clean_int(count, O3); // Make sure 'count' is clean int. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
// With compressed oops the elements are copied by the 4-byte core,
// otherwise by the 8-byte core.
2218 if (UseCompressedOops) { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2219 generate_disjoint_int_copy_core(aligned); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2220 } else { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2221 generate_disjoint_long_copy_core(aligned); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2222 } |
0 | 2223 #else |
// Without _LP64 the 4-byte core is always used.
2224 generate_disjoint_int_copy_core(aligned); | |
2225 #endif | |
2226 // O0 is used as temp register | |
2227 gen_write_ref_array_post_barrier(G1, G5, O0); | |
2228 | |
2229 // O3, O4 are used as temp registers | |
2230 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); | |
2231 __ retl(); | |
2232 __ delayed()->mov(G0, O0); // return 0 | |
2233 return start; | |
2234 } | |
2235 | |
2236 // Generate stub for conjoint oop copy. If "aligned" is true, the | |
2237 // "from" and "to" addresses are assumed to be heapword aligned. | |
2238 // | |
2239 // Arguments for generated stub: | |
2240 // from: O0 | |
2241 // to: O1 | |
2242 // count: O2 treated as signed | |
2243 // | |
2244 address generate_conjoint_oop_copy(bool aligned, const char * name) { | |
2245 | |
2246 const Register from = O0; // source array address | |
2247 const Register to = O1; // destination array address | |
2248 const Register count = O2; // elements count | |
2249 | |
2250 __ align(CodeEntryAlignment); | |
2251 StubCodeMark mark(this, "StubRoutines", name); | |
2252 address start = __ pc(); | |
2253 | |
2254 assert_clean_int(count, O3); // Make sure 'count' is clean int. | |
2255 | |
2256 if (!aligned) oop_copy_entry = __ pc(); | |
2257 // caller can pass a 64-bit byte count here | |
2258 if (!aligned) BLOCK_COMMENT("Entry:"); | |
2259 | |
2260 // save arguments for barrier generation | |
2261 __ mov(to, G1); | |
2262 __ mov(count, G5); | |
2263 | |
2264 gen_write_ref_array_pre_barrier(G1, G5); | |
2265 | |
2266 address nooverlap_target = aligned ? | |
2267 StubRoutines::arrayof_oop_disjoint_arraycopy() : | |
2268 disjoint_oop_copy_entry; | |
2269 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2270 array_overlap_test(nooverlap_target, LogBytesPerHeapOop); |
0 | 2271 |
2272 #ifdef _LP64 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2273 if (UseCompressedOops) { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2274 generate_conjoint_int_copy_core(aligned); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2275 } else { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2276 generate_conjoint_long_copy_core(aligned); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2277 } |
0 | 2278 #else |
2279 generate_conjoint_int_copy_core(aligned); | |
2280 #endif | |
2281 | |
2282 // O0 is used as temp register | |
2283 gen_write_ref_array_post_barrier(G1, G5, O0); | |
2284 | |
2285 // O3, O4 are used as temp registers | |
2286 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); | |
2287 __ retl(); | |
2288 __ delayed()->mov(G0, O0); // return 0 | |
2289 return start; | |
2290 } | |
2291 | |
2292 | |
2293 // Helper for generating a dynamic type check. | |
2294 // Smashes only the given temp registers. | |
2295 void generate_type_check(Register sub_klass, | |
2296 Register super_check_offset, | |
2297 Register super_klass, | |
2298 Register temp, | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2299 Label& L_success) { |
0 | 2300 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); |
2301 | |
2302 BLOCK_COMMENT("type_check:"); | |
2303 | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2304 Label L_miss, L_pop_to_miss; |
0 | 2305 |
2306 assert_clean_int(super_check_offset, temp); | |
2307 | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2308 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg, |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2309 &L_success, &L_miss, NULL, |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2310 super_check_offset); |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2311 |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2312 BLOCK_COMMENT("type_check_slow_path:"); |
0 | 2313 __ save_frame(0); |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2314 __ check_klass_subtype_slow_path(sub_klass->after_save(), |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2315 super_klass->after_save(), |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2316 L0, L1, L2, L4, |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2317 NULL, &L_pop_to_miss); |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2318 __ ba(false, L_success); |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2319 __ delayed()->restore(); |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2320 |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2321 __ bind(L_pop_to_miss); |
0 | 2322 __ restore(); |
2323 | |
2324 // Fall through on failure! | |
2325 __ BIND(L_miss); | |
2326 } | |
2327 | |
2328 | |
2329 // Generate stub for checked oop copy. | |
2330 // | |
2331 // Arguments for generated stub: | |
2332 // from: O0 | |
2333 // to: O1 | |
2334 // count: O2 treated as signed | |
2335 // ckoff: O3 (super_check_offset) | |
2336 // ckval: O4 (super_klass) | |
2337 // ret: O0 zero for success; (-1^K) where K is partial transfer count | |
2338 // | |
2339 address generate_checkcast_copy(const char* name) { | |
2340 | |
2341 const Register O0_from = O0; // source array address | |
2342 const Register O1_to = O1; // destination array address | |
2343 const Register O2_count = O2; // elements count | |
2344 const Register O3_ckoff = O3; // super_check_offset | |
2345 const Register O4_ckval = O4; // super_klass | |
2346 | |
2347 const Register O5_offset = O5; // loop var, with stride wordSize | |
2348 const Register G1_remain = G1; // loop var, with stride -1 | |
2349 const Register G3_oop = G3; // actual oop copied | |
2350 const Register G4_klass = G4; // oop._klass | |
2351 const Register G5_super = G5; // oop._klass._primary_supers[ckval] | |
2352 | |
2353 __ align(CodeEntryAlignment); | |
2354 StubCodeMark mark(this, "StubRoutines", name); | |
2355 address start = __ pc(); | |
2356 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
2357 gen_write_ref_array_pre_barrier(O1, O2); |
0 | 2358 |
2359 #ifdef ASSERT | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2360 // We sometimes save a frame (see generate_type_check below). |
0 | 2361 // If this will cause trouble, let's fail now instead of later. |
2362 __ save_frame(0); | |
2363 __ restore(); | |
2364 #endif | |
2365 | |
2366 #ifdef ASSERT | |
2367 // caller guarantees that the arrays really are different | |
2368 // otherwise, we would have to make conjoint checks | |
2369 { Label L; | |
2370 __ mov(O3, G1); // spill: overlap test smashes O3 | |
2371 __ mov(O4, G4); // spill: overlap test smashes O4 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2372 array_overlap_test(L, LogBytesPerHeapOop); |
0 | 2373 __ stop("checkcast_copy within a single array"); |
2374 __ bind(L); | |
2375 __ mov(G1, O3); | |
2376 __ mov(G4, O4); | |
2377 } | |
2378 #endif //ASSERT | |
2379 | |
2380 assert_clean_int(O2_count, G1); // Make sure 'count' is clean int. | |
2381 | |
2382 checkcast_copy_entry = __ pc(); | |
2383 // caller can pass a 64-bit byte count here (from generic stub) | |
2384 BLOCK_COMMENT("Entry:"); | |
2385 | |
2386 Label load_element, store_element, do_card_marks, fail, done; | |
2387 __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it | |
2388 __ brx(Assembler::notZero, false, Assembler::pt, load_element); | |
2389 __ delayed()->mov(G0, O5_offset); // offset from start of arrays | |
2390 | |
2391 // Empty array: Nothing to do. | |
2392 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); | |
2393 __ retl(); | |
2394 __ delayed()->set(0, O0); // return 0 on (trivial) success | |
2395 | |
2396 // ======== begin loop ======== | |
2397 // (Loop is rotated; its entry is load_element.) | |
2398 // Loop variables: | |
2399 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays | |
2400 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining* | |
2401 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super | |
1365 | 2402 __ align(OptoLoopAlignment); |
0 | 2403 |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2404 __ BIND(store_element); |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2405 __ deccc(G1_remain); // decrement the count |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2406 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2407 __ inc(O5_offset, heapOopSize); // step to next offset |
0 | 2408 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks); |
2409 __ delayed()->set(0, O0); // return -1 on success | |
2410 | |
2411 // ======== loop entry is here ======== | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2412 __ BIND(load_element); |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2413 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop |
0 | 2414 __ br_null(G3_oop, true, Assembler::pt, store_element); |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2415 __ delayed()->nop(); |
0 | 2416 |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2417 __ load_klass(G3_oop, G4_klass); // query the object klass |
0 | 2418 |
2419 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, | |
2420 // branch to this on success: | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2421 store_element); |
0 | 2422 // ======== end loop ======== |
2423 | |
2424 // It was a real error; we must depend on the caller to finish the job. | |
2425 // Register G1 has number of *remaining* oops, O2 number of *total* oops. | |
2426 // Emit GC store barriers for the oops we have copied (O2 minus G1), | |
2427 // and report their number to the caller. | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2428 __ BIND(fail); |
0 | 2429 __ subcc(O2_count, G1_remain, O2_count); |
2430 __ brx(Assembler::zero, false, Assembler::pt, done); | |
2431 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller | |
2432 | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2433 __ BIND(do_card_marks); |
0 | 2434 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2] |
2435 | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
642
diff
changeset
|
2436 __ BIND(done); |
0 | 2437 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); |
2438 __ retl(); | |
2439 __ delayed()->nop(); // return value in 00 | |
2440 | |
2441 return start; | |
2442 } | |
2443 | |
2444 | |
2445 // Generate 'unsafe' array copy stub | |
2446 // Though just as safe as the other stubs, it takes an unscaled | |
2447 // size_t argument instead of an element count. | |
2448 // | |
2449 // Arguments for generated stub: | |
2450 // from: O0 | |
2451 // to: O1 | |
2452 // count: O2 byte count, treated as ssize_t, can be zero | |
2453 // | |
2454 // Examines the alignment of the operands and dispatches | |
2455 // to a long, int, short, or byte copy loop. | |
2456 // | |
2457 address generate_unsafe_copy(const char* name) { | |
2458 | |
2459 const Register O0_from = O0; // source array address | |
2460 const Register O1_to = O1; // destination array address | |
2461 const Register O2_count = O2; // elements count | |
2462 | |
2463 const Register G1_bits = G1; // test copy of low bits | |
2464 | |
2465 __ align(CodeEntryAlignment); | |
2466 StubCodeMark mark(this, "StubRoutines", name); | |
2467 address start = __ pc(); | |
2468 | |
2469 // bump this on entry, not on exit: | |
2470 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3); | |
2471 | |
2472 __ or3(O0_from, O1_to, G1_bits); | |
2473 __ or3(O2_count, G1_bits, G1_bits); | |
2474 | |
2475 __ btst(BytesPerLong-1, G1_bits); | |
2476 __ br(Assembler::zero, true, Assembler::pt, | |
2477 long_copy_entry, relocInfo::runtime_call_type); | |
2478 // scale the count on the way out: | |
2479 __ delayed()->srax(O2_count, LogBytesPerLong, O2_count); | |
2480 | |
2481 __ btst(BytesPerInt-1, G1_bits); | |
2482 __ br(Assembler::zero, true, Assembler::pt, | |
2483 int_copy_entry, relocInfo::runtime_call_type); | |
2484 // scale the count on the way out: | |
2485 __ delayed()->srax(O2_count, LogBytesPerInt, O2_count); | |
2486 | |
2487 __ btst(BytesPerShort-1, G1_bits); | |
2488 __ br(Assembler::zero, true, Assembler::pt, | |
2489 short_copy_entry, relocInfo::runtime_call_type); | |
2490 // scale the count on the way out: | |
2491 __ delayed()->srax(O2_count, LogBytesPerShort, O2_count); | |
2492 | |
2493 __ br(Assembler::always, false, Assembler::pt, | |
2494 byte_copy_entry, relocInfo::runtime_call_type); | |
2495 __ delayed()->nop(); | |
2496 | |
2497 return start; | |
2498 } | |
2499 | |
2500 | |
2501 // Perform range checks on the proposed arraycopy. | |
2502 // Kills the two temps, but nothing else. | |
2503 // Also, clean the sign bits of src_pos and dst_pos. | |
2504 void arraycopy_range_checks(Register src, // source array oop (O0) | |
2505 Register src_pos, // source position (O1) | |
2506 Register dst, // destination array oo (O2) | |
2507 Register dst_pos, // destination position (O3) | |
2508 Register length, // length of copy (O4) | |
2509 Register temp1, Register temp2, | |
2510 Label& L_failed) { | |
2511 BLOCK_COMMENT("arraycopy_range_checks:"); | |
2512 | |
2513 // if (src_pos + length > arrayOop(src)->length() ) FAIL; | |
2514 | |
2515 const Register array_length = temp1; // scratch | |
2516 const Register end_pos = temp2; // scratch | |
2517 | |
2518 // Note: This next instruction may be in the delay slot of a branch: | |
2519 __ add(length, src_pos, end_pos); // src_pos + length | |
2520 __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length); | |
2521 __ cmp(end_pos, array_length); | |
2522 __ br(Assembler::greater, false, Assembler::pn, L_failed); | |
2523 | |
2524 // if (dst_pos + length > arrayOop(dst)->length() ) FAIL; | |
2525 __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length | |
2526 __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length); | |
2527 __ cmp(end_pos, array_length); | |
2528 __ br(Assembler::greater, false, Assembler::pn, L_failed); | |
2529 | |
2530 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. | |
2531 // Move with sign extension can be used since they are positive. | |
2532 __ delayed()->signx(src_pos, src_pos); | |
2533 __ signx(dst_pos, dst_pos); | |
2534 | |
2535 BLOCK_COMMENT("arraycopy_range_checks done"); | |
2536 } | |
2537 | |
2538 | |
2539 // | |
2540 // Generate generic array copy stubs | |
2541 // | |
2542 // Input: | |
2543 // O0 - src oop | |
2544 // O1 - src_pos | |
2545 // O2 - dst oop | |
2546 // O3 - dst_pos | |
2547 // O4 - element count | |
2548 // | |
2549 // Output: | |
2550 // O0 == 0 - success | |
2551 // O0 == -1 - need to call System.arraycopy | |
2552 // | |
2553 address generate_generic_copy(const char *name) { | |
2554 | |
2555 Label L_failed, L_objArray; | |
2556 | |
2557 // Input registers | |
2558 const Register src = O0; // source array oop | |
2559 const Register src_pos = O1; // source position | |
2560 const Register dst = O2; // destination array oop | |
2561 const Register dst_pos = O3; // destination position | |
2562 const Register length = O4; // elements count | |
2563 | |
2564 // registers used as temp | |
2565 const Register G3_src_klass = G3; // source array klass | |
2566 const Register G4_dst_klass = G4; // destination array klass | |
2567 const Register G5_lh = G5; // layout handler | |
2568 const Register O5_temp = O5; | |
2569 | |
2570 __ align(CodeEntryAlignment); | |
2571 StubCodeMark mark(this, "StubRoutines", name); | |
2572 address start = __ pc(); | |
2573 | |
2574 // bump this on entry, not on exit: | |
2575 inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3); | |
2576 | |
2577 // In principle, the int arguments could be dirty. | |
2578 //assert_clean_int(src_pos, G1); | |
2579 //assert_clean_int(dst_pos, G1); | |
2580 //assert_clean_int(length, G1); | |
2581 | |
2582 //----------------------------------------------------------------------- | |
2583 // Assembler stubs will be used for this call to arraycopy | |
2584 // if the following conditions are met: | |
2585 // | |
2586 // (1) src and dst must not be null. | |
2587 // (2) src_pos must not be negative. | |
2588 // (3) dst_pos must not be negative. | |
2589 // (4) length must not be negative. | |
2590 // (5) src klass and dst klass should be the same and not NULL. | |
2591 // (6) src and dst should be arrays. | |
2592 // (7) src_pos + length must not exceed length of src. | |
2593 // (8) dst_pos + length must not exceed length of dst. | |
2594 BLOCK_COMMENT("arraycopy initial argument checks"); | |
2595 | |
2596 // if (src == NULL) return -1; | |
2597 __ br_null(src, false, Assembler::pn, L_failed); | |
2598 | |
2599 // if (src_pos < 0) return -1; | |
2600 __ delayed()->tst(src_pos); | |
2601 __ br(Assembler::negative, false, Assembler::pn, L_failed); | |
2602 __ delayed()->nop(); | |
2603 | |
2604 // if (dst == NULL) return -1; | |
2605 __ br_null(dst, false, Assembler::pn, L_failed); | |
2606 | |
2607 // if (dst_pos < 0) return -1; | |
2608 __ delayed()->tst(dst_pos); | |
2609 __ br(Assembler::negative, false, Assembler::pn, L_failed); | |
2610 | |
2611 // if (length < 0) return -1; | |
2612 __ delayed()->tst(length); | |
2613 __ br(Assembler::negative, false, Assembler::pn, L_failed); | |
2614 | |
2615 BLOCK_COMMENT("arraycopy argument klass checks"); | |
2616 // get src->klass() | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2617 if (UseCompressedOops) { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2618 __ delayed()->nop(); // ??? not good |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2619 __ load_klass(src, G3_src_klass); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2620 } else { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2621 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2622 } |
0 | 2623 |
2624 #ifdef ASSERT | |
2625 // assert(src->klass() != NULL); | |
2626 BLOCK_COMMENT("assert klasses not null"); | |
2627 { Label L_a, L_b; | |
2628 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2629 __ delayed()->nop(); |
0 | 2630 __ bind(L_a); |
2631 __ stop("broken null klass"); | |
2632 __ bind(L_b); | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2633 __ load_klass(dst, G4_dst_klass); |
0 | 2634 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also |
2635 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp | |
2636 BLOCK_COMMENT("assert done"); | |
2637 } | |
2638 #endif | |
2639 | |
2640 // Load layout helper | |
2641 // | |
2642 // |array_tag| | header_size | element_type | |log2_element_size| | |
2643 // 32 30 24 16 8 2 0 | |
2644 // | |
2645 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 | |
2646 // | |
2647 | |
2648 int lh_offset = klassOopDesc::header_size() * HeapWordSize + | |
2649 Klass::layout_helper_offset_in_bytes(); | |
2650 | |
2651 // Load 32-bits signed value. Use br() instruction with it to check icc. | |
2652 __ lduw(G3_src_klass, lh_offset, G5_lh); | |
2653 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2654 if (UseCompressedOops) { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2655 __ load_klass(dst, G4_dst_klass); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2656 } |
0 | 2657 // Handle objArrays completely differently... |
2658 juint objArray_lh = Klass::array_layout_helper(T_OBJECT); | |
2659 __ set(objArray_lh, O5_temp); | |
2660 __ cmp(G5_lh, O5_temp); | |
2661 __ br(Assembler::equal, false, Assembler::pt, L_objArray); | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2662 if (UseCompressedOops) { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2663 __ delayed()->nop(); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2664 } else { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2665 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2666 } |
0 | 2667 |
2668 // if (src->klass() != dst->klass()) return -1; | |
2669 __ cmp(G3_src_klass, G4_dst_klass); | |
2670 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed); | |
2671 __ delayed()->nop(); | |
2672 | |
2673 // if (!src->is_Array()) return -1; | |
2674 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0 | |
2675 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed); | |
2676 | |
2677 // At this point, it is known to be a typeArray (array_tag 0x3). | |
2678 #ifdef ASSERT | |
2679 __ delayed()->nop(); | |
2680 { Label L; | |
2681 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); | |
2682 __ set(lh_prim_tag_in_place, O5_temp); | |
2683 __ cmp(G5_lh, O5_temp); | |
2684 __ br(Assembler::greaterEqual, false, Assembler::pt, L); | |
2685 __ delayed()->nop(); | |
2686 __ stop("must be a primitive array"); | |
2687 __ bind(L); | |
2688 } | |
2689 #else | |
2690 __ delayed(); // match next insn to prev branch | |
2691 #endif | |
2692 | |
2693 arraycopy_range_checks(src, src_pos, dst, dst_pos, length, | |
2694 O5_temp, G4_dst_klass, L_failed); | |
2695 | |
2696 // typeArrayKlass | |
2697 // | |
2698 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); | |
2699 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); | |
2700 // | |
2701 | |
2702 const Register G4_offset = G4_dst_klass; // array offset | |
2703 const Register G3_elsize = G3_src_klass; // log2 element size | |
2704 | |
2705 __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset); | |
2706 __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset | |
2707 __ add(src, G4_offset, src); // src array offset | |
2708 __ add(dst, G4_offset, dst); // dst array offset | |
2709 __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size | |
2710 | |
2711 // next registers should be set before the jump to corresponding stub | |
2712 const Register from = O0; // source array address | |
2713 const Register to = O1; // destination array address | |
2714 const Register count = O2; // elements count | |
2715 | |
2716 // 'from', 'to', 'count' registers should be set in this order | |
2717 // since they are the same as 'src', 'src_pos', 'dst'. | |
2718 | |
2719 BLOCK_COMMENT("scale indexes to element size"); | |
2720 __ sll_ptr(src_pos, G3_elsize, src_pos); | |
2721 __ sll_ptr(dst_pos, G3_elsize, dst_pos); | |
2722 __ add(src, src_pos, from); // src_addr | |
2723 __ add(dst, dst_pos, to); // dst_addr | |
2724 | |
2725 BLOCK_COMMENT("choose copy loop based on element size"); | |
2726 __ cmp(G3_elsize, 0); | |
2727 __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jbyte_arraycopy); | |
2728 __ delayed()->signx(length, count); // length | |
2729 | |
2730 __ cmp(G3_elsize, LogBytesPerShort); | |
2731 __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jshort_arraycopy); | |
2732 __ delayed()->signx(length, count); // length | |
2733 | |
2734 __ cmp(G3_elsize, LogBytesPerInt); | |
2735 __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jint_arraycopy); | |
2736 __ delayed()->signx(length, count); // length | |
2737 #ifdef ASSERT | |
2738 { Label L; | |
2739 __ cmp(G3_elsize, LogBytesPerLong); | |
2740 __ br(Assembler::equal, false, Assembler::pt, L); | |
2741 __ delayed()->nop(); | |
2742 __ stop("must be long copy, but elsize is wrong"); | |
2743 __ bind(L); | |
2744 } | |
2745 #endif | |
2746 __ br(Assembler::always,false,Assembler::pt,StubRoutines::_jlong_arraycopy); | |
2747 __ delayed()->signx(length, count); // length | |
2748 | |
2749 // objArrayKlass | |
2750 __ BIND(L_objArray); | |
2751 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length | |
2752 | |
2753 Label L_plain_copy, L_checkcast_copy; | |
2754 // test array classes for subtyping | |
2755 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality | |
2756 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy); | |
2757 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below | |
2758 | |
2759 // Identically typed arrays can be copied without element-wise checks. | |
2760 arraycopy_range_checks(src, src_pos, dst, dst_pos, length, | |
2761 O5_temp, G5_lh, L_failed); | |
2762 | |
2763 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset | |
2764 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2765 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2766 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos); |
0 | 2767 __ add(src, src_pos, from); // src_addr |
2768 __ add(dst, dst_pos, to); // dst_addr | |
2769 __ BIND(L_plain_copy); | |
2770 __ br(Assembler::always, false, Assembler::pt,StubRoutines::_oop_arraycopy); | |
2771 __ delayed()->signx(length, count); // length | |
2772 | |
2773 __ BIND(L_checkcast_copy); | |
2774 // live at this point: G3_src_klass, G4_dst_klass | |
2775 { | |
2776 // Before looking at dst.length, make sure dst is also an objArray. | |
2777 // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot | |
2778 __ cmp(G5_lh, O5_temp); | |
2779 __ br(Assembler::notEqual, false, Assembler::pn, L_failed); | |
2780 | |
2781 // It is safe to examine both src.length and dst.length. | |
2782 __ delayed(); // match next insn to prev branch | |
2783 arraycopy_range_checks(src, src_pos, dst, dst_pos, length, | |
2784 O5_temp, G5_lh, L_failed); | |
2785 | |
2786 // Marshal the base address arguments now, freeing registers. | |
2787 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset | |
2788 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2789 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
16
diff
changeset
|
2790 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos); |
0 | 2791 __ add(src, src_pos, from); // src_addr |
2792 __ add(dst, dst_pos, to); // dst_addr | |
2793 __ signx(length, count); // length (reloaded) | |
2794 | |
2795 Register sco_temp = O3; // this register is free now | |
2796 assert_different_registers(from, to, count, sco_temp, | |
2797 G4_dst_klass, G3_src_klass); | |
2798 | |
2799 // Generate the type check. | |
2800 int sco_offset = (klassOopDesc::header_size() * HeapWordSize + | |
2801 Klass::super_check_offset_offset_in_bytes()); | |
2802 __ lduw(G4_dst_klass, sco_offset, sco_temp); | |
2803 generate_type_check(G3_src_klass, sco_temp, G4_dst_klass, | |
2804 O5_temp, L_plain_copy); | |
2805 | |
2806 // Fetch destination element klass from the objArrayKlass header. | |
2807 int ek_offset = (klassOopDesc::header_size() * HeapWordSize + | |
2808 objArrayKlass::element_klass_offset_in_bytes()); | |
2809 | |
2810 // the checkcast_copy loop needs two extra arguments: | |
2811 __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass | |
2812 // lduw(O4, sco_offset, O3); // sco of elem klass | |
2813 | |
2814 __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry); | |
2815 __ delayed()->lduw(O4, sco_offset, O3); | |
2816 } | |
2817 | |
2818 __ BIND(L_failed); | |
2819 __ retl(); | |
2820 __ delayed()->sub(G0, 1, O0); // return -1 | |
2821 return start; | |
2822 } | |
2823 | |
2824 void generate_arraycopy_stubs() { | |
2825 | |
2826 // Note: the disjoint stubs must be generated first, some of | |
2827 // the conjoint stubs use them. | |
2828 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); | |
2829 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); | |
2830 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy"); | |
2831 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); | |
2832 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy"); | |
2833 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy"); | |
2834 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy"); | |
2835 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy"); | |
2836 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy"); | |
2837 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy"); | |
2838 | |
2839 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); | |
2840 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); | |
2841 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy"); | |
2842 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); | |
2843 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, "oop_arraycopy"); | |
2844 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy"); | |
2845 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy"); | |
2846 #ifdef _LP64 | |
2847 // since sizeof(jint) < sizeof(HeapWord), there's a different flavor: | |
2848 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy"); | |
2849 #else | |
2850 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; | |
2851 #endif | |
2852 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; | |
2853 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; | |
2854 | |
2855 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); | |
2856 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); | |
2857 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); | |
2858 } | |
2859 | |
2860 void generate_initial() { | |
2861 // Generates all stubs and initializes the entry points | |
2862 | |
2863 //------------------------------------------------------------------------------------------------------------------------ | |
2864 // entry points that exist in all platforms | |
2865 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than | |
2866 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. | |
2867 StubRoutines::_forward_exception_entry = generate_forward_exception(); | |
2868 | |
2869 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); | |
2870 StubRoutines::_catch_exception_entry = generate_catch_exception(); | |
2871 | |
2872 //------------------------------------------------------------------------------------------------------------------------ | |
2873 // entry points that are platform specific | |
2874 StubRoutines::Sparc::_test_stop_entry = generate_test_stop(); | |
2875 | |
2876 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine(); | |
2877 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows(); | |
2878 | |
2879 #if !defined(COMPILER2) && !defined(_LP64) | |
2880 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); | |
2881 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg(); | |
2882 StubRoutines::_atomic_add_entry = generate_atomic_add(); | |
2883 StubRoutines::_atomic_xchg_ptr_entry = StubRoutines::_atomic_xchg_entry; | |
2884 StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry; | |
2885 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); | |
2886 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry; | |
2887 #endif // COMPILER2 !=> _LP64 | |
2888 } | |
2889 | |
2890 | |
2891 void generate_all() { | |
2892 // Generates all stubs and initializes the entry points | |
2893 | |
642
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
422
diff
changeset
|
2894 // Generate partial_subtype_check first here since its code depends on |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
422
diff
changeset
|
2895 // UseZeroBaseCompressedOops which is defined after heap initialization. |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
422
diff
changeset
|
2896 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check(); |
0 | 2897 // These entry points require SharedInfo::stack0 to be set up in non-core builds |
2898 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); | |
16
f8236e79048a
6664627: Merge changes made only in hotspot 11 forward to jdk 7
dcubed
parents:
0
diff
changeset
|
2899 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false); |
0 | 2900 StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true); |
2901 StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true); | |
2902 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); | |
2903 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); | |
2904 | |
2905 StubRoutines::_handler_for_unsafe_access_entry = | |
2906 generate_handler_for_unsafe_access(); | |
2907 | |
2908 // support for verify_oop (must happen after universe_init) | |
2909 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine(); | |
2910 | |
2911 // arraycopy stubs used by compilers | |
2912 generate_arraycopy_stubs(); | |
1174
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
727
diff
changeset
|
2913 |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
727
diff
changeset
|
2914 // Don't initialize the platform math functions since sparc |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
727
diff
changeset
|
2915 // doesn't have intrinsics for these operations. |
0 | 2916 } |
2917 | |
2918 | |
2919 public: | |
2920 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { | |
2921 // replace the standard masm with a special one: | |
2922 _masm = new MacroAssembler(code); | |
2923 | |
2924 _stub_count = !all ? 0x100 : 0x200; | |
2925 if (all) { | |
2926 generate_all(); | |
2927 } else { | |
2928 generate_initial(); | |
2929 } | |
2930 | |
2931 // make sure this stub is available for all local calls | |
2932 if (_atomic_add_stub.is_unbound()) { | |
2933 // generate a second time, if necessary | |
2934 (void) generate_atomic_add(); | |
2935 } | |
2936 } | |
2937 | |
2938 | |
2939 private: | |
2940 int _stub_count; | |
2941 void stub_prolog(StubCodeDesc* cdesc) { | |
2942 # ifdef ASSERT | |
2943 // put extra information in the stub code, to make it more readable | |
2944 #ifdef _LP64 | |
2945 // Write the high part of the address | |
2946 // [RGV] Check if there is a dependency on the size of this prolog | |
2947 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none); | |
2948 #endif | |
2949 __ emit_data((intptr_t)cdesc, relocInfo::none); | |
2950 __ emit_data(++_stub_count, relocInfo::none); | |
2951 # endif | |
2952 align(true); | |
2953 } | |
2954 | |
2955 void align(bool at_header = false) { | |
2956 // %%%%% move this constant somewhere else | |
2957 // UltraSPARC cache line size is 8 instructions: | |
2958 const unsigned int icache_line_size = 32; | |
2959 const unsigned int icache_half_line_size = 16; | |
2960 | |
2961 if (at_header) { | |
2962 while ((intptr_t)(__ pc()) % icache_line_size != 0) { | |
2963 __ emit_data(0, relocInfo::none); | |
2964 } | |
2965 } else { | |
2966 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) { | |
2967 __ nop(); | |
2968 } | |
2969 } | |
2970 } | |
2971 | |
2972 }; // end class declaration | |
2973 | |
2974 | |
2975 address StubGenerator::disjoint_byte_copy_entry = NULL; | |
2976 address StubGenerator::disjoint_short_copy_entry = NULL; | |
2977 address StubGenerator::disjoint_int_copy_entry = NULL; | |
2978 address StubGenerator::disjoint_long_copy_entry = NULL; | |
2979 address StubGenerator::disjoint_oop_copy_entry = NULL; | |
2980 | |
2981 address StubGenerator::byte_copy_entry = NULL; | |
2982 address StubGenerator::short_copy_entry = NULL; | |
2983 address StubGenerator::int_copy_entry = NULL; | |
2984 address StubGenerator::long_copy_entry = NULL; | |
2985 address StubGenerator::oop_copy_entry = NULL; | |
2986 | |
2987 address StubGenerator::checkcast_copy_entry = NULL; | |
2988 | |
2989 void StubGenerator_generate(CodeBuffer* code, bool all) { | |
2990 StubGenerator g(code, all); | |
2991 } |