Mercurial > hg > graal-jvmci-8
comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 4137:04b9a2566eec
Merge with hsx23/hotspot.
author | Thomas Wuerthinger <thomas.wuerthinger@oracle.com> |
---|---|
date | Sat, 17 Dec 2011 21:40:27 +0100 |
parents | a92cdbac8b9e |
children | 33df1aeaebbf |
comparison
equal
deleted
inserted
replaced
3737:9dc19b7d89a3 | 4137:04b9a2566eec |
---|---|
148 #ifdef ASSERT | 148 #ifdef ASSERT |
149 // make sure we have no pending exceptions | 149 // make sure we have no pending exceptions |
150 { const Register t = G3_scratch; | 150 { const Register t = G3_scratch; |
151 Label L; | 151 Label L; |
152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t); | 152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t); |
153 __ br_null(t, false, Assembler::pt, L); | 153 __ br_null_short(t, Assembler::pt, L); |
154 __ delayed()->nop(); | |
155 __ stop("StubRoutines::call_stub: entered with pending exception"); | 154 __ stop("StubRoutines::call_stub: entered with pending exception"); |
156 __ bind(L); | 155 __ bind(L); |
157 } | 156 } |
158 #endif | 157 #endif |
159 | 158 |
205 | 204 |
206 // test if any parameters & setup of Lentry_args | 205 // test if any parameters & setup of Lentry_args |
207 Label exit; | 206 Label exit; |
208 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter | 207 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter |
209 __ add( FP, STACK_BIAS, dst ); | 208 __ add( FP, STACK_BIAS, dst ); |
210 __ tst(cnt); | 209 __ cmp_zero_and_br(Assembler::zero, cnt, exit); |
211 __ br(Assembler::zero, false, Assembler::pn, exit); | |
212 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args | 210 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args |
213 | 211 |
214 // copy parameters if any | 212 // copy parameters if any |
215 Label loop; | 213 Label loop; |
216 __ BIND(loop); | 214 __ BIND(loop); |
280 __ BIND(exit); | 278 __ BIND(exit); |
281 __ ret(); | 279 __ ret(); |
282 __ delayed()->restore(); | 280 __ delayed()->restore(); |
283 | 281 |
284 __ BIND(is_object); | 282 __ BIND(is_object); |
285 __ ba(false, exit); | 283 __ ba(exit); |
286 __ delayed()->st_ptr(O0, addr, G0); | 284 __ delayed()->st_ptr(O0, addr, G0); |
287 | 285 |
288 __ BIND(is_float); | 286 __ BIND(is_float); |
289 __ ba(false, exit); | 287 __ ba(exit); |
290 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0); | 288 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0); |
291 | 289 |
292 __ BIND(is_double); | 290 __ BIND(is_double); |
293 __ ba(false, exit); | 291 __ ba(exit); |
294 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0); | 292 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0); |
295 | 293 |
296 __ BIND(is_long); | 294 __ BIND(is_long); |
297 #ifdef _LP64 | 295 #ifdef _LP64 |
298 __ ba(false, exit); | 296 __ ba(exit); |
299 __ delayed()->st_long(O0, addr, G0); // store entire long | 297 __ delayed()->st_long(O0, addr, G0); // store entire long |
300 #else | 298 #else |
301 #if defined(COMPILER2) | 299 #if defined(COMPILER2) |
302 // All return values are where we want them, except for Longs. C2 returns | 300 // All return values are where we want them, except for Longs. C2 returns |
303 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. | 301 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. |
305 // build we simply always use G1. | 303 // build we simply always use G1. |
306 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to | 304 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to |
307 // do this here. Unfortunately if we did a rethrow we'd see an machepilog node | 305 // do this here. Unfortunately if we did a rethrow we'd see an machepilog node |
308 // first which would move g1 -> O0/O1 and destroy the exception we were throwing. | 306 // first which would move g1 -> O0/O1 and destroy the exception we were throwing. |
309 | 307 |
310 __ ba(false, exit); | 308 __ ba(exit); |
311 __ delayed()->stx(G1, addr, G0); // store entire long | 309 __ delayed()->stx(G1, addr, G0); // store entire long |
312 #else | 310 #else |
313 __ st(O1, addr, BytesPerInt); | 311 __ st(O1, addr, BytesPerInt); |
314 __ ba(false, exit); | 312 __ ba(exit); |
315 __ delayed()->st(O0, addr, G0); | 313 __ delayed()->st(O0, addr, G0); |
316 #endif /* COMPILER2 */ | 314 #endif /* COMPILER2 */ |
317 #endif /* _LP64 */ | 315 #endif /* _LP64 */ |
318 } | 316 } |
319 return start; | 317 return start; |
380 | 378 |
381 #ifdef ASSERT | 379 #ifdef ASSERT |
382 // make sure that this code is only executed if there is a pending exception | 380 // make sure that this code is only executed if there is a pending exception |
383 { Label L; | 381 { Label L; |
384 __ ld_ptr(exception_addr, Gtemp); | 382 __ ld_ptr(exception_addr, Gtemp); |
385 __ br_notnull(Gtemp, false, Assembler::pt, L); | 383 __ br_notnull_short(Gtemp, Assembler::pt, L); |
386 __ delayed()->nop(); | |
387 __ stop("StubRoutines::forward exception: no pending exception (1)"); | 384 __ stop("StubRoutines::forward exception: no pending exception (1)"); |
388 __ bind(L); | 385 __ bind(L); |
389 } | 386 } |
390 #endif | 387 #endif |
391 | 388 |
404 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC | 401 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC |
405 | 402 |
406 #ifdef ASSERT | 403 #ifdef ASSERT |
407 // make sure exception is set | 404 // make sure exception is set |
408 { Label L; | 405 { Label L; |
409 __ br_notnull(Oexception, false, Assembler::pt, L); | 406 __ br_notnull_short(Oexception, Assembler::pt, L); |
410 __ delayed()->nop(); | |
411 __ stop("StubRoutines::forward exception: no pending exception (2)"); | 407 __ stop("StubRoutines::forward exception: no pending exception (2)"); |
412 __ bind(L); | 408 __ bind(L); |
413 } | 409 } |
414 #endif | 410 #endif |
415 // jump to exception handler | 411 // jump to exception handler |
464 // is pushed. | 460 // is pushed. |
465 __ save_frame(0); | 461 __ save_frame(0); |
466 | 462 |
467 int frame_complete = __ offset(); | 463 int frame_complete = __ offset(); |
468 | 464 |
469 if (restore_saved_exception_pc) { | |
470 __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7); | |
471 __ sub(I7, frame::pc_return_offset, I7); | |
472 } | |
473 | |
474 // Note that we always have a runtime stub frame on the top of stack by this point | 465 // Note that we always have a runtime stub frame on the top of stack by this point |
475 Register last_java_sp = SP; | 466 Register last_java_sp = SP; |
476 // 64-bit last_java_sp is biased! | 467 // 64-bit last_java_sp is biased! |
477 __ set_last_Java_frame(last_java_sp, G0); | 468 __ set_last_Java_frame(last_java_sp, G0); |
478 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early | 469 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early |
499 Label L; | 490 Label L; |
500 | 491 |
501 Address exception_addr(G2_thread, Thread::pending_exception_offset()); | 492 Address exception_addr(G2_thread, Thread::pending_exception_offset()); |
502 Register scratch_reg = Gtemp; | 493 Register scratch_reg = Gtemp; |
503 __ ld_ptr(exception_addr, scratch_reg); | 494 __ ld_ptr(exception_addr, scratch_reg); |
504 __ br_notnull(scratch_reg, false, Assembler::pt, L); | 495 __ br_notnull_short(scratch_reg, Assembler::pt, L); |
505 __ delayed()->nop(); | |
506 __ should_not_reach_here(); | 496 __ should_not_reach_here(); |
507 __ bind(L); | 497 __ bind(L); |
508 #endif // ASSERT | 498 #endif // ASSERT |
509 BLOCK_COMMENT("call forward_exception_entry"); | 499 BLOCK_COMMENT("call forward_exception_entry"); |
510 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); | 500 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); |
612 __ set(StubRoutines::Sparc::locked, lock_reg); | 602 __ set(StubRoutines::Sparc::locked, lock_reg); |
613 // Initialize yield counter | 603 // Initialize yield counter |
614 __ mov(G0,yield_reg); | 604 __ mov(G0,yield_reg); |
615 | 605 |
616 __ BIND(retry); | 606 __ BIND(retry); |
617 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount); | 607 __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield); |
618 __ br(Assembler::less, false, Assembler::pt, dontyield); | |
619 __ delayed()->nop(); | |
620 | 608 |
621 // This code can only be called from inside the VM, this | 609 // This code can only be called from inside the VM, this |
622 // stub is only invoked from Atomic::add(). We do not | 610 // stub is only invoked from Atomic::add(). We do not |
623 // want to use call_VM, because _last_java_sp and such | 611 // want to use call_VM, because _last_java_sp and such |
624 // must already be set. | 612 // must already be set. |
674 __ mov(O0, O3); // scratch copy of exchange value | 662 __ mov(O0, O3); // scratch copy of exchange value |
675 __ ld(O1, 0, O2); // observe the previous value | 663 __ ld(O1, 0, O2); // observe the previous value |
676 // try to replace O2 with O3 | 664 // try to replace O2 with O3 |
677 __ cas_under_lock(O1, O2, O3, | 665 __ cas_under_lock(O1, O2, O3, |
678 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false); | 666 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false); |
679 __ cmp(O2, O3); | 667 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry); |
680 __ br(Assembler::notEqual, false, Assembler::pn, retry); | |
681 __ delayed()->nop(); | |
682 | 668 |
683 __ retl(false); | 669 __ retl(false); |
684 __ delayed()->mov(O2, O0); // report previous value to caller | 670 __ delayed()->mov(O2, O0); // report previous value to caller |
685 | 671 |
686 } else { | 672 } else { |
796 if (VM_Version::v9_instructions_work()) { | 782 if (VM_Version::v9_instructions_work()) { |
797 Label(retry); | 783 Label(retry); |
798 __ BIND(retry); | 784 __ BIND(retry); |
799 | 785 |
800 __ lduw(O1, 0, O2); | 786 __ lduw(O1, 0, O2); |
801 __ add(O0, O2, O3); | 787 __ add(O0, O2, O3); |
802 __ cas(O1, O2, O3); | 788 __ cas(O1, O2, O3); |
803 __ cmp( O2, O3); | 789 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry); |
804 __ br(Assembler::notEqual, false, Assembler::pn, retry); | |
805 __ delayed()->nop(); | |
806 __ retl(false); | 790 __ retl(false); |
807 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3 | 791 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3 |
808 } else { | 792 } else { |
809 const Register& lock_reg = O2; | 793 const Register& lock_reg = O2; |
810 const Register& lock_ptr_reg = O3; | 794 const Register& lock_ptr_reg = O3; |
1133 default: | 1117 default: |
1134 ShouldNotReachHere(); | 1118 ShouldNotReachHere(); |
1135 } | 1119 } |
1136 } | 1120 } |
1137 | 1121 |
1122 // | |
1123 // Generate main code for disjoint arraycopy | |
1124 // | |
1125 typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec, | |
1126 Label& L_loop, bool use_prefetch, bool use_bis); | |
1127 | |
1128 void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size, | |
1129 int iter_size, CopyLoopFunc copy_loop_func) { | |
1130 Label L_copy; | |
1131 | |
1132 assert(log2_elem_size <= 3, "the following code should be changed"); | |
1133 int count_dec = 16>>log2_elem_size; | |
1134 | |
1135 int prefetch_dist = MAX2(ArraycopySrcPrefetchDistance, ArraycopyDstPrefetchDistance); | |
1136 assert(prefetch_dist < 4096, "invalid value"); | |
1137 prefetch_dist = (prefetch_dist + (iter_size-1)) & (-iter_size); // round up to one iteration copy size | |
1138 int prefetch_count = (prefetch_dist >> log2_elem_size); // elements count | |
1139 | |
1140 if (UseBlockCopy) { | |
1141 Label L_block_copy, L_block_copy_prefetch, L_skip_block_copy; | |
1142 | |
1143 // 64 bytes tail + bytes copied in one loop iteration | |
1144 int tail_size = 64 + iter_size; | |
1145 int block_copy_count = (MAX2(tail_size, (int)BlockCopyLowLimit)) >> log2_elem_size; | |
1146 // Use BIS copy only for big arrays since it requires membar. | |
1147 __ set(block_copy_count, O4); | |
1148 __ cmp_and_br_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_skip_block_copy); | |
1149 // This code is for disjoint source and destination: | |
1150 // to <= from || to >= from+count | |
1151 // but BIS will stomp over 'from' if (to > from-tail_size && to <= from) | |
1152 __ sub(from, to, O4); | |
1153 __ srax(O4, 4, O4); // divide by 16 since the following short branch has only 5 bits for imm. | |
1154 __ cmp_and_br_short(O4, (tail_size>>4), Assembler::lessEqualUnsigned, Assembler::pn, L_skip_block_copy); | |
1155 | |
1156 __ wrasi(G0, Assembler::ASI_ST_BLKINIT_PRIMARY); | |
1157 // BIS should not be used to copy tail (64 bytes+iter_size) | |
1158 // to avoid zeroing of following values. | |
1159 __ sub(count, (tail_size>>log2_elem_size), count); // count is still positive >= 0 | |
1160 | |
1161 if (prefetch_count > 0) { // rounded up to one iteration count | |
1162 // Do prefetching only if copy size is bigger | |
1163 // than prefetch distance. | |
1164 __ set(prefetch_count, O4); | |
1165 __ cmp_and_brx_short(count, O4, Assembler::less, Assembler::pt, L_block_copy); | |
1166 __ sub(count, prefetch_count, count); | |
1167 | |
1168 (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy_prefetch, true, true); | |
1169 __ add(count, prefetch_count, count); // restore count | |
1170 | |
1171 } // prefetch_count > 0 | |
1172 | |
1173 (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy, false, true); | |
1174 __ add(count, (tail_size>>log2_elem_size), count); // restore count | |
1175 | |
1176 __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT); | |
1177 // BIS needs membar. | |
1178 __ membar(Assembler::StoreLoad); | |
1179 // Copy tail | |
1180 __ ba_short(L_copy); | |
1181 | |
1182 __ BIND(L_skip_block_copy); | |
1183 } // UseBlockCopy | |
1184 | |
1185 if (prefetch_count > 0) { // rounded up to one iteration count | |
1186 // Do prefetching only if copy size is bigger | |
1187 // than prefetch distance. | |
1188 __ set(prefetch_count, O4); | |
1189 __ cmp_and_brx_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_copy); | |
1190 __ sub(count, prefetch_count, count); | |
1191 | |
1192 Label L_copy_prefetch; | |
1193 (this->*copy_loop_func)(from, to, count, count_dec, L_copy_prefetch, true, false); | |
1194 __ add(count, prefetch_count, count); // restore count | |
1195 | |
1196 } // prefetch_count > 0 | |
1197 | |
1198 (this->*copy_loop_func)(from, to, count, count_dec, L_copy, false, false); | |
1199 } | |
1200 | |
1201 | |
1202 | |
1203 // | |
1204 // Helper methods for copy_16_bytes_forward_with_shift() | |
1205 // | |
1206 void copy_16_bytes_shift_loop(Register from, Register to, Register count, int count_dec, | |
1207 Label& L_loop, bool use_prefetch, bool use_bis) { | |
1208 | |
1209 const Register left_shift = G1; // left shift bit counter | |
1210 const Register right_shift = G5; // right shift bit counter | |
1211 | |
1212 __ align(OptoLoopAlignment); | |
1213 __ BIND(L_loop); | |
1214 if (use_prefetch) { | |
1215 if (ArraycopySrcPrefetchDistance > 0) { | |
1216 __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads); | |
1217 } | |
1218 if (ArraycopyDstPrefetchDistance > 0) { | |
1219 __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads); | |
1220 } | |
1221 } | |
1222 __ ldx(from, 0, O4); | |
1223 __ ldx(from, 8, G4); | |
1224 __ inc(to, 16); | |
1225 __ inc(from, 16); | |
1226 __ deccc(count, count_dec); // Can we do next iteration after this one? | |
1227 __ srlx(O4, right_shift, G3); | |
1228 __ bset(G3, O3); | |
1229 __ sllx(O4, left_shift, O4); | |
1230 __ srlx(G4, right_shift, G3); | |
1231 __ bset(G3, O4); | |
1232 if (use_bis) { | |
1233 __ stxa(O3, to, -16); | |
1234 __ stxa(O4, to, -8); | |
1235 } else { | |
1236 __ stx(O3, to, -16); | |
1237 __ stx(O4, to, -8); | |
1238 } | |
1239 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); | |
1240 __ delayed()->sllx(G4, left_shift, O3); | |
1241 } | |
1138 | 1242 |
1139 // Copy big chunks forward with shift | 1243 // Copy big chunks forward with shift |
1140 // | 1244 // |
1141 // Inputs: | 1245 // Inputs: |
1142 // from - source arrays | 1246 // from - source arrays |
1144 // count - elements count to copy >= the count equivalent to 16 bytes | 1248 // count - elements count to copy >= the count equivalent to 16 bytes |
1145 // count_dec - elements count's decrement equivalent to 16 bytes | 1249 // count_dec - elements count's decrement equivalent to 16 bytes |
1146 // L_copy_bytes - copy exit label | 1250 // L_copy_bytes - copy exit label |
1147 // | 1251 // |
1148 void copy_16_bytes_forward_with_shift(Register from, Register to, | 1252 void copy_16_bytes_forward_with_shift(Register from, Register to, |
1149 Register count, int count_dec, Label& L_copy_bytes) { | 1253 Register count, int log2_elem_size, Label& L_copy_bytes) { |
1150 Label L_loop, L_aligned_copy, L_copy_last_bytes; | 1254 Label L_aligned_copy, L_copy_last_bytes; |
1255 assert(log2_elem_size <= 3, "the following code should be changed"); | |
1256 int count_dec = 16>>log2_elem_size; | |
1151 | 1257 |
1152 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy | 1258 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy |
1153 __ andcc(from, 7, G1); // misaligned bytes | 1259 __ andcc(from, 7, G1); // misaligned bytes |
1154 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); | 1260 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); |
1155 __ delayed()->nop(); | 1261 __ delayed()->nop(); |
1156 | 1262 |
1157 const Register left_shift = G1; // left shift bit counter | 1263 const Register left_shift = G1; // left shift bit counter |
1158 const Register right_shift = G5; // right shift bit counter | 1264 const Register right_shift = G5; // right shift bit counter |
1159 | 1265 |
1160 __ sll(G1, LogBitsPerByte, left_shift); | 1266 __ sll(G1, LogBitsPerByte, left_shift); |
1161 __ mov(64, right_shift); | 1267 __ mov(64, right_shift); |
1162 __ sub(right_shift, left_shift, right_shift); | 1268 __ sub(right_shift, left_shift, right_shift); |
1163 | 1269 |
1164 // | 1270 // |
1165 // Load 2 aligned 8-bytes chunks and use one from previous iteration | 1271 // Load 2 aligned 8-bytes chunks and use one from previous iteration |
1166 // to form 2 aligned 8-bytes chunks to store. | 1272 // to form 2 aligned 8-bytes chunks to store. |
1167 // | 1273 // |
1168 __ deccc(count, count_dec); // Pre-decrement 'count' | 1274 __ dec(count, count_dec); // Pre-decrement 'count' |
1169 __ andn(from, 7, from); // Align address | 1275 __ andn(from, 7, from); // Align address |
1170 __ ldx(from, 0, O3); | 1276 __ ldx(from, 0, O3); |
1171 __ inc(from, 8); | 1277 __ inc(from, 8); |
1172 __ align(OptoLoopAlignment); | 1278 __ sllx(O3, left_shift, O3); |
1173 __ BIND(L_loop); | 1279 |
1174 __ ldx(from, 0, O4); | 1280 disjoint_copy_core(from, to, count, log2_elem_size, 16, copy_16_bytes_shift_loop); |
1175 __ deccc(count, count_dec); // Can we do next iteration after this one? | 1281 |
1176 __ ldx(from, 8, G4); | 1282 __ inccc(count, count_dec>>1 ); // + 8 bytes |
1177 __ inc(to, 16); | 1283 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); |
1178 __ inc(from, 16); | 1284 __ delayed()->inc(count, count_dec>>1); // restore 'count' |
1179 __ sllx(O3, left_shift, O3); | 1285 |
1180 __ srlx(O4, right_shift, G3); | 1286 // copy 8 bytes, part of them already loaded in O3 |
1181 __ bset(G3, O3); | 1287 __ ldx(from, 0, O4); |
1182 __ stx(O3, to, -16); | 1288 __ inc(to, 8); |
1183 __ sllx(O4, left_shift, O4); | 1289 __ inc(from, 8); |
1184 __ srlx(G4, right_shift, G3); | 1290 __ srlx(O4, right_shift, G3); |
1185 __ bset(G3, O4); | 1291 __ bset(O3, G3); |
1186 __ stx(O4, to, -8); | 1292 __ stx(G3, to, -8); |
1187 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); | |
1188 __ delayed()->mov(G4, O3); | |
1189 | |
1190 __ inccc(count, count_dec>>1 ); // + 8 bytes | |
1191 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); | |
1192 __ delayed()->inc(count, count_dec>>1); // restore 'count' | |
1193 | |
1194 // copy 8 bytes, part of them already loaded in O3 | |
1195 __ ldx(from, 0, O4); | |
1196 __ inc(to, 8); | |
1197 __ inc(from, 8); | |
1198 __ sllx(O3, left_shift, O3); | |
1199 __ srlx(O4, right_shift, G3); | |
1200 __ bset(O3, G3); | |
1201 __ stx(G3, to, -8); | |
1202 | 1293 |
1203 __ BIND(L_copy_last_bytes); | 1294 __ BIND(L_copy_last_bytes); |
1204 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes | 1295 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes |
1205 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes); | 1296 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes); |
1206 __ delayed()->sub(from, right_shift, from); // restore address | 1297 __ delayed()->sub(from, right_shift, from); // restore address |
1207 | 1298 |
1208 __ BIND(L_aligned_copy); | 1299 __ BIND(L_aligned_copy); |
1209 } | 1300 } |
1210 | 1301 |
1211 // Copy big chunks backward with shift | 1302 // Copy big chunks backward with shift |
1357 // the same alignment mod 8, otherwise fall through to the next | 1448 // the same alignment mod 8, otherwise fall through to the next |
1358 // code for aligned copy. | 1449 // code for aligned copy. |
1359 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. | 1450 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. |
1360 // Also jump over aligned copy after the copy with shift completed. | 1451 // Also jump over aligned copy after the copy with shift completed. |
1361 | 1452 |
1362 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte); | 1453 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); |
1363 } | 1454 } |
1364 | 1455 |
1365 // Both arrays are 8-byte aligned, copy 16 bytes at a time | 1456 // Both arrays are 8-byte aligned, copy 16 bytes at a time |
1366 __ and3(count, 7, G4); // Save count | 1457 __ and3(count, 7, G4); // Save count |
1367 __ srl(count, 3, count); | 1458 __ srl(count, 3, count); |
1368 generate_disjoint_long_copy_core(aligned); | 1459 generate_disjoint_long_copy_core(aligned); |
1369 __ mov(G4, count); // Restore count | 1460 __ mov(G4, count); // Restore count |
1370 | 1461 |
1371 // copy trailing bytes | 1462 // copy trailing bytes |
1372 __ BIND(L_copy_byte); | 1463 __ BIND(L_copy_byte); |
1373 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | 1464 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1374 __ delayed()->nop(); | |
1375 __ align(OptoLoopAlignment); | 1465 __ align(OptoLoopAlignment); |
1376 __ BIND(L_copy_byte_loop); | 1466 __ BIND(L_copy_byte_loop); |
1377 __ ldub(from, offset, O3); | 1467 __ ldub(from, offset, O3); |
1378 __ deccc(count); | 1468 __ deccc(count); |
1379 __ stb(O3, to, offset); | 1469 __ stb(O3, to, offset); |
1480 __ delayed()->stx(O4, end_to, 0); | 1570 __ delayed()->stx(O4, end_to, 0); |
1481 __ inc(count, 16); | 1571 __ inc(count, 16); |
1482 | 1572 |
1483 // copy 1 element (1 byte) at a time | 1573 // copy 1 element (1 byte) at a time |
1484 __ BIND(L_copy_byte); | 1574 __ BIND(L_copy_byte); |
1485 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | 1575 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1486 __ delayed()->nop(); | |
1487 __ align(OptoLoopAlignment); | 1576 __ align(OptoLoopAlignment); |
1488 __ BIND(L_copy_byte_loop); | 1577 __ BIND(L_copy_byte_loop); |
1489 __ dec(end_from); | 1578 __ dec(end_from); |
1490 __ dec(end_to); | 1579 __ dec(end_to); |
1491 __ ldub(end_from, 0, O4); | 1580 __ ldub(end_from, 0, O4); |
1587 // the same alignment mod 8, otherwise fall through to the next | 1676 // the same alignment mod 8, otherwise fall through to the next |
1588 // code for aligned copy. | 1677 // code for aligned copy. |
1589 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. | 1678 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. |
1590 // Also jump over aligned copy after the copy with shift completed. | 1679 // Also jump over aligned copy after the copy with shift completed. |
1591 | 1680 |
1592 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes); | 1681 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); |
1593 } | 1682 } |
1594 | 1683 |
1595 // Both arrays are 8-byte aligned, copy 16 bytes at a time | 1684 // Both arrays are 8-byte aligned, copy 16 bytes at a time |
1596 __ and3(count, 3, G4); // Save | 1685 __ and3(count, 3, G4); // Save |
1597 __ srl(count, 2, count); | 1686 __ srl(count, 2, count); |
1598 generate_disjoint_long_copy_core(aligned); | 1687 generate_disjoint_long_copy_core(aligned); |
1599 __ mov(G4, count); // restore | 1688 __ mov(G4, count); // restore |
1600 | 1689 |
1601 // copy 1 element at a time | 1690 // copy 1 element at a time |
1602 __ BIND(L_copy_2_bytes); | 1691 __ BIND(L_copy_2_bytes); |
1603 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | 1692 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1604 __ delayed()->nop(); | |
1605 __ align(OptoLoopAlignment); | 1693 __ align(OptoLoopAlignment); |
1606 __ BIND(L_copy_2_bytes_loop); | 1694 __ BIND(L_copy_2_bytes_loop); |
1607 __ lduh(from, offset, O3); | 1695 __ lduh(from, offset, O3); |
1608 __ deccc(count); | 1696 __ deccc(count); |
1609 __ sth(O3, to, offset); | 1697 __ sth(O3, to, offset); |
1944 __ delayed()->stx(O4, end_to, 0); | 2032 __ delayed()->stx(O4, end_to, 0); |
1945 __ inc(count, 8); | 2033 __ inc(count, 8); |
1946 | 2034 |
1947 // copy 1 element (2 bytes) at a time | 2035 // copy 1 element (2 bytes) at a time |
1948 __ BIND(L_copy_2_bytes); | 2036 __ BIND(L_copy_2_bytes); |
1949 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | 2037 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
1950 __ delayed()->nop(); | |
1951 __ BIND(L_copy_2_bytes_loop); | 2038 __ BIND(L_copy_2_bytes_loop); |
1952 __ dec(end_from, 2); | 2039 __ dec(end_from, 2); |
1953 __ dec(end_to, 2); | 2040 __ dec(end_to, 2); |
1954 __ lduh(end_from, 0, O4); | 2041 __ lduh(end_from, 0, O4); |
1955 __ deccc(count); | 2042 __ deccc(count); |
1963 __ delayed()->mov(G0, O0); // return 0 | 2050 __ delayed()->mov(G0, O0); // return 0 |
1964 return start; | 2051 return start; |
1965 } | 2052 } |
1966 | 2053 |
1967 // | 2054 // |
2055 // Helper methods for generate_disjoint_int_copy_core() | |
2056 // | |
2057 void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec, | |
2058 Label& L_loop, bool use_prefetch, bool use_bis) { | |
2059 | |
2060 __ align(OptoLoopAlignment); | |
2061 __ BIND(L_loop); | |
2062 if (use_prefetch) { | |
2063 if (ArraycopySrcPrefetchDistance > 0) { | |
2064 __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads); | |
2065 } | |
2066 if (ArraycopyDstPrefetchDistance > 0) { | |
2067 __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads); | |
2068 } | |
2069 } | |
2070 __ ldx(from, 4, O4); | |
2071 __ ldx(from, 12, G4); | |
2072 __ inc(to, 16); | |
2073 __ inc(from, 16); | |
2074 __ deccc(count, 4); // Can we do next iteration after this one? | |
2075 | |
2076 __ srlx(O4, 32, G3); | |
2077 __ bset(G3, O3); | |
2078 __ sllx(O4, 32, O4); | |
2079 __ srlx(G4, 32, G3); | |
2080 __ bset(G3, O4); | |
2081 if (use_bis) { | |
2082 __ stxa(O3, to, -16); | |
2083 __ stxa(O4, to, -8); | |
2084 } else { | |
2085 __ stx(O3, to, -16); | |
2086 __ stx(O4, to, -8); | |
2087 } | |
2088 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); | |
2089 __ delayed()->sllx(G4, 32, O3); | |
2090 | |
2091 } | |
2092 | |
2093 // | |
1968 // Generate core code for disjoint int copy (and oop copy on 32-bit). | 2094 // Generate core code for disjoint int copy (and oop copy on 32-bit). |
1969 // If "aligned" is true, the "from" and "to" addresses are assumed | 2095 // If "aligned" is true, the "from" and "to" addresses are assumed |
1970 // to be heapword aligned. | 2096 // to be heapword aligned. |
1971 // | 2097 // |
1972 // Arguments: | 2098 // Arguments: |
1975 // count: O2 treated as signed | 2101 // count: O2 treated as signed |
1976 // | 2102 // |
1977 void generate_disjoint_int_copy_core(bool aligned) { | 2103 void generate_disjoint_int_copy_core(bool aligned) { |
1978 | 2104 |
1979 Label L_skip_alignment, L_aligned_copy; | 2105 Label L_skip_alignment, L_aligned_copy; |
1980 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; | 2106 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; |
1981 | 2107 |
1982 const Register from = O0; // source array address | 2108 const Register from = O0; // source array address |
1983 const Register to = O1; // destination array address | 2109 const Register to = O1; // destination array address |
1984 const Register count = O2; // elements count | 2110 const Register count = O2; // elements count |
1985 const Register offset = O5; // offset from start of arrays | 2111 const Register offset = O5; // offset from start of arrays |
2026 // copy_16_bytes_forward_with_shift() is not used here since this | 2152 // copy_16_bytes_forward_with_shift() is not used here since this |
2027 // code is more optimal. | 2153 // code is more optimal. |
2028 | 2154 |
2029 // copy with shift 4 elements (16 bytes) at a time | 2155 // copy with shift 4 elements (16 bytes) at a time |
2030 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4 | 2156 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4 |
2031 | 2157 __ sllx(O3, 32, O3); |
2032 __ align(OptoLoopAlignment); | 2158 |
2033 __ BIND(L_copy_16_bytes); | 2159 disjoint_copy_core(from, to, count, 2, 16, copy_16_bytes_loop); |
2034 __ ldx(from, 4, O4); | |
2035 __ deccc(count, 4); // Can we do next iteration after this one? | |
2036 __ ldx(from, 12, G4); | |
2037 __ inc(to, 16); | |
2038 __ inc(from, 16); | |
2039 __ sllx(O3, 32, O3); | |
2040 __ srlx(O4, 32, G3); | |
2041 __ bset(G3, O3); | |
2042 __ stx(O3, to, -16); | |
2043 __ sllx(O4, 32, O4); | |
2044 __ srlx(G4, 32, G3); | |
2045 __ bset(G3, O4); | |
2046 __ stx(O4, to, -8); | |
2047 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); | |
2048 __ delayed()->mov(G4, O3); | |
2049 | 2160 |
2050 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); | 2161 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); |
2051 __ delayed()->inc(count, 4); // restore 'count' | 2162 __ delayed()->inc(count, 4); // restore 'count' |
2052 | 2163 |
2053 __ BIND(L_aligned_copy); | 2164 __ BIND(L_aligned_copy); |
2054 } | 2165 } // !aligned |
2166 | |
2055 // copy 4 elements (16 bytes) at a time | 2167 // copy 4 elements (16 bytes) at a time |
2056 __ and3(count, 1, G4); // Save | 2168 __ and3(count, 1, G4); // Save |
2057 __ srl(count, 1, count); | 2169 __ srl(count, 1, count); |
2058 generate_disjoint_long_copy_core(aligned); | 2170 generate_disjoint_long_copy_core(aligned); |
2059 __ mov(G4, count); // Restore | 2171 __ mov(G4, count); // Restore |
2060 | 2172 |
2061 // copy 1 element at a time | 2173 // copy 1 element at a time |
2062 __ BIND(L_copy_4_bytes); | 2174 __ BIND(L_copy_4_bytes); |
2063 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | 2175 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
2064 __ delayed()->nop(); | |
2065 __ BIND(L_copy_4_bytes_loop); | 2176 __ BIND(L_copy_4_bytes_loop); |
2066 __ ld(from, offset, O3); | 2177 __ ld(from, offset, O3); |
2067 __ deccc(count); | 2178 __ deccc(count); |
2068 __ st(O3, to, offset); | 2179 __ st(O3, to, offset); |
2069 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop); | 2180 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop); |
2191 __ delayed()->stx(O4, end_to, 0); | 2302 __ delayed()->stx(O4, end_to, 0); |
2192 __ inc(count, 4); | 2303 __ inc(count, 4); |
2193 | 2304 |
2194 // copy 1 element (4 bytes) at a time | 2305 // copy 1 element (4 bytes) at a time |
2195 __ BIND(L_copy_4_bytes); | 2306 __ BIND(L_copy_4_bytes); |
2196 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); | 2307 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); |
2197 __ delayed()->nop(); | |
2198 __ BIND(L_copy_4_bytes_loop); | 2308 __ BIND(L_copy_4_bytes_loop); |
2199 __ dec(end_from, 4); | 2309 __ dec(end_from, 4); |
2200 __ dec(end_to, 4); | 2310 __ dec(end_to, 4); |
2201 __ ld(end_from, 0, O4); | 2311 __ ld(end_from, 0, O4); |
2202 __ deccc(count); | 2312 __ deccc(count); |
2235 // O3, O4 are used as temp registers | 2345 // O3, O4 are used as temp registers |
2236 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); | 2346 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); |
2237 __ retl(); | 2347 __ retl(); |
2238 __ delayed()->mov(G0, O0); // return 0 | 2348 __ delayed()->mov(G0, O0); // return 0 |
2239 return start; | 2349 return start; |
2350 } | |
2351 | |
2352 // | |
2353 // Helper methods for generate_disjoint_long_copy_core() | |
2354 // | |
// Emits one aligned loop, bound at L_loop, that copies 64 bytes per iteration
// from 'from' to 'to' as four pairs of 8-byte loads/stores.
//   from, to     - source / destination address registers; each advanced by 64 per iteration
//   count        - loop counter register; decremented by 8 per iteration, the loop repeats
//                  while the result is >= 0
//   count_dec    - not referenced anywhere in this body (the decrement is the literal 8)
//   L_loop       - label bound at the (aligned) loop head
//   use_prefetch - when true, emit prefetches for the source and/or destination, each only
//                  if its Arraycopy{Src,Dst}PrefetchDistance flag is positive
//   use_bis      - when true, emit stxa instead of stx for the stores
//                  (presumably block-initializing stores; confirm against the caller's ASI setup)
// O4 and O5 are used as temporaries for the loaded data.
2355 void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec, | |
2356 Label& L_loop, bool use_prefetch, bool use_bis) { | |
2357 __ align(OptoLoopAlignment); | |
2358 __ BIND(L_loop); | |
// Unrolled at code-generation time: 4 steps of 16 bytes each (off = 0, 16, 32, 48).
2359 for (int off = 0; off < 64; off += 16) { | |
// (off & 31) == 0 holds only for off = 0 and off = 32, i.e. one prefetch group per 32 bytes.
2360 if (use_prefetch && (off & 31) == 0) { | |
2361 if (ArraycopySrcPrefetchDistance > 0) { | |
2362 __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads); | |
2363 } | |
2364 if (ArraycopyDstPrefetchDistance > 0) { | |
2365 __ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads); | |
2366 } | |
2367 } | |
// Load two consecutive 8-byte words, then store them at the same offsets in the destination.
2368 __ ldx(from, off+0, O4); | |
2369 __ ldx(from, off+8, O5); | |
2370 if (use_bis) { | |
2371 __ stxa(O4, to, off+0); | |
2372 __ stxa(O5, to, off+8); | |
2373 } else { | |
2374 __ stx(O4, to, off+0); | |
2375 __ stx(O5, to, off+8); | |
2376 } | |
2377 } | |
// deccc updates the condition codes tested by brx below; the increment of 'to'
// is emitted through delayed(), so it is paired with the branch.
2378 __ deccc(count, 8); | |
2379 __ inc(from, 64); | |
2380 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop); | |
2381 __ delayed()->inc(to, 64); | |
2240 } | 2382 } |
2241 | 2383 |
2242 // | 2384 // |
2243 // Generate core code for disjoint long copy (and oop copy on 64-bit). | 2385 // Generate core code for disjoint long copy (and oop copy on 64-bit). |
2244 // "aligned" is ignored, because we must make the stronger | 2386 // "aligned" is ignored, because we must make the stronger |
2276 const Register to = O1; // destination array address | 2418 const Register to = O1; // destination array address |
2277 const Register count = O2; // elements count | 2419 const Register count = O2; // elements count |
2278 const Register offset0 = O4; // element offset | 2420 const Register offset0 = O4; // element offset |
2279 const Register offset8 = O5; // next element offset | 2421 const Register offset8 = O5; // next element offset |
2280 | 2422 |
2281 __ deccc(count, 2); | 2423 __ deccc(count, 2); |
2282 __ mov(G0, offset0); // offset from start of arrays (0) | 2424 __ mov(G0, offset0); // offset from start of arrays (0) |
2283 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); | 2425 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); |
2284 __ delayed()->add(offset0, 8, offset8); | 2426 __ delayed()->add(offset0, 8, offset8); |
2285 | 2427 |
2286 // Copy by 64 bytes chunks | 2428 // Copy by 64 bytes chunks |
2287 Label L_copy_64_bytes; | 2429 |
2288 const Register from64 = O3; // source address | 2430 const Register from64 = O3; // source address |
2289 const Register to64 = G3; // destination address | 2431 const Register to64 = G3; // destination address |
2290 __ subcc(count, 6, O3); | 2432 __ subcc(count, 6, O3); |
2291 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes ); | 2433 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes ); |
2292 __ delayed()->mov(to, to64); | 2434 __ delayed()->mov(to, to64); |
2293 // Now we can use O4(offset0), O5(offset8) as temps | 2435 // Now we can use O4(offset0), O5(offset8) as temps |
2294 __ mov(O3, count); | 2436 __ mov(O3, count); |
2295 __ mov(from, from64); | 2437 // count >= 0 (original count - 8) |
2296 | 2438 __ mov(from, from64); |
2297 __ align(OptoLoopAlignment); | 2439 |
2298 __ BIND(L_copy_64_bytes); | 2440 disjoint_copy_core(from64, to64, count, 3, 64, copy_64_bytes_loop); |
2299 for( int off = 0; off < 64; off += 16 ) { | |
2300 __ ldx(from64, off+0, O4); | |
2301 __ ldx(from64, off+8, O5); | |
2302 __ stx(O4, to64, off+0); | |
2303 __ stx(O5, to64, off+8); | |
2304 } | |
2305 __ deccc(count, 8); | |
2306 __ inc(from64, 64); | |
2307 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes); | |
2308 __ delayed()->inc(to64, 64); | |
2309 | 2441 |
2310 // Restore O4(offset0), O5(offset8) | 2442 // Restore O4(offset0), O5(offset8) |
2311 __ sub(from64, from, offset0); | 2443 __ sub(from64, from, offset0); |
2312 __ inccc(count, 6); | 2444 __ inccc(count, 6); // restore count |
2313 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); | 2445 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); |
2314 __ delayed()->add(offset0, 8, offset8); | 2446 __ delayed()->add(offset0, 8, offset8); |
2315 | 2447 |
2316 // Copy by 16 bytes chunks | 2448 // Copy by 16 bytes chunks |
2317 __ align(OptoLoopAlignment); | 2449 __ align(OptoLoopAlignment); |
2574 __ save_frame(0); | 2706 __ save_frame(0); |
2575 __ check_klass_subtype_slow_path(sub_klass->after_save(), | 2707 __ check_klass_subtype_slow_path(sub_klass->after_save(), |
2576 super_klass->after_save(), | 2708 super_klass->after_save(), |
2577 L0, L1, L2, L4, | 2709 L0, L1, L2, L4, |
2578 NULL, &L_pop_to_miss); | 2710 NULL, &L_pop_to_miss); |
2579 __ ba(false, L_success); | 2711 __ ba(L_success); |
2580 __ delayed()->restore(); | 2712 __ delayed()->restore(); |
2581 | 2713 |
2582 __ bind(L_pop_to_miss); | 2714 __ bind(L_pop_to_miss); |
2583 __ restore(); | 2715 __ restore(); |
2584 | 2716 |
2671 __ delayed()->set(0, O0); // return 0 on success | 2803 __ delayed()->set(0, O0); // return 0 on success |
2672 | 2804 |
2673 // ======== loop entry is here ======== | 2805 // ======== loop entry is here ======== |
2674 __ BIND(load_element); | 2806 __ BIND(load_element); |
2675 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop | 2807 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop |
2676 __ br_null(G3_oop, true, Assembler::pt, store_element); | 2808 __ br_null_short(G3_oop, Assembler::pt, store_element); |
2677 __ delayed()->nop(); | |
2678 | 2809 |
2679 __ load_klass(G3_oop, G4_klass); // query the object klass | 2810 __ load_klass(G3_oop, G4_klass); // query the object klass |
2680 | 2811 |
2681 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, | 2812 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, |
2682 // branch to this on success: | 2813 // branch to this on success: |
2894 | 3025 |
2895 #ifdef ASSERT | 3026 #ifdef ASSERT |
2896 // assert(src->klass() != NULL); | 3027 // assert(src->klass() != NULL); |
2897 BLOCK_COMMENT("assert klasses not null"); | 3028 BLOCK_COMMENT("assert klasses not null"); |
2898 { Label L_a, L_b; | 3029 { Label L_a, L_b; |
2899 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL | 3030 __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL |
2900 __ delayed()->nop(); | |
2901 __ bind(L_a); | 3031 __ bind(L_a); |
2902 __ stop("broken null klass"); | 3032 __ stop("broken null klass"); |
2903 __ bind(L_b); | 3033 __ bind(L_b); |
2904 __ load_klass(dst, G4_dst_klass); | 3034 __ load_klass(dst, G4_dst_klass); |
2905 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also | 3035 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also |
2935 } else { | 3065 } else { |
2936 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass); | 3066 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass); |
2937 } | 3067 } |
2938 | 3068 |
2939 // if (src->klass() != dst->klass()) return -1; | 3069 // if (src->klass() != dst->klass()) return -1; |
2940 __ cmp(G3_src_klass, G4_dst_klass); | 3070 __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed); |
2941 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed); | |
2942 __ delayed()->nop(); | |
2943 | 3071 |
2944 // if (!src->is_Array()) return -1; | 3072 // if (!src->is_Array()) return -1; |
2945 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0 | 3073 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0 |
2946 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed); | 3074 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed); |
2947 | 3075 |
3005 __ cmp(G3_elsize, LogBytesPerInt); | 3133 __ cmp(G3_elsize, LogBytesPerInt); |
3006 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy); | 3134 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy); |
3007 __ delayed()->signx(length, count); // length | 3135 __ delayed()->signx(length, count); // length |
3008 #ifdef ASSERT | 3136 #ifdef ASSERT |
3009 { Label L; | 3137 { Label L; |
3010 __ cmp(G3_elsize, LogBytesPerLong); | 3138 __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L); |
3011 __ br(Assembler::equal, false, Assembler::pt, L); | |
3012 __ delayed()->nop(); | |
3013 __ stop("must be long copy, but elsize is wrong"); | 3139 __ stop("must be long copy, but elsize is wrong"); |
3014 __ bind(L); | 3140 __ bind(L); |
3015 } | 3141 } |
3016 #endif | 3142 #endif |
3017 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy); | 3143 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy); |
3089 __ BIND(L_failed); | 3215 __ BIND(L_failed); |
3090 __ retl(); | 3216 __ retl(); |
3091 __ delayed()->sub(G0, 1, O0); // return -1 | 3217 __ delayed()->sub(G0, 1, O0); // return -1 |
3092 return start; | 3218 return start; |
3093 } | 3219 } |
3220 | |
3221 // | |
3222 // Generate stub for heap zeroing. | |
3223 // "to" address is aligned to jlong (8 bytes). | |
3224 // | |
3225 // Arguments for generated stub: | |
3226 // to: O0 | |
3227 // count: O1 treated as signed (count of HeapWord) | |
3228 // count could be 0 | |
3229 // | |
// Emits the zeroing stub under the given name and returns the address of its first instruction.
3230 address generate_zero_aligned_words(const char* name) { | |
3231 __ align(CodeEntryAlignment); | |
3232 StubCodeMark mark(this, "StubRoutines", name); | |
3233 address start = __ pc(); | |
3234 | |
3235 const Register to = O0; // address of the memory to zero (destination, not a source) | |
3236 const Register count = O1; // HeapWords count | |
3237 const Register temp = O2; // scratch | |
3238 | |
3239 Label Ldone; | |
3240 __ sllx(count, LogHeapWordSize, count); // convert the HeapWord count to a byte count | |
3241 // Use BIS for zeroing | |
// Ldone is passed to bis_zeroing and bound immediately after it, so whichever way
// the helper finishes, control continues at the return sequence below.
3242 __ bis_zeroing(to, count, temp, Ldone); | |
3243 __ bind(Ldone); | |
// Leaf return; the slot emitted through delayed() is filled with a nop.
3244 __ retl(); | |
3245 __ delayed()->nop(); | |
3246 return start; | |
3247 } | |
3094 | 3248 |
3095 void generate_arraycopy_stubs() { | 3249 void generate_arraycopy_stubs() { |
3096 address entry; | 3250 address entry; |
3097 address entry_jbyte_arraycopy; | 3251 address entry_jbyte_arraycopy; |
3098 address entry_jshort_arraycopy; | 3252 address entry_jshort_arraycopy; |
3216 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); | 3370 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); |
3217 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); | 3371 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); |
3218 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); | 3372 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); |
3219 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); | 3373 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); |
3220 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); | 3374 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); |
3375 | |
3376 if (UseBlockZeroing) { | |
3377 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); | |
3378 } | |
3221 } | 3379 } |
3222 | 3380 |
3223 void generate_initial() { | 3381 void generate_initial() { |
3224 // Generates all stubs and initializes the entry points | 3382 // Generates all stubs and initializes the entry points |
3225 | 3383 |
3264 | 3422 |
3265 // Generate partial_subtype_check first here since its code depends on | 3423 // Generate partial_subtype_check first here since its code depends on |
3266 // UseZeroBaseCompressedOops which is defined after heap initialization. | 3424 // UseZeroBaseCompressedOops which is defined after heap initialization. |
3267 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check(); | 3425 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check(); |
3268 // These entry points require SharedInfo::stack0 to be set up in non-core builds | 3426 // These entry points require SharedInfo::stack0 to be set up in non-core builds |
3269 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); | 3427 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError)); |
3270 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false); | 3428 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError)); |
3271 StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true); | 3429 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call)); |
3272 StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true); | 3430 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); |
3273 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); | |
3274 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); | |
3275 | 3431 |
3276 StubRoutines::_handler_for_unsafe_access_entry = | 3432 StubRoutines::_handler_for_unsafe_access_entry = |
3277 generate_handler_for_unsafe_access(); | 3433 generate_handler_for_unsafe_access(); |
3278 | 3434 |
3279 // support for verify_oop (must happen after universe_init) | 3435 // support for verify_oop (must happen after universe_init) |