comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 4137:04b9a2566eec

Merge with hsx23/hotspot.
author Thomas Wuerthinger <thomas.wuerthinger@oracle.com>
date Sat, 17 Dec 2011 21:40:27 +0100
parents a92cdbac8b9e
children 33df1aeaebbf
comparing revision 3737:9dc19b7d89a3 with revision 4137:04b9a2566eec
148 #ifdef ASSERT 148 #ifdef ASSERT
149 // make sure we have no pending exceptions 149 // make sure we have no pending exceptions
150 { const Register t = G3_scratch; 150 { const Register t = G3_scratch;
151 Label L; 151 Label L;
152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t); 152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
153 __ br_null(t, false, Assembler::pt, L); 153 __ br_null_short(t, Assembler::pt, L);
154 __ delayed()->nop();
155 __ stop("StubRoutines::call_stub: entered with pending exception"); 154 __ stop("StubRoutines::call_stub: entered with pending exception");
156 __ bind(L); 155 __ bind(L);
157 } 156 }
158 #endif 157 #endif
159 158
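For context, the recurring change throughout this comparison replaces a conditional branch plus an explicit delay-slot nop with one of the new short-branch macros (br_null_short, br_notnull_short, cmp_and_br_short, cmp_and_brx_short, cmp_zero_and_br, ba_short). The contrast below is lifted from the hunk above rather than written fresh; the `__` prefix is HotSpot's usual masm shorthand, and how the macro fills or elides the delay slot is left to its implementation:

    // old idiom: branch, then burn the delay slot on a nop
    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
    __ br_null(t, false, Assembler::pt, L);
    __ delayed()->nop();

    // new idiom: one macro call, no separate delayed()->nop()
    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
    __ br_null_short(t, Assembler::pt, L);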
205 204
206 // test if any parameters & setup of Lentry_args 205 // test if any parameters & setup of Lentry_args
207 Label exit; 206 Label exit;
208 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter 207 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
209 __ add( FP, STACK_BIAS, dst ); 208 __ add( FP, STACK_BIAS, dst );
210 __ tst(cnt); 209 __ cmp_zero_and_br(Assembler::zero, cnt, exit);
211 __ br(Assembler::zero, false, Assembler::pn, exit);
212 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args 210 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
213 211
214 // copy parameters if any 212 // copy parameters if any
215 Label loop; 213 Label loop;
216 __ BIND(loop); 214 __ BIND(loop);
280 __ BIND(exit); 278 __ BIND(exit);
281 __ ret(); 279 __ ret();
282 __ delayed()->restore(); 280 __ delayed()->restore();
283 281
284 __ BIND(is_object); 282 __ BIND(is_object);
285 __ ba(false, exit); 283 __ ba(exit);
286 __ delayed()->st_ptr(O0, addr, G0); 284 __ delayed()->st_ptr(O0, addr, G0);
287 285
288 __ BIND(is_float); 286 __ BIND(is_float);
289 __ ba(false, exit); 287 __ ba(exit);
290 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0); 288 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
291 289
292 __ BIND(is_double); 290 __ BIND(is_double);
293 __ ba(false, exit); 291 __ ba(exit);
294 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0); 292 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
295 293
296 __ BIND(is_long); 294 __ BIND(is_long);
297 #ifdef _LP64 295 #ifdef _LP64
298 __ ba(false, exit); 296 __ ba(exit);
299 __ delayed()->st_long(O0, addr, G0); // store entire long 297 __ delayed()->st_long(O0, addr, G0); // store entire long
300 #else 298 #else
301 #if defined(COMPILER2) 299 #if defined(COMPILER2)
302 // All return values are where we want them, except for Longs. C2 returns 300 // All return values are where we want them, except for Longs. C2 returns
303 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. 301 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
304 // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit 302 // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit
305 // build we simply always use G1. 303 // build we simply always use G1.
306 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to 304 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
307 // do this here. Unfortunately if we did a rethrow we'd see a machepilog node 305 // do this here. Unfortunately if we did a rethrow we'd see a machepilog node
308 // first which would move g1 -> O0/O1 and destroy the exception we were throwing. 306 // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
309 307
310 __ ba(false, exit); 308 __ ba(exit);
311 __ delayed()->stx(G1, addr, G0); // store entire long 309 __ delayed()->stx(G1, addr, G0); // store entire long
312 #else 310 #else
313 __ st(O1, addr, BytesPerInt); 311 __ st(O1, addr, BytesPerInt);
314 __ ba(false, exit); 312 __ ba(exit);
315 __ delayed()->st(O0, addr, G0); 313 __ delayed()->st(O0, addr, G0);
316 #endif /* COMPILER2 */ 314 #endif /* COMPILER2 */
317 #endif /* _LP64 */ 315 #endif /* _LP64 */
318 } 316 }
319 return start; 317 return start;
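The is_object/is_float/is_double/is_long labels above route the callee's return value into the caller-supplied result slot, with the store placed in the branch delay slot of the jump to exit. A minimal C++ sketch of the equivalent dispatch, with invented names and only the cases visible in this hunk (not HotSpot code):

    #include <cstdint>

    enum ResultKind { kObject, kFloat, kDouble, kLong, kIntDefault };  // illustrative only

    // Sketch of what the stub's result dispatch computes: store the returned
    // value into 'addr' according to the declared result type.
    void store_call_result(void* addr, ResultKind kind,
                           intptr_t word, float f, double d, int64_t l) {
      switch (kind) {
        case kObject: *(intptr_t*)addr = word; break;  // st_ptr(O0, addr, G0)
        case kFloat:  *(float*)addr    = f;    break;  // stf(FloatRegisterImpl::S, F0, ...)
        case kDouble: *(double*)addr   = d;    break;  // stf(FloatRegisterImpl::D, F0, ...)
        case kLong:   *(int64_t*)addr  = l;    break;  // st_long/stx, or two 32-bit st's on !_LP64
        default:      *(intptr_t*)addr = word; break;
      }
    }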
380 378
381 #ifdef ASSERT 379 #ifdef ASSERT
382 // make sure that this code is only executed if there is a pending exception 380 // make sure that this code is only executed if there is a pending exception
383 { Label L; 381 { Label L;
384 __ ld_ptr(exception_addr, Gtemp); 382 __ ld_ptr(exception_addr, Gtemp);
385 __ br_notnull(Gtemp, false, Assembler::pt, L); 383 __ br_notnull_short(Gtemp, Assembler::pt, L);
386 __ delayed()->nop();
387 __ stop("StubRoutines::forward exception: no pending exception (1)"); 384 __ stop("StubRoutines::forward exception: no pending exception (1)");
388 __ bind(L); 385 __ bind(L);
389 } 386 }
390 #endif 387 #endif
391 388
404 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC 401 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
405 402
406 #ifdef ASSERT 403 #ifdef ASSERT
407 // make sure exception is set 404 // make sure exception is set
408 { Label L; 405 { Label L;
409 __ br_notnull(Oexception, false, Assembler::pt, L); 406 __ br_notnull_short(Oexception, Assembler::pt, L);
410 __ delayed()->nop();
411 __ stop("StubRoutines::forward exception: no pending exception (2)"); 407 __ stop("StubRoutines::forward exception: no pending exception (2)");
412 __ bind(L); 408 __ bind(L);
413 } 409 }
414 #endif 410 #endif
415 // jump to exception handler 411 // jump to exception handler
464 // is pushed. 460 // is pushed.
465 __ save_frame(0); 461 __ save_frame(0);
466 462
467 int frame_complete = __ offset(); 463 int frame_complete = __ offset();
468 464
469 if (restore_saved_exception_pc) {
470 __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7);
471 __ sub(I7, frame::pc_return_offset, I7);
472 }
473
474 // Note that we always have a runtime stub frame on the top of stack by this point 465 // Note that we always have a runtime stub frame on the top of stack by this point
475 Register last_java_sp = SP; 466 Register last_java_sp = SP;
476 // 64-bit last_java_sp is biased! 467 // 64-bit last_java_sp is biased!
477 __ set_last_Java_frame(last_java_sp, G0); 468 __ set_last_Java_frame(last_java_sp, G0);
478 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early 469 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early
499 Label L; 490 Label L;
500 491
501 Address exception_addr(G2_thread, Thread::pending_exception_offset()); 492 Address exception_addr(G2_thread, Thread::pending_exception_offset());
502 Register scratch_reg = Gtemp; 493 Register scratch_reg = Gtemp;
503 __ ld_ptr(exception_addr, scratch_reg); 494 __ ld_ptr(exception_addr, scratch_reg);
504 __ br_notnull(scratch_reg, false, Assembler::pt, L); 495 __ br_notnull_short(scratch_reg, Assembler::pt, L);
505 __ delayed()->nop();
506 __ should_not_reach_here(); 496 __ should_not_reach_here();
507 __ bind(L); 497 __ bind(L);
508 #endif // ASSERT 498 #endif // ASSERT
509 BLOCK_COMMENT("call forward_exception_entry"); 499 BLOCK_COMMENT("call forward_exception_entry");
510 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); 500 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
612 __ set(StubRoutines::Sparc::locked, lock_reg); 602 __ set(StubRoutines::Sparc::locked, lock_reg);
613 // Initialize yield counter 603 // Initialize yield counter
614 __ mov(G0,yield_reg); 604 __ mov(G0,yield_reg);
615 605
616 __ BIND(retry); 606 __ BIND(retry);
617 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount); 607 __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);
618 __ br(Assembler::less, false, Assembler::pt, dontyield);
619 __ delayed()->nop();
620 608
621 // This code can only be called from inside the VM, this 609 // This code can only be called from inside the VM, this
622 // stub is only invoked from Atomic::add(). We do not 610 // stub is only invoked from Atomic::add(). We do not
623 // want to use call_VM, because _last_java_sp and such 611 // want to use call_VM, because _last_java_sp and such
624 // must already be set. 612 // must already be set.
674 __ mov(O0, O3); // scratch copy of exchange value 662 __ mov(O0, O3); // scratch copy of exchange value
675 __ ld(O1, 0, O2); // observe the previous value 663 __ ld(O1, 0, O2); // observe the previous value
676 // try to replace O2 with O3 664 // try to replace O2 with O3
677 __ cas_under_lock(O1, O2, O3, 665 __ cas_under_lock(O1, O2, O3,
678 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false); 666 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
679 __ cmp(O2, O3); 667 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
680 __ br(Assembler::notEqual, false, Assembler::pn, retry);
681 __ delayed()->nop();
682 668
683 __ retl(false); 669 __ retl(false);
684 __ delayed()->mov(O2, O0); // report previous value to caller 670 __ delayed()->mov(O2, O0); // report previous value to caller
685 671
686 } else { 672 } else {
796 if (VM_Version::v9_instructions_work()) { 782 if (VM_Version::v9_instructions_work()) {
797 Label(retry); 783 Label(retry);
798 __ BIND(retry); 784 __ BIND(retry);
799 785
800 __ lduw(O1, 0, O2); 786 __ lduw(O1, 0, O2);
801 __ add(O0, O2, O3); 787 __ add(O0, O2, O3);
802 __ cas(O1, O2, O3); 788 __ cas(O1, O2, O3);
803 __ cmp( O2, O3); 789 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
804 __ br(Assembler::notEqual, false, Assembler::pn, retry);
805 __ delayed()->nop();
806 __ retl(false); 790 __ retl(false);
807 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3 791 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
808 } else { 792 } else {
809 const Register& lock_reg = O2; 793 const Register& lock_reg = O2;
810 const Register& lock_ptr_reg = O3; 794 const Register& lock_ptr_reg = O3;
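The v9 path above implements atomic add as a load / add / compare-and-swap retry loop: lduw observes the old value, cas tries to install old+delta, and the cmp_and_br_short loops while another CPU won the race. A minimal sketch of the same control flow with std::atomic, purely as an illustration (not the stub itself):

    #include <atomic>

    // Returns the new value, as the stub does via delayed()->add(O0, O2, O0).
    int atomic_add_like_stub(std::atomic<int>* dest, int add_value) {
      int observed = dest->load();                              // lduw(O1, 0, O2)
      // cas(O1, O2, O3) succeeds only if *dest still equals 'observed';
      // on failure compare_exchange_weak reloads 'observed' and we retry.
      while (!dest->compare_exchange_weak(observed, observed + add_value)) {
        // branch back to 'retry'
      }
      return observed + add_value;
    }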
1133 default: 1117 default:
1134 ShouldNotReachHere(); 1118 ShouldNotReachHere();
1135 } 1119 }
1136 } 1120 }
1137 1121
1122 //
1123 // Generate main code for disjoint arraycopy
1124 //
1125 typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec,
1126 Label& L_loop, bool use_prefetch, bool use_bis);
1127
1128 void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size,
1129 int iter_size, CopyLoopFunc copy_loop_func) {
1130 Label L_copy;
1131
1132 assert(log2_elem_size <= 3, "the following code should be changed");
1133 int count_dec = 16>>log2_elem_size;
1134
1135 int prefetch_dist = MAX2(ArraycopySrcPrefetchDistance, ArraycopyDstPrefetchDistance);
1136 assert(prefetch_dist < 4096, "invalid value");
1137 prefetch_dist = (prefetch_dist + (iter_size-1)) & (-iter_size); // round up to one iteration copy size
1138 int prefetch_count = (prefetch_dist >> log2_elem_size); // elements count
1139
1140 if (UseBlockCopy) {
1141 Label L_block_copy, L_block_copy_prefetch, L_skip_block_copy;
1142
1143 // 64 bytes tail + bytes copied in one loop iteration
1144 int tail_size = 64 + iter_size;
1145 int block_copy_count = (MAX2(tail_size, (int)BlockCopyLowLimit)) >> log2_elem_size;
1146 // Use BIS copy only for big arrays since it requires membar.
1147 __ set(block_copy_count, O4);
1148 __ cmp_and_br_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_skip_block_copy);
1149 // This code is for disjoint source and destination:
1150 // to <= from || to >= from+count
1151 // but BIS will stomp over 'from' if (to > from-tail_size && to <= from)
1152 __ sub(from, to, O4);
1153 __ srax(O4, 4, O4); // divide by 16 since the following short branch has only 5 bits for imm.
1154 __ cmp_and_br_short(O4, (tail_size>>4), Assembler::lessEqualUnsigned, Assembler::pn, L_skip_block_copy);
1155
1156 __ wrasi(G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
1157 // BIS should not be used to copy tail (64 bytes+iter_size)
1158 // to avoid zeroing of following values.
1159 __ sub(count, (tail_size>>log2_elem_size), count); // count is still positive >= 0
1160
1161 if (prefetch_count > 0) { // rounded up to one iteration count
1162 // Do prefetching only if copy size is bigger
1163 // than prefetch distance.
1164 __ set(prefetch_count, O4);
1165 __ cmp_and_brx_short(count, O4, Assembler::less, Assembler::pt, L_block_copy);
1166 __ sub(count, prefetch_count, count);
1167
1168 (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy_prefetch, true, true);
1169 __ add(count, prefetch_count, count); // restore count
1170
1171 } // prefetch_count > 0
1172
1173 (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy, false, true);
1174 __ add(count, (tail_size>>log2_elem_size), count); // restore count
1175
1176 __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
1177 // BIS needs membar.
1178 __ membar(Assembler::StoreLoad);
1179 // Copy tail
1180 __ ba_short(L_copy);
1181
1182 __ BIND(L_skip_block_copy);
1183 } // UseBlockCopy
1184
1185 if (prefetch_count > 0) { // rounded up to one iteration count
1186 // Do prefetching only if copy size is bigger
1187 // than prefetch distance.
1188 __ set(prefetch_count, O4);
1189 __ cmp_and_brx_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_copy);
1190 __ sub(count, prefetch_count, count);
1191
1192 Label L_copy_prefetch;
1193 (this->*copy_loop_func)(from, to, count, count_dec, L_copy_prefetch, true, false);
1194 __ add(count, prefetch_count, count); // restore count
1195
1196 } // prefetch_count > 0
1197
1198 (this->*copy_loop_func)(from, to, count, count_dec, L_copy, false, false);
1199 }
1200
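The new disjoint_copy_core above wraps any of several copy-loop bodies in the same BIS/prefetch framing by taking a pointer-to-member-function (CopyLoopFunc) and invoking it as (this->*copy_loop_func)(...). A stripped-down sketch of that dispatch shape, with illustrative names and the same round-up-to-one-iteration arithmetic used for prefetch_dist:

    // Illustrative class, not the real StubGenerator.
    class Generator {
      typedef void (Generator::*LoopFunc)(int count, bool use_prefetch, bool use_bis);

      void copy_loop(int count, bool use_prefetch, bool use_bis) {
        // one unrolled copy-loop body would be emitted here
        (void)count; (void)use_prefetch; (void)use_bis;
      }

      void copy_core(int count, int iter_size, LoopFunc loop) {
        int prefetch_dist = 100;                                  // stand-in tuning value
        prefetch_dist = (prefetch_dist + (iter_size - 1)) & (-iter_size);
        // e.g. iter_size == 16: 100 -> 112, 112 -> 112 (round up to a whole iteration)
        (this->*loop)(count, /*use_prefetch*/ prefetch_dist > 0, /*use_bis*/ false);
      }

     public:
      void generate(int count) { copy_core(count, 16, &Generator::copy_loop); }
    };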
1201
1202
1203 //
1204 // Helper methods for copy_16_bytes_forward_with_shift()
1205 //
1206 void copy_16_bytes_shift_loop(Register from, Register to, Register count, int count_dec,
1207 Label& L_loop, bool use_prefetch, bool use_bis) {
1208
1209 const Register left_shift = G1; // left shift bit counter
1210 const Register right_shift = G5; // right shift bit counter
1211
1212 __ align(OptoLoopAlignment);
1213 __ BIND(L_loop);
1214 if (use_prefetch) {
1215 if (ArraycopySrcPrefetchDistance > 0) {
1216 __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
1217 }
1218 if (ArraycopyDstPrefetchDistance > 0) {
1219 __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
1220 }
1221 }
1222 __ ldx(from, 0, O4);
1223 __ ldx(from, 8, G4);
1224 __ inc(to, 16);
1225 __ inc(from, 16);
1226 __ deccc(count, count_dec); // Can we do next iteration after this one?
1227 __ srlx(O4, right_shift, G3);
1228 __ bset(G3, O3);
1229 __ sllx(O4, left_shift, O4);
1230 __ srlx(G4, right_shift, G3);
1231 __ bset(G3, O4);
1232 if (use_bis) {
1233 __ stxa(O3, to, -16);
1234 __ stxa(O4, to, -8);
1235 } else {
1236 __ stx(O3, to, -16);
1237 __ stx(O4, to, -8);
1238 }
1239 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
1240 __ delayed()->sllx(G4, left_shift, O3);
1241 }
1138 1242
1139 // Copy big chunks forward with shift 1243 // Copy big chunks forward with shift
1140 // 1244 //
1141 // Inputs: 1245 // Inputs:
1142 // from - source arrays 1246 // from - source arrays
1144 // count - elements count to copy >= the count equivalent to 16 bytes 1248 // count - elements count to copy >= the count equivalent to 16 bytes
1145 // count_dec - elements count's decrement equivalent to 16 bytes 1249 // count_dec - elements count's decrement equivalent to 16 bytes
1146 // L_copy_bytes - copy exit label 1250 // L_copy_bytes - copy exit label
1147 // 1251 //
1148 void copy_16_bytes_forward_with_shift(Register from, Register to, 1252 void copy_16_bytes_forward_with_shift(Register from, Register to,
1149 Register count, int count_dec, Label& L_copy_bytes) { 1253 Register count, int log2_elem_size, Label& L_copy_bytes) {
1150 Label L_loop, L_aligned_copy, L_copy_last_bytes; 1254 Label L_aligned_copy, L_copy_last_bytes;
1255 assert(log2_elem_size <= 3, "the following code should be changed");
1256 int count_dec = 16>>log2_elem_size;
1151 1257
1152 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy 1258 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
1153 __ andcc(from, 7, G1); // misaligned bytes 1259 __ andcc(from, 7, G1); // misaligned bytes
1154 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy); 1260 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
1155 __ delayed()->nop(); 1261 __ delayed()->nop();
1156 1262
1157 const Register left_shift = G1; // left shift bit counter 1263 const Register left_shift = G1; // left shift bit counter
1158 const Register right_shift = G5; // right shift bit counter 1264 const Register right_shift = G5; // right shift bit counter
1159 1265
1160 __ sll(G1, LogBitsPerByte, left_shift); 1266 __ sll(G1, LogBitsPerByte, left_shift);
1161 __ mov(64, right_shift); 1267 __ mov(64, right_shift);
1162 __ sub(right_shift, left_shift, right_shift); 1268 __ sub(right_shift, left_shift, right_shift);
1163 1269
1164 // 1270 //
1165 // Load 2 aligned 8-bytes chunks and use one from previous iteration 1271 // Load 2 aligned 8-bytes chunks and use one from previous iteration
1166 // to form 2 aligned 8-bytes chunks to store. 1272 // to form 2 aligned 8-bytes chunks to store.
1167 // 1273 //
1168 __ deccc(count, count_dec); // Pre-decrement 'count' 1274 __ dec(count, count_dec); // Pre-decrement 'count'
1169 __ andn(from, 7, from); // Align address 1275 __ andn(from, 7, from); // Align address
1170 __ ldx(from, 0, O3); 1276 __ ldx(from, 0, O3);
1171 __ inc(from, 8); 1277 __ inc(from, 8);
1172 __ align(OptoLoopAlignment); 1278 __ sllx(O3, left_shift, O3);
1173 __ BIND(L_loop); 1279
1174 __ ldx(from, 0, O4); 1280 disjoint_copy_core(from, to, count, log2_elem_size, 16, copy_16_bytes_shift_loop);
1175 __ deccc(count, count_dec); // Can we do next iteration after this one? 1281
1176 __ ldx(from, 8, G4); 1282 __ inccc(count, count_dec>>1 ); // + 8 bytes
1177 __ inc(to, 16); 1283 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1178 __ inc(from, 16); 1284 __ delayed()->inc(count, count_dec>>1); // restore 'count'
1179 __ sllx(O3, left_shift, O3); 1285
1180 __ srlx(O4, right_shift, G3); 1286 // copy 8 bytes, part of them already loaded in O3
1181 __ bset(G3, O3); 1287 __ ldx(from, 0, O4);
1182 __ stx(O3, to, -16); 1288 __ inc(to, 8);
1183 __ sllx(O4, left_shift, O4); 1289 __ inc(from, 8);
1184 __ srlx(G4, right_shift, G3); 1290 __ srlx(O4, right_shift, G3);
1185 __ bset(G3, O4); 1291 __ bset(O3, G3);
1186 __ stx(O4, to, -8); 1292 __ stx(G3, to, -8);
1187 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
1188 __ delayed()->mov(G4, O3);
1189
1190 __ inccc(count, count_dec>>1 ); // + 8 bytes
1191 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1192 __ delayed()->inc(count, count_dec>>1); // restore 'count'
1193
1194 // copy 8 bytes, part of them already loaded in O3
1195 __ ldx(from, 0, O4);
1196 __ inc(to, 8);
1197 __ inc(from, 8);
1198 __ sllx(O3, left_shift, O3);
1199 __ srlx(O4, right_shift, G3);
1200 __ bset(O3, G3);
1201 __ stx(G3, to, -8);
1202 1293
1203 __ BIND(L_copy_last_bytes); 1294 __ BIND(L_copy_last_bytes);
1204 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes 1295 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
1205 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes); 1296 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
1206 __ delayed()->sub(from, right_shift, from); // restore address 1297 __ delayed()->sub(from, right_shift, from); // restore address
1207 1298
1208 __ BIND(L_aligned_copy); 1299 __ BIND(L_aligned_copy);
1209 } 1300 }
1210 1301
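copy_16_bytes_forward_with_shift handles source and destination whose alignments differ mod 8: the source is realigned down with andn(from, 7, from), and each destination word is assembled from two consecutive aligned words with an sllx/srlx/bset pair. The C++ sketch below shows that shift-merge step on ordinary 64-bit values (big-endian layout as on SPARC; the helper name is invented):

    #include <cstdint>

    // Combine two aligned big-endian 8-byte words into the 8 bytes that start
    // 'misalign' bytes into 'prev_word'.  misalign is 1..7 here: the perfectly
    // aligned case takes the separate L_aligned_copy path in the stub.
    uint64_t shift_merge(uint64_t prev_word, uint64_t next_word, int misalign) {
      int left_shift  = misalign * 8;        // sll(G1, LogBitsPerByte, left_shift)
      int right_shift = 64 - left_shift;     // mov(64, ...); sub(right, left, right)
      return (prev_word << left_shift) | (next_word >> right_shift);   // sllx | srlx, bset
    }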
1211 // Copy big chunks backward with shift 1302 // Copy big chunks backward with shift
1357 // the same alignment mod 8, otherwise fall through to the next 1448 // the same alignment mod 8, otherwise fall through to the next
1358 // code for aligned copy. 1449 // code for aligned copy.
1359 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. 1450 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1360 // Also jump over aligned copy after the copy with shift completed. 1451 // Also jump over aligned copy after the copy with shift completed.
1361 1452
1362 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte); 1453 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1363 } 1454 }
1364 1455
1365 // Both arrays are 8 bytes aligned, copy 16 bytes at a time 1456 // Both arrays are 8 bytes aligned, copy 16 bytes at a time
1366 __ and3(count, 7, G4); // Save count 1457 __ and3(count, 7, G4); // Save count
1367 __ srl(count, 3, count); 1458 __ srl(count, 3, count);
1368 generate_disjoint_long_copy_core(aligned); 1459 generate_disjoint_long_copy_core(aligned);
1369 __ mov(G4, count); // Restore count 1460 __ mov(G4, count); // Restore count
1370 1461
1371 // copy trailing bytes 1462 // copy trailing bytes
1372 __ BIND(L_copy_byte); 1463 __ BIND(L_copy_byte);
1373 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); 1464 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1374 __ delayed()->nop();
1375 __ align(OptoLoopAlignment); 1465 __ align(OptoLoopAlignment);
1376 __ BIND(L_copy_byte_loop); 1466 __ BIND(L_copy_byte_loop);
1377 __ ldub(from, offset, O3); 1467 __ ldub(from, offset, O3);
1378 __ deccc(count); 1468 __ deccc(count);
1379 __ stb(O3, to, offset); 1469 __ stb(O3, to, offset);
1480 __ delayed()->stx(O4, end_to, 0); 1570 __ delayed()->stx(O4, end_to, 0);
1481 __ inc(count, 16); 1571 __ inc(count, 16);
1482 1572
1483 // copy 1 element (2 bytes) at a time 1573 // copy 1 element (2 bytes) at a time
1484 __ BIND(L_copy_byte); 1574 __ BIND(L_copy_byte);
1485 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); 1575 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1486 __ delayed()->nop();
1487 __ align(OptoLoopAlignment); 1576 __ align(OptoLoopAlignment);
1488 __ BIND(L_copy_byte_loop); 1577 __ BIND(L_copy_byte_loop);
1489 __ dec(end_from); 1578 __ dec(end_from);
1490 __ dec(end_to); 1579 __ dec(end_to);
1491 __ ldub(end_from, 0, O4); 1580 __ ldub(end_from, 0, O4);
1587 // the same alignment mod 8, otherwise fall through to the next 1676 // the same alignment mod 8, otherwise fall through to the next
1588 // code for aligned copy. 1677 // code for aligned copy.
1589 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. 1678 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1590 // Also jump over aligned copy after the copy with shift completed. 1679 // Also jump over aligned copy after the copy with shift completed.
1591 1680
1592 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes); 1681 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1593 } 1682 }
1594 1683
1595 // Both arrays are 8 bytes aligned, copy 16 bytes at a time 1684 // Both arrays are 8 bytes aligned, copy 16 bytes at a time
1596 __ and3(count, 3, G4); // Save 1685 __ and3(count, 3, G4); // Save
1597 __ srl(count, 2, count); 1686 __ srl(count, 2, count);
1598 generate_disjoint_long_copy_core(aligned); 1687 generate_disjoint_long_copy_core(aligned);
1599 __ mov(G4, count); // restore 1688 __ mov(G4, count); // restore
1600 1689
1601 // copy 1 element at a time 1690 // copy 1 element at a time
1602 __ BIND(L_copy_2_bytes); 1691 __ BIND(L_copy_2_bytes);
1603 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); 1692 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1604 __ delayed()->nop();
1605 __ align(OptoLoopAlignment); 1693 __ align(OptoLoopAlignment);
1606 __ BIND(L_copy_2_bytes_loop); 1694 __ BIND(L_copy_2_bytes_loop);
1607 __ lduh(from, offset, O3); 1695 __ lduh(from, offset, O3);
1608 __ deccc(count); 1696 __ deccc(count);
1609 __ sth(O3, to, offset); 1697 __ sth(O3, to, offset);
1944 __ delayed()->stx(O4, end_to, 0); 2032 __ delayed()->stx(O4, end_to, 0);
1945 __ inc(count, 8); 2033 __ inc(count, 8);
1946 2034
1947 // copy 1 element (2 bytes) at a time 2035 // copy 1 element (2 bytes) at a time
1948 __ BIND(L_copy_2_bytes); 2036 __ BIND(L_copy_2_bytes);
1949 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); 2037 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1950 __ delayed()->nop();
1951 __ BIND(L_copy_2_bytes_loop); 2038 __ BIND(L_copy_2_bytes_loop);
1952 __ dec(end_from, 2); 2039 __ dec(end_from, 2);
1953 __ dec(end_to, 2); 2040 __ dec(end_to, 2);
1954 __ lduh(end_from, 0, O4); 2041 __ lduh(end_from, 0, O4);
1955 __ deccc(count); 2042 __ deccc(count);
1963 __ delayed()->mov(G0, O0); // return 0 2050 __ delayed()->mov(G0, O0); // return 0
1964 return start; 2051 return start;
1965 } 2052 }
1966 2053
1967 // 2054 //
2055 // Helper methods for generate_disjoint_int_copy_core()
2056 //
2057 void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec,
2058 Label& L_loop, bool use_prefetch, bool use_bis) {
2059
2060 __ align(OptoLoopAlignment);
2061 __ BIND(L_loop);
2062 if (use_prefetch) {
2063 if (ArraycopySrcPrefetchDistance > 0) {
2064 __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
2065 }
2066 if (ArraycopyDstPrefetchDistance > 0) {
2067 __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
2068 }
2069 }
2070 __ ldx(from, 4, O4);
2071 __ ldx(from, 12, G4);
2072 __ inc(to, 16);
2073 __ inc(from, 16);
2074 __ deccc(count, 4); // Can we do next iteration after this one?
2075
2076 __ srlx(O4, 32, G3);
2077 __ bset(G3, O3);
2078 __ sllx(O4, 32, O4);
2079 __ srlx(G4, 32, G3);
2080 __ bset(G3, O4);
2081 if (use_bis) {
2082 __ stxa(O3, to, -16);
2083 __ stxa(O4, to, -8);
2084 } else {
2085 __ stx(O3, to, -16);
2086 __ stx(O4, to, -8);
2087 }
2088 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
2089 __ delayed()->sllx(G4, 32, O3);
2090
2091 }
2092
2093 //
1968 // Generate core code for disjoint int copy (and oop copy on 32-bit). 2094 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1969 // If "aligned" is true, the "from" and "to" addresses are assumed 2095 // If "aligned" is true, the "from" and "to" addresses are assumed
1970 // to be heapword aligned. 2096 // to be heapword aligned.
1971 // 2097 //
1972 // Arguments: 2098 // Arguments:
1975 // count: O2 treated as signed 2101 // count: O2 treated as signed
1976 // 2102 //
1977 void generate_disjoint_int_copy_core(bool aligned) { 2103 void generate_disjoint_int_copy_core(bool aligned) {
1978 2104
1979 Label L_skip_alignment, L_aligned_copy; 2105 Label L_skip_alignment, L_aligned_copy;
1980 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; 2106 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1981 2107
1982 const Register from = O0; // source array address 2108 const Register from = O0; // source array address
1983 const Register to = O1; // destination array address 2109 const Register to = O1; // destination array address
1984 const Register count = O2; // elements count 2110 const Register count = O2; // elements count
1985 const Register offset = O5; // offset from start of arrays 2111 const Register offset = O5; // offset from start of arrays
2026 // copy_16_bytes_forward_with_shift() is not used here since this 2152 // copy_16_bytes_forward_with_shift() is not used here since this
2027 // code is more optimal. 2153 // code is more optimal.
2028 2154
2029 // copy with shift 4 elements (16 bytes) at a time 2155 // copy with shift 4 elements (16 bytes) at a time
2030 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4 2156 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4
2031 2157 __ sllx(O3, 32, O3);
2032 __ align(OptoLoopAlignment); 2158
2033 __ BIND(L_copy_16_bytes); 2159 disjoint_copy_core(from, to, count, 2, 16, copy_16_bytes_loop);
2034 __ ldx(from, 4, O4);
2035 __ deccc(count, 4); // Can we do next iteration after this one?
2036 __ ldx(from, 12, G4);
2037 __ inc(to, 16);
2038 __ inc(from, 16);
2039 __ sllx(O3, 32, O3);
2040 __ srlx(O4, 32, G3);
2041 __ bset(G3, O3);
2042 __ stx(O3, to, -16);
2043 __ sllx(O4, 32, O4);
2044 __ srlx(G4, 32, G3);
2045 __ bset(G3, O4);
2046 __ stx(O4, to, -8);
2047 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2048 __ delayed()->mov(G4, O3);
2049 2160
2050 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); 2161 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2051 __ delayed()->inc(count, 4); // restore 'count' 2162 __ delayed()->inc(count, 4); // restore 'count'
2052 2163
2053 __ BIND(L_aligned_copy); 2164 __ BIND(L_aligned_copy);
2054 } 2165 } // !aligned
2166
2055 // copy 4 elements (16 bytes) at a time 2167 // copy 4 elements (16 bytes) at a time
2056 __ and3(count, 1, G4); // Save 2168 __ and3(count, 1, G4); // Save
2057 __ srl(count, 1, count); 2169 __ srl(count, 1, count);
2058 generate_disjoint_long_copy_core(aligned); 2170 generate_disjoint_long_copy_core(aligned);
2059 __ mov(G4, count); // Restore 2171 __ mov(G4, count); // Restore
2060 2172
2061 // copy 1 element at a time 2173 // copy 1 element at a time
2062 __ BIND(L_copy_4_bytes); 2174 __ BIND(L_copy_4_bytes);
2063 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); 2175 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2064 __ delayed()->nop();
2065 __ BIND(L_copy_4_bytes_loop); 2176 __ BIND(L_copy_4_bytes_loop);
2066 __ ld(from, offset, O3); 2177 __ ld(from, offset, O3);
2067 __ deccc(count); 2178 __ deccc(count);
2068 __ st(O3, to, offset); 2179 __ st(O3, to, offset);
2069 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop); 2180 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2191 __ delayed()->stx(O4, end_to, 0); 2302 __ delayed()->stx(O4, end_to, 0);
2192 __ inc(count, 4); 2303 __ inc(count, 4);
2193 2304
2194 // copy 1 element (4 bytes) at a time 2305 // copy 1 element (4 bytes) at a time
2195 __ BIND(L_copy_4_bytes); 2306 __ BIND(L_copy_4_bytes);
2196 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); 2307 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2197 __ delayed()->nop();
2198 __ BIND(L_copy_4_bytes_loop); 2308 __ BIND(L_copy_4_bytes_loop);
2199 __ dec(end_from, 4); 2309 __ dec(end_from, 4);
2200 __ dec(end_to, 4); 2310 __ dec(end_to, 4);
2201 __ ld(end_from, 0, O4); 2311 __ ld(end_from, 0, O4);
2202 __ deccc(count); 2312 __ deccc(count);
2235 // O3, O4 are used as temp registers 2345 // O3, O4 are used as temp registers
2236 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); 2346 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
2237 __ retl(); 2347 __ retl();
2238 __ delayed()->mov(G0, O0); // return 0 2348 __ delayed()->mov(G0, O0); // return 0
2239 return start; 2349 return start;
2350 }
2351
2352 //
2353 // Helper methods for generate_disjoint_long_copy_core()
2354 //
2355 void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec,
2356 Label& L_loop, bool use_prefetch, bool use_bis) {
2357 __ align(OptoLoopAlignment);
2358 __ BIND(L_loop);
2359 for (int off = 0; off < 64; off += 16) {
2360 if (use_prefetch && (off & 31) == 0) {
2361 if (ArraycopySrcPrefetchDistance > 0) {
2362 __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads);
2363 }
2364 if (ArraycopyDstPrefetchDistance > 0) {
2365 __ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads);
2366 }
2367 }
2368 __ ldx(from, off+0, O4);
2369 __ ldx(from, off+8, O5);
2370 if (use_bis) {
2371 __ stxa(O4, to, off+0);
2372 __ stxa(O5, to, off+8);
2373 } else {
2374 __ stx(O4, to, off+0);
2375 __ stx(O5, to, off+8);
2376 }
2377 }
2378 __ deccc(count, 8);
2379 __ inc(from, 64);
2380 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
2381 __ delayed()->inc(to, 64);
2240 } 2382 }
2241 2383
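copy_64_bytes_loop above unrolls the long copy into four 16-byte ld/st pairs per iteration and, when use_prefetch is set, issues a software prefetch every 32 bytes of that unrolled body. The portable C++ below mirrors only that shape; __builtin_prefetch is a GCC/Clang builtin used here as a stand-in for the SPARC prefetch instruction, and the distance is an assumed tuning knob:

    #include <cstddef>
    #include <cstdint>

    void copy_64_byte_chunks(const uint64_t* from, uint64_t* to, size_t chunks,
                             size_t prefetch_dist_words) {
      for (size_t c = 0; c < chunks; c++) {
        for (int off = 0; off < 8; off += 2) {                 // 4 x 16 bytes = 64 bytes
          if ((off & 3) == 0) {                                // every 32 bytes, like (off & 31) == 0
            __builtin_prefetch(from + off + prefetch_dist_words, 0);   // upcoming reads
            __builtin_prefetch(to   + off + prefetch_dist_words, 1);   // upcoming writes
          }
          to[off]     = from[off];                             // ldx/stx off+0
          to[off + 1] = from[off + 1];                         // ldx/stx off+8
        }
        from += 8;                                             // inc(from, 64)
        to   += 8;                                             // delayed()->inc(to, 64)
      }
    }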
2242 // 2384 //
2243 // Generate core code for disjoint long copy (and oop copy on 64-bit). 2385 // Generate core code for disjoint long copy (and oop copy on 64-bit).
2244 // "aligned" is ignored, because we must make the stronger 2386 // "aligned" is ignored, because we must make the stronger
2276 const Register to = O1; // destination array address 2418 const Register to = O1; // destination array address
2277 const Register count = O2; // elements count 2419 const Register count = O2; // elements count
2278 const Register offset0 = O4; // element offset 2420 const Register offset0 = O4; // element offset
2279 const Register offset8 = O5; // next element offset 2421 const Register offset8 = O5; // next element offset
2280 2422
2281 __ deccc(count, 2); 2423 __ deccc(count, 2);
2282 __ mov(G0, offset0); // offset from start of arrays (0) 2424 __ mov(G0, offset0); // offset from start of arrays (0)
2283 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); 2425 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
2284 __ delayed()->add(offset0, 8, offset8); 2426 __ delayed()->add(offset0, 8, offset8);
2285 2427
2286 // Copy by 64 bytes chunks 2428 // Copy by 64 bytes chunks
2287 Label L_copy_64_bytes; 2429
2288 const Register from64 = O3; // source address 2430 const Register from64 = O3; // source address
2289 const Register to64 = G3; // destination address 2431 const Register to64 = G3; // destination address
2290 __ subcc(count, 6, O3); 2432 __ subcc(count, 6, O3);
2291 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes ); 2433 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
2292 __ delayed()->mov(to, to64); 2434 __ delayed()->mov(to, to64);
2293 // Now we can use O4(offset0), O5(offset8) as temps 2435 // Now we can use O4(offset0), O5(offset8) as temps
2294 __ mov(O3, count); 2436 __ mov(O3, count);
2295 __ mov(from, from64); 2437 // count >= 0 (original count - 8)
2296 2438 __ mov(from, from64);
2297 __ align(OptoLoopAlignment); 2439
2298 __ BIND(L_copy_64_bytes); 2440 disjoint_copy_core(from64, to64, count, 3, 64, copy_64_bytes_loop);
2299 for( int off = 0; off < 64; off += 16 ) {
2300 __ ldx(from64, off+0, O4);
2301 __ ldx(from64, off+8, O5);
2302 __ stx(O4, to64, off+0);
2303 __ stx(O5, to64, off+8);
2304 }
2305 __ deccc(count, 8);
2306 __ inc(from64, 64);
2307 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes);
2308 __ delayed()->inc(to64, 64);
2309 2441
2310 // Restore O4(offset0), O5(offset8) 2442 // Restore O4(offset0), O5(offset8)
2311 __ sub(from64, from, offset0); 2443 __ sub(from64, from, offset0);
2312 __ inccc(count, 6); 2444 __ inccc(count, 6); // restore count
2313 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); 2445 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
2314 __ delayed()->add(offset0, 8, offset8); 2446 __ delayed()->add(offset0, 8, offset8);
2315 2447
2316 // Copy by 16 bytes chunks 2448 // Copy by 16 bytes chunks
2317 __ align(OptoLoopAlignment); 2449 __ align(OptoLoopAlignment);
2574 __ save_frame(0); 2706 __ save_frame(0);
2575 __ check_klass_subtype_slow_path(sub_klass->after_save(), 2707 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2576 super_klass->after_save(), 2708 super_klass->after_save(),
2577 L0, L1, L2, L4, 2709 L0, L1, L2, L4,
2578 NULL, &L_pop_to_miss); 2710 NULL, &L_pop_to_miss);
2579 __ ba(false, L_success); 2711 __ ba(L_success);
2580 __ delayed()->restore(); 2712 __ delayed()->restore();
2581 2713
2582 __ bind(L_pop_to_miss); 2714 __ bind(L_pop_to_miss);
2583 __ restore(); 2715 __ restore();
2584 2716
2671 __ delayed()->set(0, O0); // return -1 on success 2803 __ delayed()->set(0, O0); // return -1 on success
2672 2804
2673 // ======== loop entry is here ======== 2805 // ======== loop entry is here ========
2674 __ BIND(load_element); 2806 __ BIND(load_element);
2675 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop 2807 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2676 __ br_null(G3_oop, true, Assembler::pt, store_element); 2808 __ br_null_short(G3_oop, Assembler::pt, store_element);
2677 __ delayed()->nop();
2678 2809
2679 __ load_klass(G3_oop, G4_klass); // query the object klass 2810 __ load_klass(G3_oop, G4_klass); // query the object klass
2680 2811
2681 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, 2812 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2682 // branch to this on success: 2813 // branch to this on success:
2894 3025
2895 #ifdef ASSERT 3026 #ifdef ASSERT
2896 // assert(src->klass() != NULL); 3027 // assert(src->klass() != NULL);
2897 BLOCK_COMMENT("assert klasses not null"); 3028 BLOCK_COMMENT("assert klasses not null");
2898 { Label L_a, L_b; 3029 { Label L_a, L_b;
2899 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL 3030 __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL
2900 __ delayed()->nop();
2901 __ bind(L_a); 3031 __ bind(L_a);
2902 __ stop("broken null klass"); 3032 __ stop("broken null klass");
2903 __ bind(L_b); 3033 __ bind(L_b);
2904 __ load_klass(dst, G4_dst_klass); 3034 __ load_klass(dst, G4_dst_klass);
2905 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also 3035 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
2935 } else { 3065 } else {
2936 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass); 3066 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2937 } 3067 }
2938 3068
2939 // if (src->klass() != dst->klass()) return -1; 3069 // if (src->klass() != dst->klass()) return -1;
2940 __ cmp(G3_src_klass, G4_dst_klass); 3070 __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed);
2941 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
2942 __ delayed()->nop();
2943 3071
2944 // if (!src->is_Array()) return -1; 3072 // if (!src->is_Array()) return -1;
2945 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0 3073 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2946 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed); 3074 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2947 3075
3005 __ cmp(G3_elsize, LogBytesPerInt); 3133 __ cmp(G3_elsize, LogBytesPerInt);
3006 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy); 3134 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
3007 __ delayed()->signx(length, count); // length 3135 __ delayed()->signx(length, count); // length
3008 #ifdef ASSERT 3136 #ifdef ASSERT
3009 { Label L; 3137 { Label L;
3010 __ cmp(G3_elsize, LogBytesPerLong); 3138 __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L);
3011 __ br(Assembler::equal, false, Assembler::pt, L);
3012 __ delayed()->nop();
3013 __ stop("must be long copy, but elsize is wrong"); 3139 __ stop("must be long copy, but elsize is wrong");
3014 __ bind(L); 3140 __ bind(L);
3015 } 3141 }
3016 #endif 3142 #endif
3017 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy); 3143 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
3089 __ BIND(L_failed); 3215 __ BIND(L_failed);
3090 __ retl(); 3216 __ retl();
3091 __ delayed()->sub(G0, 1, O0); // return -1 3217 __ delayed()->sub(G0, 1, O0); // return -1
3092 return start; 3218 return start;
3093 } 3219 }
3220
3221 //
3222 // Generate stub for heap zeroing.
3223 // "to" address is aligned to jlong (8 bytes).
3224 //
3225 // Arguments for generated stub:
3226 // to: O0
3227 // count: O1 treated as signed (count of HeapWord)
3228 // count could be 0
3229 //
3230 address generate_zero_aligned_words(const char* name) {
3231 __ align(CodeEntryAlignment);
3232 StubCodeMark mark(this, "StubRoutines", name);
3233 address start = __ pc();
3234
3235 const Register to = O0; // source array address
3236 const Register count = O1; // HeapWords count
3237 const Register temp = O2; // scratch
3238
3239 Label Ldone;
3240 __ sllx(count, LogHeapWordSize, count); // to bytes count
3241 // Use BIS for zeroing
3242 __ bis_zeroing(to, count, temp, Ldone);
3243 __ bind(Ldone);
3244 __ retl();
3245 __ delayed()->nop();
3246 return start;
3247 }
3094 3248
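Functionally, the new generate_zero_aligned_words stub clears count HeapWords starting at a jlong-aligned address, using block-initializing stores via bis_zeroing. The plain C++ below states only that contract (HeapWordSize taken as 8 bytes on 64-bit SPARC, as an assumption rather than from the real headers):

    #include <cstddef>
    #include <cstring>

    // Contract sketch of the BIS-based zeroing stub: 'to' is 8-byte aligned,
    // 'count' (possibly zero) is in HeapWords, converted to bytes before clearing.
    void zero_aligned_words(void* to, size_t count_heap_words) {
      const size_t heap_word_size = 8;                    // assumed; LogHeapWordSize == 3
      memset(to, 0, count_heap_words * heap_word_size);   // sllx + bis_zeroing
    }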
3095 void generate_arraycopy_stubs() { 3249 void generate_arraycopy_stubs() {
3096 address entry; 3250 address entry;
3097 address entry_jbyte_arraycopy; 3251 address entry_jbyte_arraycopy;
3098 address entry_jshort_arraycopy; 3252 address entry_jshort_arraycopy;
3216 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); 3370 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
3217 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); 3371 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
3218 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); 3372 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
3219 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); 3373 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
3220 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); 3374 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
3375
3376 if (UseBlockZeroing) {
3377 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words");
3378 }
3221 } 3379 }
3222 3380
3223 void generate_initial() { 3381 void generate_initial() {
3224 // Generates all stubs and initializes the entry points 3382 // Generates all stubs and initializes the entry points
3225 3383
3264 3422
3265 // Generate partial_subtype_check first here since its code depends on 3423 // Generate partial_subtype_check first here since its code depends on
3266 // UseZeroBaseCompressedOops which is defined after heap initialization. 3424 // UseZeroBaseCompressedOops which is defined after heap initialization.
3267 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check(); 3425 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
3268 // These entry points require SharedInfo::stack0 to be set up in non-core builds 3426 // These entry points require SharedInfo::stack0 to be set up in non-core builds
3269 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); 3427 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
3270 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false); 3428 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
3271 StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true); 3429 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
3272 StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true); 3430 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
3273 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
3274 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
3275 3431
3276 StubRoutines::_handler_for_unsafe_access_entry = 3432 StubRoutines::_handler_for_unsafe_access_entry =
3277 generate_handler_for_unsafe_access(); 3433 generate_handler_for_unsafe_access();
3278 3434
3279 // support for verify_oop (must happen after universe_init) 3435 // support for verify_oop (must happen after universe_init)