comparison src/cpu/ppc/vm/stubGenerator_ppc.cpp @ 14445:67fa91961822

8029940: PPC64 (part 122): C2 compiler port
Reviewed-by: kvn
author goetz
date Wed, 11 Dec 2013 00:06:11 +0100
parents ec28f9c041ff
children b858620b0081
comparing 14444:492e67693373 with 14445:67fa91961822
144 r_arg_argument_count, Interpreter::logStackElementSize); 144 r_arg_argument_count, Interpreter::logStackElementSize);
145 // arguments alignment (max 1 slot) 145 // arguments alignment (max 1 slot)
146 // FIXME: use round_to() here 146 // FIXME: use round_to() here
147 __ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1); 147 __ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1);
148 __ sldi(r_frame_alignment_in_bytes, 148 __ sldi(r_frame_alignment_in_bytes,
149 r_frame_alignment_in_bytes, Interpreter::logStackElementSize); 149 r_frame_alignment_in_bytes, Interpreter::logStackElementSize);
150 150
151 // size = unaligned size of arguments + top abi's size 151 // size = unaligned size of arguments + top abi's size
152 __ addi(r_frame_size, r_argument_size_in_bytes, 152 __ addi(r_frame_size, r_argument_size_in_bytes,
153 frame::top_ijava_frame_abi_size); 153 frame::top_ijava_frame_abi_size);
154 // size += arguments alignment 154 // size += arguments alignment
155 __ add(r_frame_size, 155 __ add(r_frame_size,
156 r_frame_size, r_frame_alignment_in_bytes); 156 r_frame_size, r_frame_alignment_in_bytes);
157 // size += size of call_stub locals 157 // size += size of call_stub locals
158 __ addi(r_frame_size, 158 __ addi(r_frame_size,
159 r_frame_size, frame::entry_frame_locals_size); 159 r_frame_size, frame::entry_frame_locals_size);
160 160
161 // push ENTRY_FRAME 161 // push ENTRY_FRAME
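
The hunk above computes the size of the new entry frame. A minimal C++ sketch of the same arithmetic follows; the constant values are illustrative assumptions standing in for Interpreter:: and frame::, not the platform's real numbers.

  #include <cstdint>
  #include <cstdio>

  // Illustrative stand-ins; the real values come from Interpreter:: and frame::.
  static const int kLogStackElementSize  = 3;   // 8-byte stack slots (assumption)
  static const int kTopIjavaFrameAbiSize = 112; // stand-in for frame::top_ijava_frame_abi_size
  static const int kEntryFrameLocalsSize = 80;  // stand-in for frame::entry_frame_locals_size

  // Mirrors the andi_/sldi/addi/add/addi sequence: argument bytes, padded to an
  // even slot count, plus the top ABI area, plus the call_stub locals.
  static intptr_t entry_frame_size(intptr_t argument_count) {
    intptr_t argument_size_in_bytes = argument_count << kLogStackElementSize;
    intptr_t alignment_in_bytes     = (argument_count & 1) << kLogStackElementSize;
    return argument_size_in_bytes + alignment_in_bytes
           + kTopIjavaFrameAbiSize + kEntryFrameLocalsSize;
  }

  int main() {
    printf("%ld\n", (long)entry_frame_size(5)); // odd count, so one padding slot is added
    return 0;
  }

The andi_/sldi pair is exactly the (argument_count & 1) << logStackElementSize term; the FIXME at line 146 notes this could be expressed with round_to() instead.
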
177 // Calculate top_of_arguments_addr which will be R17_tos (not prepushed) later. 177 // Calculate top_of_arguments_addr which will be R17_tos (not prepushed) later.
178 // FIXME: why not simply use SP+frame::top_ijava_frame_size? 178 // FIXME: why not simply use SP+frame::top_ijava_frame_size?
179 __ addi(r_top_of_arguments_addr, 179 __ addi(r_top_of_arguments_addr,
180 R1_SP, frame::top_ijava_frame_abi_size); 180 R1_SP, frame::top_ijava_frame_abi_size);
181 __ add(r_top_of_arguments_addr, 181 __ add(r_top_of_arguments_addr,
182 r_top_of_arguments_addr, r_frame_alignment_in_bytes); 182 r_top_of_arguments_addr, r_frame_alignment_in_bytes);
183 183
184 // any arguments to copy? 184 // any arguments to copy?
185 __ cmpdi(CCR0, r_arg_argument_count, 0); 185 __ cmpdi(CCR0, r_arg_argument_count, 0);
186 __ beq(CCR0, arguments_copied); 186 __ beq(CCR0, arguments_copied);
187 187
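
The addi/add pair above derives the address where the copied arguments will start. A matching sketch under the same assumed constants as before:

  #include <cstdint>

  // Same assumed constants as in the previous sketch.
  static const int kLogStackElementSize  = 3;
  static const int kTopIjavaFrameAbiSize = 112;

  // The copied arguments live just above the top ABI area of the new frame,
  // shifted by the 0- or 1-slot alignment padding computed earlier.
  static intptr_t* top_of_arguments_addr(intptr_t* sp, intptr_t argument_count) {
    intptr_t alignment_in_bytes = (argument_count & 1) << kLogStackElementSize;
    return (intptr_t*)((char*)sp + kTopIjavaFrameAbiSize + alignment_in_bytes);
  }
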
227 227
228 __ mr(r_new_arg_entry, r_arg_entry); 228 __ mr(r_new_arg_entry, r_arg_entry);
229 229
230 // Register state on entry to frame manager / native entry: 230 // Register state on entry to frame manager / native entry:
231 // 231 //
232 // R17_tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8 232 // tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8
233 // R19_method - Method 233 // R19_method - Method
234 // R16_thread - JavaThread* 234 // R16_thread - JavaThread*
235 235
236 // R17_tos must point to last argument - element_size. 236 // Tos must point to last argument - element_size.
237 __ addi(R17_tos, r_top_of_arguments_addr, -Interpreter::stackElementSize); 237 const Register tos = R17_tos;
238 __ addi(tos, r_top_of_arguments_addr, -Interpreter::stackElementSize);
238 239
239 // initialize call_stub locals (step 2) 240 // initialize call_stub locals (step 2)
240 // now save R17_tos as arguments_tos_address 241 // now save tos as arguments_tos_address
241 __ std(R17_tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp); 242 __ std(tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp);
242 243
243 // load argument registers for call 244 // load argument registers for call
244 __ mr(R19_method, r_arg_method); 245 __ mr(R19_method, r_arg_method);
245 __ mr(R16_thread, r_arg_thread); 246 __ mr(R16_thread, r_arg_thread);
246 assert(R17_tos != r_arg_method, "trashed r_arg_method"); 247 assert(tos != r_arg_method, "trashed r_arg_method");
247 assert(R17_tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread"); 248 assert(tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread");
248 249
249 // Set R15_prev_state to 0 for simplifying checks in callee. 250 // Set R15_prev_state to 0 for simplifying checks in callee.
250 __ li(R15_prev_state, 0); 251 __ li(R15_prev_state, 0);
251 252
252 // Stack on entry to frame manager / native entry: 253 // Stack on entry to frame manager / native entry:
272 __ mr(R21_tmp1, R1_SP); 273 __ mr(R21_tmp1, R1_SP);
273 274
274 // Do a light-weight C-call here, r_new_arg_entry holds the address 275 // Do a light-weight C-call here, r_new_arg_entry holds the address
275 // of the interpreter entry point (frame manager or native entry) 276 // of the interpreter entry point (frame manager or native entry)
276 // and save runtime-value of LR in return_address. 277 // and save runtime-value of LR in return_address.
277 assert(r_new_arg_entry != R17_tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread, 278 assert(r_new_arg_entry != tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread,
278 "trashed r_new_arg_entry"); 279 "trashed r_new_arg_entry");
279 return_address = __ call_stub(r_new_arg_entry); 280 return_address = __ call_stub(r_new_arg_entry);
280 } 281 }
281 282
282 { 283 {
324 325
325 // Store result depending on type. Everything that is not 326 // Store result depending on type. Everything that is not
326 // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT. 327 // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT.
327 __ cmpwi(CCR0, r_arg_result_type, T_OBJECT); 328 __ cmpwi(CCR0, r_arg_result_type, T_OBJECT);
328 __ cmpwi(CCR1, r_arg_result_type, T_LONG); 329 __ cmpwi(CCR1, r_arg_result_type, T_LONG);
329 __ cmpwi(CCR5, r_arg_result_type, T_FLOAT); 330 __ cmpwi(CCR5, r_arg_result_type, T_FLOAT);
330 __ cmpwi(CCR6, r_arg_result_type, T_DOUBLE); 331 __ cmpwi(CCR6, r_arg_result_type, T_DOUBLE);
331 332
332 // restore non-volatile registers 333 // restore non-volatile registers
333 __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); 334 __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
334 335
335 336
343 // All non-volatiles have been restored at this point!! 344 // All non-volatiles have been restored at this point!!
344 assert(R3_RET == R3, "R3_RET should be R3"); 345 assert(R3_RET == R3, "R3_RET should be R3");
345 346
346 __ beq(CCR0, ret_is_object); 347 __ beq(CCR0, ret_is_object);
347 __ beq(CCR1, ret_is_long); 348 __ beq(CCR1, ret_is_long);
348 __ beq(CCR5, ret_is_float); 349 __ beq(CCR5, ret_is_float);
349 __ beq(CCR6, ret_is_double); 350 __ beq(CCR6, ret_is_double);
350 351
351 // default: 352 // default:
352 __ stw(R3_RET, 0, r_arg_result_addr); 353 __ stw(R3_RET, 0, r_arg_result_addr);
353 __ blr(); // return to caller 354 __ blr(); // return to caller
354 355
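
The four cmpwi/beq pairs and the default stw above dispatch on the Java call's result type before returning. In C terms the stub's behavior is roughly the following sketch; the enum tags are hypothetical stand-ins for HotSpot's BasicType values.

  #include <cstdint>

  // Hypothetical tags standing in for HotSpot's BasicType constants.
  enum ResultKind { kInt, kObject, kLong, kFloat, kDouble };

  // OBJECT and LONG are stored as 64-bit values, FLOAT/DOUBLE come from the FP
  // return register, and everything else is treated as a 32-bit int.
  static void store_call_stub_result(ResultKind kind, void* result_addr,
                                     int64_t gpr_ret, double fpr_ret) {
    switch (kind) {
      case kObject:
      case kLong:   *(int64_t*)result_addr = gpr_ret;          break;
      case kFloat:  *(float*)result_addr   = (float)fpr_ret;   break;
      case kDouble: *(double*)result_addr  = fpr_ret;          break;
      default:      *(int32_t*)result_addr = (int32_t)gpr_ret; break;
    }
  }
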
612 case BarrierSet::G1SATBCTLogging: 613 case BarrierSet::G1SATBCTLogging:
613 // With G1, don't generate the call if we statically know that the target is uninitialized 614 // With G1, don't generate the call if we statically know that the target is uninitialized
614 if (!dest_uninitialized) { 615 if (!dest_uninitialized) {
615 const int spill_slots = 4 * wordSize; 616 const int spill_slots = 4 * wordSize;
616 const int frame_size = frame::abi_112_size + spill_slots; 617 const int frame_size = frame::abi_112_size + spill_slots;
618 Label filtered;
619
620 // Is marking active?
621 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
622 __ lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
623 } else {
624 guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
625 __ lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
626 }
627 __ cmpdi(CCR0, Rtmp1, 0);
628 __ beq(CCR0, filtered);
617 629
618 __ save_LR_CR(R0); 630 __ save_LR_CR(R0);
619 __ push_frame_abi112(spill_slots, R0); 631 __ push_frame_abi112(spill_slots, R0);
620 __ std(from, frame_size - 1 * wordSize, R1_SP); 632 __ std(from, frame_size - 1 * wordSize, R1_SP);
621 __ std(to, frame_size - 2 * wordSize, R1_SP); 633 __ std(to, frame_size - 2 * wordSize, R1_SP);
626 __ ld(from, frame_size - 1 * wordSize, R1_SP); 638 __ ld(from, frame_size - 1 * wordSize, R1_SP);
627 __ ld(to, frame_size - 2 * wordSize, R1_SP); 639 __ ld(to, frame_size - 2 * wordSize, R1_SP);
628 __ ld(count, frame_size - 3 * wordSize, R1_SP); 640 __ ld(count, frame_size - 3 * wordSize, R1_SP);
629 __ pop_frame(); 641 __ pop_frame();
630 __ restore_LR_CR(R0); 642 __ restore_LR_CR(R0);
643
644 __ bind(filtered);
631 } 645 }
632 break; 646 break;
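
The lines added on the right-hand side introduce a "filtered" fast path: the G1 pre-barrier runtime call is skipped whenever SATB marking is inactive. A hedged C++ sketch of that logic; the struct and field names below are paraphrases, not the real JavaThread/PtrQueue accessors.

  #include <cstddef>
  #include <cstdint>

  // Paraphrased stand-ins for JavaThread's SATB mark queue "active" flag; the
  // real offset comes from JavaThread::satb_mark_queue_offset() + PtrQueue.
  struct SatbQueueSketch  { uint8_t active; };
  struct JavaThreadSketch { SatbQueueSketch satb_queue; };

  typedef void (*PreBarrierFn)(void** dst, size_t count);

  // The inserted lwz/lbz + cmpdi + beq(filtered) sequence in C terms: only take
  // the expensive slow path when marking is active and the destination may be
  // initialized.
  static void g1_pre_barrier(JavaThreadSketch* thread, void** dst, size_t count,
                             bool dest_uninitialized, PreBarrierFn slow_path) {
    if (dest_uninitialized) return;             // filtered statically at stub-generation time
    if (thread->satb_queue.active == 0) return; // the new dynamic filter
    slow_path(dst, count);                      // spills from/to/count and calls the runtime
  }
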
633 case BarrierSet::CardTableModRef: 647 case BarrierSet::CardTableModRef:
634 case BarrierSet::CardTableExtension: 648 case BarrierSet::CardTableExtension:
635 case BarrierSet::ModRef: 649 case BarrierSet::ModRef:
646 // count - register containing element count 660 // count - register containing element count
647 // tmp - scratch register 661 // tmp - scratch register
648 // 662 //
649 // The input registers and R0 are overwritten. 663 // The input registers and R0 are overwritten.
650 // 664 //
651 void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) { 665 void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, bool branchToEnd) {
652 BarrierSet* const bs = Universe::heap()->barrier_set(); 666 BarrierSet* const bs = Universe::heap()->barrier_set();
653 667
654 switch (bs->kind()) { 668 switch (bs->kind()) {
655 case BarrierSet::G1SATBCT: 669 case BarrierSet::G1SATBCT:
656 case BarrierSet::G1SATBCTLogging: 670 case BarrierSet::G1SATBCTLogging:
657 { 671 {
658 __ save_LR_CR(R0); 672 if (branchToEnd) {
659 // We need this frame only that the callee can spill LR/CR. 673 __ save_LR_CR(R0);
660 __ push_frame_abi112(0, R0); 674 // We need this frame only to spill LR.
661 675 __ push_frame_abi112(0, R0);
662 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count); 676 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
663 677 __ pop_frame();
664 __ pop_frame(); 678 __ restore_LR_CR(R0);
665 __ restore_LR_CR(R0); 679 } else {
680 // Tail call: fake call from stub caller by branching without linking.
681 address entry_point = (address)CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
682 __ mr_if_needed(R3_ARG1, addr);
683 __ mr_if_needed(R4_ARG2, count);
684 __ load_const(R11, entry_point, R0);
685 __ call_c_and_return_to_caller(R11);
686 }
666 } 687 }
667 break; 688 break;
668 case BarrierSet::CardTableModRef: 689 case BarrierSet::CardTableModRef:
669 case BarrierSet::CardTableExtension: 690 case BarrierSet::CardTableExtension:
670 { 691 {
695 __ bind(Lstore_loop); 716 __ bind(Lstore_loop);
696 __ stbx(R0, tmp, addr); 717 __ stbx(R0, tmp, addr);
697 __ addi(addr, addr, 1); 718 __ addi(addr, addr, 1);
698 __ bdnz(Lstore_loop); 719 __ bdnz(Lstore_loop);
699 __ bind(Lskip_loop); 720 __ bind(Lskip_loop);
721
722 if (!branchToEnd) __ blr();
700 } 723 }
701 break; 724 break;
702 case BarrierSet::ModRef: 725 case BarrierSet::ModRef:
726 if (!branchToEnd) __ blr();
703 break; 727 break;
704 default: 728 default:
705 ShouldNotReachHere(); 729 ShouldNotReachHere();
706 } 730 }
707 } 731 }
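
gen_write_ref_array_post_barrier now takes a branchToEnd flag: when it is false, the barrier is the last thing the stub does, so the G1 case tail-calls the runtime routine via call_c_and_return_to_caller and the other cases end in blr, instead of falling through to a separate return sequence. For the card-table case, the Lskip_loop/Lstore_loop sequence amounts to the following sketch; the card size and pre-biased base are assumptions in the spirit of CardTableModRefBS, not its actual API.

  #include <cstddef>
  #include <cstdint>

  static const int     kCardShift = 9; // 512-byte cards (assumption)
  static const uint8_t kDirtyCard = 0;

  // Dirty every card byte covering the stored-to oop range
  // [addr, addr + count * sizeof(oop)). card_table_base is assumed to be
  // pre-biased so that card_table_base[addr >> kCardShift] is addr's card.
  static void post_barrier_card_table(uint8_t* card_table_base,
                                      uintptr_t addr, size_t count) {
    if (count == 0) return;
    uintptr_t last = addr + count * sizeof(void*) - 1;
    for (uintptr_t c = addr >> kCardShift; c <= (last >> kCardShift); ++c) {
      card_table_base[c] = kDirtyCard;
    }
  }
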
845 // -XX:+OptimizeFill : convert fill/copy loops into intrinsic 869 // -XX:+OptimizeFill : convert fill/copy loops into intrinsic
846 // 870 //
847 // The code is implemented (ported from SPARC) as we believe it benefits JVM98; however, 871 // The code is implemented (ported from SPARC) as we believe it benefits JVM98; however,
848 // tracing (-XX:+TraceOptimizeFill) shows the intrinsic replacement doesn't happen at all! 872 // tracing (-XX:+TraceOptimizeFill) shows the intrinsic replacement doesn't happen at all!
849 // 873 //
850 // Source code in function is_range_check_if() shows OptimizeFill relaxed the condition 874 // Source code in function is_range_check_if() shows that OptimizeFill relaxed the condition
851 // for turning on loop predication optimization, and hence the behavior of "array range check" 875 // for turning on loop predication optimization, and hence the behavior of "array range check"
852 // and "loop invariant check" could be influenced, which potentially boosted JVM98. 876 // and "loop invariant check" could be influenced, which potentially boosted JVM98.
853 // 877 //
854 // We leave the code here and see if Oracle has updates in later releases(later than HS20). 878 // Generate stub for disjoint short fill. If "aligned" is true, the
855 // 879 // "to" address is assumed to be heapword aligned.
856 // Generate stub for disjoint short fill. If "aligned" is true, the
857 // "to" address is assumed to be heapword aligned.
858 // 880 //
859 // Arguments for generated stub: 881 // Arguments for generated stub:
860 // to: R3_ARG1 882 // to: R3_ARG1
861 // value: R4_ARG2 883 // value: R4_ARG2
862 // count: R5_ARG3 treated as signed 884 // count: R5_ARG3 treated as signed
863 // 885 //
864 address generate_fill(BasicType t, bool aligned, const char* name) { 886 address generate_fill(BasicType t, bool aligned, const char* name) {
865 StubCodeMark mark(this, "StubRoutines", name); 887 StubCodeMark mark(this, "StubRoutines", name);
866 address start = __ emit_fd(); 888 address start = __ emit_fd();
867 889
868 const Register to = R3_ARG1; // destination array address 890 const Register to = R3_ARG1; // destination array address
869 const Register value = R4_ARG2; // fill value 891 const Register value = R4_ARG2; // fill value
870 const Register count = R5_ARG3; // elements count 892 const Register count = R5_ARG3; // elements count
871 const Register temp = R6_ARG4; // temp register 893 const Register temp = R6_ARG4; // temp register
872 894
873 //assert_clean_int(count, O3); // Make sure 'count' is clean int. 895 //assert_clean_int(count, O3); // Make sure 'count' is clean int.
874 896
875 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 897 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
876 Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes; 898 Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes;
877 899
878 int shift = -1; 900 int shift = -1;
879 switch (t) { 901 switch (t) {
880 case T_BYTE: 902 case T_BYTE:
881 shift = 2; 903 shift = 2;
882 // clone bytes (zero extend not needed because store instructions below ignore high order bytes) 904 // Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
883 __ rldimi(value, value, 8, 48); // 8 bit -> 16 bit 905 __ rldimi(value, value, 8, 48); // 8 bit -> 16 bit
884 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element 906 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
885 __ blt(CCR0, L_fill_elements); 907 __ blt(CCR0, L_fill_elements);
886 __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit 908 __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
887 break; 909 break;
888 case T_SHORT: 910 case T_SHORT:
889 shift = 1; 911 shift = 1;
890 // clone bytes (zero extend not needed because store instructions below ignore high order bytes) 912 // Clone bytes (zero extend not needed because store instructions below ignore high order bytes).
891 __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit 913 __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit
892 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element 914 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
893 __ blt(CCR0, L_fill_elements); 915 __ blt(CCR0, L_fill_elements);
894 break; 916 break;
895 case T_INT: 917 case T_INT:
896 shift = 0; 918 shift = 0;
897 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element 919 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element.
898 __ blt(CCR0, L_fill_4_bytes); 920 __ blt(CCR0, L_fill_4_bytes);
899 break; 921 break;
900 default: ShouldNotReachHere(); 922 default: ShouldNotReachHere();
901 } 923 }
902 924
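
The rldimi instructions in the switch above replicate the fill element across the register before the wide stores. A C model of that widening; the explicit masking here stands in for the "zero extend not needed" trick mentioned in the comments.

  #include <cstdint>

  // Replicate the fill element across a 64-bit pattern: 8->16->32 bits in the
  // switch, 32->64 bits just before the 32-byte loop.
  static uint64_t replicate_fill_value(uint64_t value, int element_size_in_bytes) {
    if (element_size_in_bytes == 1) { value &= 0xff;   value |= value << 8;  }
    if (element_size_in_bytes <= 2) { value &= 0xffff; value |= value << 16; }
    value &= 0xffffffffu;
    value |= value << 32;
    return value;
  }
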
903 if (!aligned && (t == T_BYTE || t == T_SHORT)) { 925 if (!aligned && (t == T_BYTE || t == T_SHORT)) {
904 // align source address at 4 bytes address boundary 926 // Align source address at 4 bytes address boundary.
905 if (t == T_BYTE) { 927 if (t == T_BYTE) {
906 // One byte misalignment happens only for byte arrays 928 // One byte misalignment happens only for byte arrays.
907 __ andi_(temp, to, 1); 929 __ andi_(temp, to, 1);
908 __ beq(CCR0, L_skip_align1); 930 __ beq(CCR0, L_skip_align1);
909 __ stb(value, 0, to); 931 __ stb(value, 0, to);
910 __ addi(to, to, 1); 932 __ addi(to, to, 1);
911 __ addi(count, count, -1); 933 __ addi(count, count, -1);
928 __ addi(to, to, 4); 950 __ addi(to, to, 4);
929 __ addi(count, count, -(1 << shift)); 951 __ addi(count, count, -(1 << shift));
930 __ bind(L_fill_32_bytes); 952 __ bind(L_fill_32_bytes);
931 } 953 }
932 954
933 __ li(temp, 8<<shift); // prepare for 32 byte loop 955 __ li(temp, 8<<shift); // Prepare for 32 byte loop.
934 // clone bytes int->long as above 956 // Clone bytes int->long as above.
935 __ rldimi(value, value, 32, 0); // 32 bit -> 64 bit 957 __ rldimi(value, value, 32, 0); // 32 bit -> 64 bit
936 958
937 Label L_check_fill_8_bytes; 959 Label L_check_fill_8_bytes;
938 // Fill 32-byte chunks 960 // Fill 32-byte chunks.
939 __ subf_(count, temp, count); 961 __ subf_(count, temp, count);
940 __ blt(CCR0, L_check_fill_8_bytes); 962 __ blt(CCR0, L_check_fill_8_bytes);
941 963
942 Label L_fill_32_bytes_loop; 964 Label L_fill_32_bytes_loop;
943 __ align(32); 965 __ align(32);
944 __ bind(L_fill_32_bytes_loop); 966 __ bind(L_fill_32_bytes_loop);
945 967
946 __ std(value, 0, to); 968 __ std(value, 0, to);
947 __ std(value, 8, to); 969 __ std(value, 8, to);
948 __ subf_(count, temp, count); // update count 970 __ subf_(count, temp, count); // Update count.
949 __ std(value, 16, to); 971 __ std(value, 16, to);
950 __ std(value, 24, to); 972 __ std(value, 24, to);
951 973
952 __ addi(to, to, 32); 974 __ addi(to, to, 32);
953 __ bge(CCR0, L_fill_32_bytes_loop); 975 __ bge(CCR0, L_fill_32_bytes_loop);
966 __ std(value, 0, to); 988 __ std(value, 0, to);
967 __ addic_(count, count, -(2 << shift)); 989 __ addic_(count, count, -(2 << shift));
968 __ addi(to, to, 8); 990 __ addi(to, to, 8);
969 __ bge(CCR0, L_fill_8_bytes_loop); 991 __ bge(CCR0, L_fill_8_bytes_loop);
970 992
971 // fill trailing 4 bytes 993 // Fill trailing 4 bytes.
972 __ bind(L_fill_4_bytes); 994 __ bind(L_fill_4_bytes);
973 __ andi_(temp, count, 1<<shift); 995 __ andi_(temp, count, 1<<shift);
974 __ beq(CCR0, L_fill_2_bytes); 996 __ beq(CCR0, L_fill_2_bytes);
975 997
976 __ stw(value, 0, to); 998 __ stw(value, 0, to);
977 if (t == T_BYTE || t == T_SHORT) { 999 if (t == T_BYTE || t == T_SHORT) {
978 __ addi(to, to, 4); 1000 __ addi(to, to, 4);
979 // fill trailing 2 bytes 1001 // Fill trailing 2 bytes.
980 __ bind(L_fill_2_bytes); 1002 __ bind(L_fill_2_bytes);
981 __ andi_(temp, count, 1<<(shift-1)); 1003 __ andi_(temp, count, 1<<(shift-1));
982 __ beq(CCR0, L_fill_byte); 1004 __ beq(CCR0, L_fill_byte);
983 __ sth(value, 0, to); 1005 __ sth(value, 0, to);
984 if (t == T_BYTE) { 1006 if (t == T_BYTE) {
985 __ addi(to, to, 2); 1007 __ addi(to, to, 2);
986 // fill trailing byte 1008 // Fill trailing byte.
987 __ bind(L_fill_byte); 1009 __ bind(L_fill_byte);
988 __ andi_(count, count, 1); 1010 __ andi_(count, count, 1);
989 __ beq(CCR0, L_exit); 1011 __ beq(CCR0, L_exit);
990 __ stb(value, 0, to); 1012 __ stb(value, 0, to);
991 } else { 1013 } else {
995 __ bind(L_fill_2_bytes); 1017 __ bind(L_fill_2_bytes);
996 } 1018 }
997 __ bind(L_exit); 1019 __ bind(L_exit);
998 __ blr(); 1020 __ blr();
999 1021
1000 // Handle copies less than 8 bytes. Int is handled elsewhere. 1022 // Handle copies less than 8 bytes. Int is handled elsewhere.
1001 if (t == T_BYTE) { 1023 if (t == T_BYTE) {
1002 __ bind(L_fill_elements); 1024 __ bind(L_fill_elements);
1003 Label L_fill_2, L_fill_4; 1025 Label L_fill_2, L_fill_4;
1004 __ andi_(temp, count, 1); 1026 __ andi_(temp, count, 1);
1005 __ beq(CCR0, L_fill_2); 1027 __ beq(CCR0, L_fill_2);
1037 } 1059 }
1038 return start; 1060 return start;
1039 } 1061 }
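
Taken as a whole, the stub behaves like this portable sketch for the T_BYTE case. It is illustrative only: the real code aligns the destination to 4 bytes first and handles the T_SHORT/T_INT tails as shown above.

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Widen the value, align the destination, fill 32-byte chunks, then the tail.
  static void fill_bytes(uint8_t* to, uint8_t value, size_t count) {
    uint64_t pattern = value;
    pattern |= pattern << 8;
    pattern |= pattern << 16;
    pattern |= pattern << 32;
    while (count > 0 && ((uintptr_t)to & 7) != 0) { *to++ = value; --count; }
    while (count >= 32) {
      std::memcpy(to,      &pattern, 8);
      std::memcpy(to + 8,  &pattern, 8);
      std::memcpy(to + 16, &pattern, 8);
      std::memcpy(to + 24, &pattern, 8);
      to += 32; count -= 32;
    }
    while (count >= 8) { std::memcpy(to, &pattern, 8); to += 8; count -= 8; }
    while (count > 0)  { *to++ = value; --count; }
  }
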
1040 1062
1041 1063
1042 // Generate overlap test for array copy stubs 1064 // Generate overlap test for array copy stubs.
1043 // 1065 //
1044 // Input: 1066 // Input:
1045 // R3_ARG1 - from 1067 // R3_ARG1 - from
1046 // R4_ARG2 - to 1068 // R4_ARG2 - to
1047 // R5_ARG3 - element count 1069 // R5_ARG3 - element count
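
For context, the overlap test named here reduces to a single unsigned distance compare. A hedged sketch of the decision only; the helper name and parameters are illustrative, not the stub's actual signature.

  #include <cstddef>
  #include <cstdint>

  // A forward disjoint copy is safe when the destination starts at least
  // count << log2_elem_size bytes after the source, or anywhere below it
  // (unsigned wrap-around makes dist huge in that case). Otherwise the
  // conjoint (backward) copy core is used.
  static bool can_use_disjoint_copy(const void* from, const void* to,
                                    size_t element_count, int log2_elem_size) {
    uintptr_t dist = (uintptr_t)to - (uintptr_t)from;
    return dist >= (element_count << log2_elem_size);
  }
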
1871 } else { 1893 } else {
1872 array_overlap_test(nooverlap_target, 3); 1894 array_overlap_test(nooverlap_target, 3);
1873 generate_conjoint_long_copy_core(aligned); 1895 generate_conjoint_long_copy_core(aligned);
1874 } 1896 }
1875 1897
1876 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1); 1898 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
1877
1878 __ blr();
1879
1880 return start; 1899 return start;
1881 } 1900 }
1882 1901
1883 // Generate stub for disjoint oop copy. If "aligned" is true, the 1902 // Generate stub for disjoint oop copy. If "aligned" is true, the
1884 // "from" and "to" addresses are assumed to be heapword aligned. 1903 // "from" and "to" addresses are assumed to be heapword aligned.
1904 generate_disjoint_int_copy_core(aligned); 1923 generate_disjoint_int_copy_core(aligned);
1905 } else { 1924 } else {
1906 generate_disjoint_long_copy_core(aligned); 1925 generate_disjoint_long_copy_core(aligned);
1907 } 1926 }
1908 1927
1909 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1); 1928 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
1910
1911 __ blr();
1912 1929
1913 return start; 1930 return start;
1914 } 1931 }
1915 1932
1916 void generate_arraycopy_stubs() { 1933 void generate_arraycopy_stubs() {