Mercurial > hg > graal-compiler
comparison src/cpu/ppc/vm/stubGenerator_ppc.cpp @ 14445:67fa91961822
8029940: PPC64 (part 122): C2 compiler port
Reviewed-by: kvn
author | goetz |
---|---|
date | Wed, 11 Dec 2013 00:06:11 +0100 |
parents | ec28f9c041ff |
children | b858620b0081 |
comparison
equal
deleted
inserted
replaced
14444:492e67693373 | 14445:67fa91961822 |
---|---|
144 r_arg_argument_count, Interpreter::logStackElementSize); | 144 r_arg_argument_count, Interpreter::logStackElementSize); |
145 // arguments alignment (max 1 slot) | 145 // arguments alignment (max 1 slot) |
146 // FIXME: use round_to() here | 146 // FIXME: use round_to() here |
147 __ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1); | 147 __ andi_(r_frame_alignment_in_bytes, r_arg_argument_count, 1); |
148 __ sldi(r_frame_alignment_in_bytes, | 148 __ sldi(r_frame_alignment_in_bytes, |
149 r_frame_alignment_in_bytes, Interpreter::logStackElementSize); | 149 r_frame_alignment_in_bytes, Interpreter::logStackElementSize); |
150 | 150 |
151 // size = unaligned size of arguments + top abi's size | 151 // size = unaligned size of arguments + top abi's size |
152 __ addi(r_frame_size, r_argument_size_in_bytes, | 152 __ addi(r_frame_size, r_argument_size_in_bytes, |
153 frame::top_ijava_frame_abi_size); | 153 frame::top_ijava_frame_abi_size); |
154 // size += arguments alignment | 154 // size += arguments alignment |
155 __ add(r_frame_size, | 155 __ add(r_frame_size, |
156 r_frame_size, r_frame_alignment_in_bytes); | 156 r_frame_size, r_frame_alignment_in_bytes); |
157 // size += size of call_stub locals | 157 // size += size of call_stub locals |
158 __ addi(r_frame_size, | 158 __ addi(r_frame_size, |
159 r_frame_size, frame::entry_frame_locals_size); | 159 r_frame_size, frame::entry_frame_locals_size); |
160 | 160 |
161 // push ENTRY_FRAME | 161 // push ENTRY_FRAME |
177 // Calculate top_of_arguments_addr which will be R17_tos (not prepushed) later. | 177 // Calculate top_of_arguments_addr which will be R17_tos (not prepushed) later. |
178 // FIXME: why not simply use SP+frame::top_ijava_frame_size? | 178 // FIXME: why not simply use SP+frame::top_ijava_frame_size? |
179 __ addi(r_top_of_arguments_addr, | 179 __ addi(r_top_of_arguments_addr, |
180 R1_SP, frame::top_ijava_frame_abi_size); | 180 R1_SP, frame::top_ijava_frame_abi_size); |
181 __ add(r_top_of_arguments_addr, | 181 __ add(r_top_of_arguments_addr, |
182 r_top_of_arguments_addr, r_frame_alignment_in_bytes); | 182 r_top_of_arguments_addr, r_frame_alignment_in_bytes); |
183 | 183 |
184 // any arguments to copy? | 184 // any arguments to copy? |
185 __ cmpdi(CCR0, r_arg_argument_count, 0); | 185 __ cmpdi(CCR0, r_arg_argument_count, 0); |
186 __ beq(CCR0, arguments_copied); | 186 __ beq(CCR0, arguments_copied); |
187 | 187 |
227 | 227 |
228 __ mr(r_new_arg_entry, r_arg_entry); | 228 __ mr(r_new_arg_entry, r_arg_entry); |
229 | 229 |
230 // Register state on entry to frame manager / native entry: | 230 // Register state on entry to frame manager / native entry: |
231 // | 231 // |
232 // R17_tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8 | 232 // tos - intptr_t* sender tos (prepushed) Lesp = (SP) + copied_arguments_offset - 8 |
233 // R19_method - Method | 233 // R19_method - Method |
234 // R16_thread - JavaThread* | 234 // R16_thread - JavaThread* |
235 | 235 |
236 // R17_tos must point to last argument - element_size. | 236 // Tos must point to last argument - element_size. |
237 __ addi(R17_tos, r_top_of_arguments_addr, -Interpreter::stackElementSize); | 237 const Register tos = R17_tos; |
238 __ addi(tos, r_top_of_arguments_addr, -Interpreter::stackElementSize); | |
238 | 239 |
239 // initialize call_stub locals (step 2) | 240 // initialize call_stub locals (step 2) |
240 // now save R17_tos as arguments_tos_address | 241 // now save tos as arguments_tos_address |
241 __ std(R17_tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp); | 242 __ std(tos, _entry_frame_locals_neg(arguments_tos_address), r_entryframe_fp); |
242 | 243 |
243 // load argument registers for call | 244 // load argument registers for call |
244 __ mr(R19_method, r_arg_method); | 245 __ mr(R19_method, r_arg_method); |
245 __ mr(R16_thread, r_arg_thread); | 246 __ mr(R16_thread, r_arg_thread); |
246 assert(R17_tos != r_arg_method, "trashed r_arg_method"); | 247 assert(tos != r_arg_method, "trashed r_arg_method"); |
247 assert(R17_tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread"); | 248 assert(tos != r_arg_thread && R19_method != r_arg_thread, "trashed r_arg_thread"); |
248 | 249 |
249 // Set R15_prev_state to 0 for simplifying checks in callee. | 250 // Set R15_prev_state to 0 for simplifying checks in callee. |
250 __ li(R15_prev_state, 0); | 251 __ li(R15_prev_state, 0); |
251 | 252 |
252 // Stack on entry to frame manager / native entry: | 253 // Stack on entry to frame manager / native entry: |
272 __ mr(R21_tmp1, R1_SP); | 273 __ mr(R21_tmp1, R1_SP); |
273 | 274 |
274 // Do a light-weight C-call here, r_new_arg_entry holds the address | 275 // Do a light-weight C-call here, r_new_arg_entry holds the address |
275 // of the interpreter entry point (frame manager or native entry) | 276 // of the interpreter entry point (frame manager or native entry) |
276 // and save runtime-value of LR in return_address. | 277 // and save runtime-value of LR in return_address. |
277 assert(r_new_arg_entry != R17_tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread, | 278 assert(r_new_arg_entry != tos && r_new_arg_entry != R19_method && r_new_arg_entry != R16_thread, |
278 "trashed r_new_arg_entry"); | 279 "trashed r_new_arg_entry"); |
279 return_address = __ call_stub(r_new_arg_entry); | 280 return_address = __ call_stub(r_new_arg_entry); |
280 } | 281 } |
281 | 282 |
282 { | 283 { |
324 | 325 |
325 // Store result depending on type. Everything that is not | 326 // Store result depending on type. Everything that is not |
326 // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT. | 327 // T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE is treated as T_INT. |
327 __ cmpwi(CCR0, r_arg_result_type, T_OBJECT); | 328 __ cmpwi(CCR0, r_arg_result_type, T_OBJECT); |
328 __ cmpwi(CCR1, r_arg_result_type, T_LONG); | 329 __ cmpwi(CCR1, r_arg_result_type, T_LONG); |
329 __ cmpwi(CCR5, r_arg_result_type, T_FLOAT); | 330 __ cmpwi(CCR5, r_arg_result_type, T_FLOAT); |
330 __ cmpwi(CCR6, r_arg_result_type, T_DOUBLE); | 331 __ cmpwi(CCR6, r_arg_result_type, T_DOUBLE); |
331 | 332 |
332 // restore non-volatile registers | 333 // restore non-volatile registers |
333 __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); | 334 __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14)); |
334 | 335 |
335 | 336 |
343 // All non-volatiles have been restored at this point!! | 344 // All non-volatiles have been restored at this point!! |
344 assert(R3_RET == R3, "R3_RET should be R3"); | 345 assert(R3_RET == R3, "R3_RET should be R3"); |
345 | 346 |
346 __ beq(CCR0, ret_is_object); | 347 __ beq(CCR0, ret_is_object); |
347 __ beq(CCR1, ret_is_long); | 348 __ beq(CCR1, ret_is_long); |
348 __ beq(CCR5, ret_is_float); | 349 __ beq(CCR5, ret_is_float); |
349 __ beq(CCR6, ret_is_double); | 350 __ beq(CCR6, ret_is_double); |
350 | 351 |
351 // default: | 352 // default: |
352 __ stw(R3_RET, 0, r_arg_result_addr); | 353 __ stw(R3_RET, 0, r_arg_result_addr); |
353 __ blr(); // return to caller | 354 __ blr(); // return to caller |
354 | 355 |
612 case BarrierSet::G1SATBCTLogging: | 613 case BarrierSet::G1SATBCTLogging: |
613 // With G1, don't generate the call if we statically know that the target is uninitialized | 614 // With G1, don't generate the call if we statically know that the target is uninitialized |
614 if (!dest_uninitialized) { | 615 if (!dest_uninitialized) { |
615 const int spill_slots = 4 * wordSize; | 616 const int spill_slots = 4 * wordSize; |
616 const int frame_size = frame::abi_112_size + spill_slots; | 617 const int frame_size = frame::abi_112_size + spill_slots; |
618 Label filtered; | |
619 | |
620 // Is marking active? | |
621 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { | |
622 __ lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread); | |
623 } else { | |
624 guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); | |
625 __ lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread); | |
626 } | |
627 __ cmpdi(CCR0, Rtmp1, 0); | |
628 __ beq(CCR0, filtered); | |
617 | 629 |
618 __ save_LR_CR(R0); | 630 __ save_LR_CR(R0); |
619 __ push_frame_abi112(spill_slots, R0); | 631 __ push_frame_abi112(spill_slots, R0); |
620 __ std(from, frame_size - 1 * wordSize, R1_SP); | 632 __ std(from, frame_size - 1 * wordSize, R1_SP); |
621 __ std(to, frame_size - 2 * wordSize, R1_SP); | 633 __ std(to, frame_size - 2 * wordSize, R1_SP); |
626 __ ld(from, frame_size - 1 * wordSize, R1_SP); | 638 __ ld(from, frame_size - 1 * wordSize, R1_SP); |
627 __ ld(to, frame_size - 2 * wordSize, R1_SP); | 639 __ ld(to, frame_size - 2 * wordSize, R1_SP); |
628 __ ld(count, frame_size - 3 * wordSize, R1_SP); | 640 __ ld(count, frame_size - 3 * wordSize, R1_SP); |
629 __ pop_frame(); | 641 __ pop_frame(); |
630 __ restore_LR_CR(R0); | 642 __ restore_LR_CR(R0); |
643 | |
644 __ bind(filtered); | |
631 } | 645 } |
632 break; | 646 break; |
633 case BarrierSet::CardTableModRef: | 647 case BarrierSet::CardTableModRef: |
634 case BarrierSet::CardTableExtension: | 648 case BarrierSet::CardTableExtension: |
635 case BarrierSet::ModRef: | 649 case BarrierSet::ModRef: |
646 // count - register containing element count | 660 // count - register containing element count |
647 // tmp - scratch register | 661 // tmp - scratch register |
648 // | 662 // |
649 // The input registers and R0 are overwritten. | 663 // The input registers and R0 are overwritten. |
650 // | 664 // |
651 void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) { | 665 void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, bool branchToEnd) { |
652 BarrierSet* const bs = Universe::heap()->barrier_set(); | 666 BarrierSet* const bs = Universe::heap()->barrier_set(); |
653 | 667 |
654 switch (bs->kind()) { | 668 switch (bs->kind()) { |
655 case BarrierSet::G1SATBCT: | 669 case BarrierSet::G1SATBCT: |
656 case BarrierSet::G1SATBCTLogging: | 670 case BarrierSet::G1SATBCTLogging: |
657 { | 671 { |
658 __ save_LR_CR(R0); | 672 if (branchToEnd) { |
659 // We need this frame only that the callee can spill LR/CR. | 673 __ save_LR_CR(R0); |
660 __ push_frame_abi112(0, R0); | 674 // We need this frame only to spill LR. |
661 | 675 __ push_frame_abi112(0, R0); |
662 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count); | 676 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count); |
663 | 677 __ pop_frame(); |
664 __ pop_frame(); | 678 __ restore_LR_CR(R0); |
665 __ restore_LR_CR(R0); | 679 } else { |
680 // Tail call: fake call from stub caller by branching without linking. | |
681 address entry_point = (address)CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post); | |
682 __ mr_if_needed(R3_ARG1, addr); | |
683 __ mr_if_needed(R4_ARG2, count); | |
684 __ load_const(R11, entry_point, R0); | |
685 __ call_c_and_return_to_caller(R11); | |
686 } | |
666 } | 687 } |
667 break; | 688 break; |
668 case BarrierSet::CardTableModRef: | 689 case BarrierSet::CardTableModRef: |
669 case BarrierSet::CardTableExtension: | 690 case BarrierSet::CardTableExtension: |
670 { | 691 { |
695 __ bind(Lstore_loop); | 716 __ bind(Lstore_loop); |
696 __ stbx(R0, tmp, addr); | 717 __ stbx(R0, tmp, addr); |
697 __ addi(addr, addr, 1); | 718 __ addi(addr, addr, 1); |
698 __ bdnz(Lstore_loop); | 719 __ bdnz(Lstore_loop); |
699 __ bind(Lskip_loop); | 720 __ bind(Lskip_loop); |
721 | |
722 if (!branchToEnd) __ blr(); | |
700 } | 723 } |
701 break; | 724 break; |
702 case BarrierSet::ModRef: | 725 case BarrierSet::ModRef: |
726 if (!branchToEnd) __ blr(); | |
703 break; | 727 break; |
704 default: | 728 default: |
705 ShouldNotReachHere(); | 729 ShouldNotReachHere(); |
706 } | 730 } |
707 } | 731 } |
845 // -XX:+OptimizeFill : convert fill/copy loops into intrinsic | 869 // -XX:+OptimizeFill : convert fill/copy loops into intrinsic |
846 // | 870 // |
847 // The code is implemented(ported from sparc) as we believe it benefits JVM98, however | 871 // The code is implemented(ported from sparc) as we believe it benefits JVM98, however |
848 // tracing(-XX:+TraceOptimizeFill) shows the intrinsic replacement doesn't happen at all! | 872 // tracing(-XX:+TraceOptimizeFill) shows the intrinsic replacement doesn't happen at all! |
849 // | 873 // |
850 // Source code in function is_range_check_if() shows OptimizeFill relaxed the condition | 874 // Source code in function is_range_check_if() shows that OptimizeFill relaxed the condition |
851 // for turning on loop predication optimization, and hence the behavior of "array range check" | 875 // for turning on loop predication optimization, and hence the behavior of "array range check" |
852 // and "loop invariant check" could be influenced, which potentially boosted JVM98. | 876 // and "loop invariant check" could be influenced, which potentially boosted JVM98. |
853 // | 877 // |
854 // We leave the code here and see if Oracle has updates in later releases(later than HS20). | 878 // Generate stub for disjoint short fill. If "aligned" is true, the |
855 // | 879 // "to" address is assumed to be heapword aligned. |
856 // Generate stub for disjoint short fill. If "aligned" is true, the | |
857 // "to" address is assumed to be heapword aligned. | |
858 // | 880 // |
859 // Arguments for generated stub: | 881 // Arguments for generated stub: |
860 // to: R3_ARG1 | 882 // to: R3_ARG1 |
861 // value: R4_ARG2 | 883 // value: R4_ARG2 |
862 // count: R5_ARG3 treated as signed | 884 // count: R5_ARG3 treated as signed |
863 // | 885 // |
864 address generate_fill(BasicType t, bool aligned, const char* name) { | 886 address generate_fill(BasicType t, bool aligned, const char* name) { |
865 StubCodeMark mark(this, "StubRoutines", name); | 887 StubCodeMark mark(this, "StubRoutines", name); |
866 address start = __ emit_fd(); | 888 address start = __ emit_fd(); |
867 | 889 |
868 const Register to = R3_ARG1; // destination array address | 890 const Register to = R3_ARG1; // destination array address |
869 const Register value = R4_ARG2; // fill value | 891 const Register value = R4_ARG2; // fill value |
870 const Register count = R5_ARG3; // elements count | 892 const Register count = R5_ARG3; // elements count |
871 const Register temp = R6_ARG4; // temp register | 893 const Register temp = R6_ARG4; // temp register |
872 | 894 |
873 //assert_clean_int(count, O3); // Make sure 'count' is clean int. | 895 //assert_clean_int(count, O3); // Make sure 'count' is clean int. |
874 | 896 |
875 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; | 897 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; |
876 Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes; | 898 Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes; |
877 | 899 |
878 int shift = -1; | 900 int shift = -1; |
879 switch (t) { | 901 switch (t) { |
880 case T_BYTE: | 902 case T_BYTE: |
881 shift = 2; | 903 shift = 2; |
882 // clone bytes (zero extend not needed because store instructions below ignore high order bytes) | 904 // Clone bytes (zero extend not needed because store instructions below ignore high order bytes). |
883 __ rldimi(value, value, 8, 48); // 8 bit -> 16 bit | 905 __ rldimi(value, value, 8, 48); // 8 bit -> 16 bit |
884 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element | 906 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element. |
885 __ blt(CCR0, L_fill_elements); | 907 __ blt(CCR0, L_fill_elements); |
886 __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit | 908 __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit |
887 break; | 909 break; |
888 case T_SHORT: | 910 case T_SHORT: |
889 shift = 1; | 911 shift = 1; |
890 // clone bytes (zero extend not needed because store instructions below ignore high order bytes) | 912 // Clone bytes (zero extend not needed because store instructions below ignore high order bytes). |
891 __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit | 913 __ rldimi(value, value, 16, 32); // 16 bit -> 32 bit |
892 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element | 914 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element. |
893 __ blt(CCR0, L_fill_elements); | 915 __ blt(CCR0, L_fill_elements); |
894 break; | 916 break; |
895 case T_INT: | 917 case T_INT: |
896 shift = 0; | 918 shift = 0; |
897 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element | 919 __ cmpdi(CCR0, count, 2<<shift); // Short arrays (< 8 bytes) fill by element. |
898 __ blt(CCR0, L_fill_4_bytes); | 920 __ blt(CCR0, L_fill_4_bytes); |
899 break; | 921 break; |
900 default: ShouldNotReachHere(); | 922 default: ShouldNotReachHere(); |
901 } | 923 } |
902 | 924 |
903 if (!aligned && (t == T_BYTE || t == T_SHORT)) { | 925 if (!aligned && (t == T_BYTE || t == T_SHORT)) { |
904 // align source address at 4 bytes address boundary | 926 // Align source address at 4 bytes address boundary. |
905 if (t == T_BYTE) { | 927 if (t == T_BYTE) { |
906 // One byte misalignment happens only for byte arrays | 928 // One byte misalignment happens only for byte arrays. |
907 __ andi_(temp, to, 1); | 929 __ andi_(temp, to, 1); |
908 __ beq(CCR0, L_skip_align1); | 930 __ beq(CCR0, L_skip_align1); |
909 __ stb(value, 0, to); | 931 __ stb(value, 0, to); |
910 __ addi(to, to, 1); | 932 __ addi(to, to, 1); |
911 __ addi(count, count, -1); | 933 __ addi(count, count, -1); |
928 __ addi(to, to, 4); | 950 __ addi(to, to, 4); |
929 __ addi(count, count, -(1 << shift)); | 951 __ addi(count, count, -(1 << shift)); |
930 __ bind(L_fill_32_bytes); | 952 __ bind(L_fill_32_bytes); |
931 } | 953 } |
932 | 954 |
933 __ li(temp, 8<<shift); // prepare for 32 byte loop | 955 __ li(temp, 8<<shift); // Prepare for 32 byte loop. |
934 // clone bytes int->long as above | 956 // Clone bytes int->long as above. |
935 __ rldimi(value, value, 32, 0); // 32 bit -> 64 bit | 957 __ rldimi(value, value, 32, 0); // 32 bit -> 64 bit |
936 | 958 |
937 Label L_check_fill_8_bytes; | 959 Label L_check_fill_8_bytes; |
938 // Fill 32-byte chunks | 960 // Fill 32-byte chunks. |
939 __ subf_(count, temp, count); | 961 __ subf_(count, temp, count); |
940 __ blt(CCR0, L_check_fill_8_bytes); | 962 __ blt(CCR0, L_check_fill_8_bytes); |
941 | 963 |
942 Label L_fill_32_bytes_loop; | 964 Label L_fill_32_bytes_loop; |
943 __ align(32); | 965 __ align(32); |
944 __ bind(L_fill_32_bytes_loop); | 966 __ bind(L_fill_32_bytes_loop); |
945 | 967 |
946 __ std(value, 0, to); | 968 __ std(value, 0, to); |
947 __ std(value, 8, to); | 969 __ std(value, 8, to); |
948 __ subf_(count, temp, count); // update count | 970 __ subf_(count, temp, count); // Update count. |
949 __ std(value, 16, to); | 971 __ std(value, 16, to); |
950 __ std(value, 24, to); | 972 __ std(value, 24, to); |
951 | 973 |
952 __ addi(to, to, 32); | 974 __ addi(to, to, 32); |
953 __ bge(CCR0, L_fill_32_bytes_loop); | 975 __ bge(CCR0, L_fill_32_bytes_loop); |
966 __ std(value, 0, to); | 988 __ std(value, 0, to); |
967 __ addic_(count, count, -(2 << shift)); | 989 __ addic_(count, count, -(2 << shift)); |
968 __ addi(to, to, 8); | 990 __ addi(to, to, 8); |
969 __ bge(CCR0, L_fill_8_bytes_loop); | 991 __ bge(CCR0, L_fill_8_bytes_loop); |
970 | 992 |
971 // fill trailing 4 bytes | 993 // Fill trailing 4 bytes. |
972 __ bind(L_fill_4_bytes); | 994 __ bind(L_fill_4_bytes); |
973 __ andi_(temp, count, 1<<shift); | 995 __ andi_(temp, count, 1<<shift); |
974 __ beq(CCR0, L_fill_2_bytes); | 996 __ beq(CCR0, L_fill_2_bytes); |
975 | 997 |
976 __ stw(value, 0, to); | 998 __ stw(value, 0, to); |
977 if (t == T_BYTE || t == T_SHORT) { | 999 if (t == T_BYTE || t == T_SHORT) { |
978 __ addi(to, to, 4); | 1000 __ addi(to, to, 4); |
979 // fill trailing 2 bytes | 1001 // Fill trailing 2 bytes. |
980 __ bind(L_fill_2_bytes); | 1002 __ bind(L_fill_2_bytes); |
981 __ andi_(temp, count, 1<<(shift-1)); | 1003 __ andi_(temp, count, 1<<(shift-1)); |
982 __ beq(CCR0, L_fill_byte); | 1004 __ beq(CCR0, L_fill_byte); |
983 __ sth(value, 0, to); | 1005 __ sth(value, 0, to); |
984 if (t == T_BYTE) { | 1006 if (t == T_BYTE) { |
985 __ addi(to, to, 2); | 1007 __ addi(to, to, 2); |
986 // fill trailing byte | 1008 // Fill trailing byte. |
987 __ bind(L_fill_byte); | 1009 __ bind(L_fill_byte); |
988 __ andi_(count, count, 1); | 1010 __ andi_(count, count, 1); |
989 __ beq(CCR0, L_exit); | 1011 __ beq(CCR0, L_exit); |
990 __ stb(value, 0, to); | 1012 __ stb(value, 0, to); |
991 } else { | 1013 } else { |
995 __ bind(L_fill_2_bytes); | 1017 __ bind(L_fill_2_bytes); |
996 } | 1018 } |
997 __ bind(L_exit); | 1019 __ bind(L_exit); |
998 __ blr(); | 1020 __ blr(); |
999 | 1021 |
1000 // Handle copies less than 8 bytes. Int is handled elsewhere. | 1022 // Handle copies less than 8 bytes. Int is handled elsewhere. |
1001 if (t == T_BYTE) { | 1023 if (t == T_BYTE) { |
1002 __ bind(L_fill_elements); | 1024 __ bind(L_fill_elements); |
1003 Label L_fill_2, L_fill_4; | 1025 Label L_fill_2, L_fill_4; |
1004 __ andi_(temp, count, 1); | 1026 __ andi_(temp, count, 1); |
1005 __ beq(CCR0, L_fill_2); | 1027 __ beq(CCR0, L_fill_2); |
1037 } | 1059 } |
1038 return start; | 1060 return start; |
1039 } | 1061 } |
1040 | 1062 |
1041 | 1063 |
1042 // Generate overlap test for array copy stubs | 1064 // Generate overlap test for array copy stubs. |
1043 // | 1065 // |
1044 // Input: | 1066 // Input: |
1045 // R3_ARG1 - from | 1067 // R3_ARG1 - from |
1046 // R4_ARG2 - to | 1068 // R4_ARG2 - to |
1047 // R5_ARG3 - element count | 1069 // R5_ARG3 - element count |
1871 } else { | 1893 } else { |
1872 array_overlap_test(nooverlap_target, 3); | 1894 array_overlap_test(nooverlap_target, 3); |
1873 generate_conjoint_long_copy_core(aligned); | 1895 generate_conjoint_long_copy_core(aligned); |
1874 } | 1896 } |
1875 | 1897 |
1876 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1); | 1898 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false); |
1877 | |
1878 __ blr(); | |
1879 | |
1880 return start; | 1899 return start; |
1881 } | 1900 } |
1882 | 1901 |
1883 // Generate stub for disjoint oop copy. If "aligned" is true, the | 1902 // Generate stub for disjoint oop copy. If "aligned" is true, the |
1884 // "from" and "to" addresses are assumed to be heapword aligned. | 1903 // "from" and "to" addresses are assumed to be heapword aligned. |
1904 generate_disjoint_int_copy_core(aligned); | 1923 generate_disjoint_int_copy_core(aligned); |
1905 } else { | 1924 } else { |
1906 generate_disjoint_long_copy_core(aligned); | 1925 generate_disjoint_long_copy_core(aligned); |
1907 } | 1926 } |
1908 | 1927 |
1909 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1); | 1928 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false); |
1910 | |
1911 __ blr(); | |
1912 | 1929 |
1913 return start; | 1930 return start; |
1914 } | 1931 } |
1915 | 1932 |
1916 void generate_arraycopy_stubs() { | 1933 void generate_arraycopy_stubs() { |