comparison src/cpu/x86/vm/x86_64.ad @ 681:fbde8ec322d0

6761600: Use sse 4.2 in intrinsics
Summary: Use SSE 4.2 in intrinsics for String.{compareTo/equals/indexOf} and Arrays.equals.
Reviewed-by: kvn, never, jrose
author cfang
date Tue, 31 Mar 2009 14:07:08 -0700
parents d0994e5bebce
children 93c14e5562c4
comparison
equal deleted inserted replaced
676:d3676b4cb78c 681:fbde8ec322d0
3692 masm.nop(); // avoid branch to branch 3692 masm.nop(); // avoid branch to branch
3693 } 3693 }
3694 } 3694 }
3695 %} 3695 %}
3696 3696
3697 enc_class enc_String_Compare() 3697 enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
3698 %{ 3698 rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
3699 Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL, 3699 Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3700 POP_LABEL, DONE_LABEL, CONT_LABEL, 3700 POP_LABEL, DONE_LABEL, CONT_LABEL,
3701 WHILE_HEAD_LABEL; 3701 WHILE_HEAD_LABEL;
3702 MacroAssembler masm(&cbuf); 3702 MacroAssembler masm(&cbuf);
3703
3704 XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg);
3705 XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg);
3703 3706
3704 // Get the first character position in both strings 3707 // Get the first character position in both strings
3705 // [8] char array, [12] offset, [16] count 3708 // [8] char array, [12] offset, [16] count
3706 int value_offset = java_lang_String::value_offset_in_bytes(); 3709 int value_offset = java_lang_String::value_offset_in_bytes();
3707 int offset_offset = java_lang_String::offset_offset_in_bytes(); 3710 int offset_offset = java_lang_String::offset_offset_in_bytes();
3716 masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset)); 3719 masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3717 3720
3718 // Compute the minimum of the string lengths(rsi) and the 3721 // Compute the minimum of the string lengths(rsi) and the
3719 // difference of the string lengths (stack) 3722 // difference of the string lengths (stack)
3720 3723
3724 // do the conditional move stuff
3721 masm.movl(rdi, Address(rdi, count_offset)); 3725 masm.movl(rdi, Address(rdi, count_offset));
3722 masm.movl(rsi, Address(rsi, count_offset)); 3726 masm.movl(rsi, Address(rsi, count_offset));
3723 masm.movl(rcx, rdi); 3727 masm.movl(rcx, rdi);
3724 masm.subl(rdi, rsi); 3728 masm.subl(rdi, rsi);
3725 masm.push(rdi); 3729 masm.push(rdi);
3743 { 3747 {
3744 // Check after comparing first character to see if strings are equivalent 3748 // Check after comparing first character to see if strings are equivalent
3745 Label LSkip2; 3749 Label LSkip2;
3746 // Check if the strings start at same location 3750 // Check if the strings start at same location
3747 masm.cmpptr(rbx, rax); 3751 masm.cmpptr(rbx, rax);
3748 masm.jcc(Assembler::notEqual, LSkip2); 3752 masm.jccb(Assembler::notEqual, LSkip2);
3749 3753
3750 // Check if the length difference is zero (from stack) 3754 // Check if the length difference is zero (from stack)
3751 masm.cmpl(Address(rsp, 0), 0x0); 3755 masm.cmpl(Address(rsp, 0), 0x0);
3752 masm.jcc(Assembler::equal, LENGTH_DIFF_LABEL); 3756 masm.jcc(Assembler::equal, LENGTH_DIFF_LABEL);
3753 3757
3754 // Strings might not be equivalent 3758 // Strings might not be equivalent
3755 masm.bind(LSkip2); 3759 masm.bind(LSkip2);
3756 } 3760 }
3757 3761
3762 // Advance to next character
3763 masm.addptr(rax, 2);
3764 masm.addptr(rbx, 2);
3765
3766 if (UseSSE42Intrinsics) {
3767 // With SSE4.2, use double quad vector compare
3768 Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3769 // Setup to compare 16-byte vectors
3770 masm.movl(rdi, rsi);
3771 masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
3772 masm.andl(rdi, 0x00000007); // rdi holds the tail count
3773 masm.testl(rsi, rsi);
3774 masm.jccb(Assembler::zero, COMPARE_TAIL);
3775
3776 masm.lea(rax, Address(rax, rsi, Address::times_2));
3777 masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3778 masm.negptr(rsi);
3779
3780 masm.bind(COMPARE_VECTORS);
3781 masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
3782 masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
3783 masm.pxor(tmp1Reg, tmp2Reg);
3784 masm.ptest(tmp1Reg, tmp1Reg);
3785 masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
3786 masm.addptr(rsi, 8);
3787 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3788 masm.jmpb(COMPARE_TAIL);
3789
3790 // Mismatched characters in the vectors
3791 masm.bind(VECTOR_NOT_EQUAL);
3792 masm.lea(rax, Address(rax, rsi, Address::times_2));
3793 masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3794 masm.movl(rdi, 8);
3795
3796 // Compare tail (< 8 chars), or rescan last vectors to
3797 // find 1st mismatched characters
3798 masm.bind(COMPARE_TAIL);
3799 masm.testl(rdi, rdi);
3800 masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3801 masm.movl(rsi, rdi);
3802 // Fallthru to tail compare
3803 }
3804
3758 // Shift RAX and RBX to the end of the arrays, negate min 3805 // Shift RAX and RBX to the end of the arrays, negate min
3759 masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); 3806 masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
3760 masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); 3807 masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
3761 masm.negptr(rsi); 3808 masm.negptr(rsi);
3762 3809
3763 // Compare the rest of the characters 3810 // Compare the rest of the characters
3764 masm.bind(WHILE_HEAD_LABEL); 3811 masm.bind(WHILE_HEAD_LABEL);
3765 masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); 3812 masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
3766 masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); 3813 masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
3767 masm.subl(rcx, rdi); 3814 masm.subl(rcx, rdi);
3768 masm.jcc(Assembler::notZero, POP_LABEL); 3815 masm.jccb(Assembler::notZero, POP_LABEL);
3769 masm.increment(rsi); 3816 masm.increment(rsi);
3770 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); 3817 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3771 3818
3772 // Strings are equal up to min length. Return the length difference. 3819 // Strings are equal up to min length. Return the length difference.
3773 masm.bind(LENGTH_DIFF_LABEL); 3820 masm.bind(LENGTH_DIFF_LABEL);
3774 masm.pop(rcx); 3821 masm.pop(rcx);
3775 masm.jmp(DONE_LABEL); 3822 masm.jmpb(DONE_LABEL);
3776 3823
3777 // Discard the stored length difference 3824 // Discard the stored length difference
3778 masm.bind(POP_LABEL); 3825 masm.bind(POP_LABEL);
3779 masm.addptr(rsp, 8); 3826 masm.addptr(rsp, 8);
3780 3827
3781 // That's it 3828 // That's it
3782 masm.bind(DONE_LABEL); 3829 masm.bind(DONE_LABEL);
3783 %} 3830 %}
3784 3831
3785 enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, rbx_RegI tmp2, rcx_RegI result) %{ 3832 enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
3786 Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; 3833 rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{
3834 // SSE4.2 version
3835 Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
3836 SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
3787 MacroAssembler masm(&cbuf); 3837 MacroAssembler masm(&cbuf);
3788 3838
3789 Register ary1Reg = as_Register($ary1$$reg); 3839 XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg);
3790 Register ary2Reg = as_Register($ary2$$reg); 3840
3791 Register tmp1Reg = as_Register($tmp1$$reg); 3841 // Get the first character position in both strings
3792 Register tmp2Reg = as_Register($tmp2$$reg); 3842 // [8] char array, [12] offset, [16] count
3793 Register resultReg = as_Register($result$$reg); 3843 int value_offset = java_lang_String::value_offset_in_bytes();
3844 int offset_offset = java_lang_String::offset_offset_in_bytes();
3845 int count_offset = java_lang_String::count_offset_in_bytes();
3846 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3847
3848 // Get counts for string and substr
3849 masm.movl(rdx, Address(rsi, count_offset));
3850 masm.movl(rax, Address(rdi, count_offset));
3851 // Check for substr count > string count
3852 masm.cmpl(rax, rdx);
3853 masm.jcc(Assembler::greater, RET_NEG_ONE);
3854
3855 // Start the indexOf operation
3856 // Get start addr of string
3857 masm.load_heap_oop(rbx, Address(rsi, value_offset));
3858 masm.movl(rcx, Address(rsi, offset_offset));
3859 masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
3860 masm.push(rsi);
3861
3862 // Get start addr of substr
3863 masm.load_heap_oop(rbx, Address(rdi, value_offset));
3864 masm.movl(rcx, Address(rdi, offset_offset));
3865 masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
3866 masm.push(rdi);
3867 masm.push(rax);
3868 masm.jmpb(PREP_FOR_SCAN);
3869
3870 // Substr count saved at sp
3871 // Substr saved at sp+8
3872 // String saved at sp+16
3873
3874 // Prep to load substr for scan
3875 masm.bind(LOAD_SUBSTR);
3876 masm.movptr(rdi, Address(rsp, 8));
3877 masm.movl(rax, Address(rsp, 0));
3878
3879 // Load substr
3880 masm.bind(PREP_FOR_SCAN);
3881 masm.movdqu(tmp1Reg, Address(rdi, 0));
3882 masm.addq(rdx, 8); // prime the loop
3883 masm.subptr(rsi, 16);
3884
3885 // Scan string for substr in 16-byte vectors
3886 masm.bind(SCAN_TO_SUBSTR);
3887 masm.subq(rdx, 8);
3888 masm.addptr(rsi, 16);
3889 masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3890 masm.jcc(Assembler::above, SCAN_TO_SUBSTR);
3891 masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND);
3892
3893 // Fallthru: found a potential substr
3894
3895 //Make sure string is still long enough
3896 masm.subl(rdx, rcx);
3897 masm.cmpl(rdx, rax);
3898 masm.jccb(Assembler::negative, RET_NOT_FOUND);
3899 // Compute start addr of substr
3900 masm.lea(rsi, Address(rsi, rcx, Address::times_2));
3901 masm.movptr(rbx, rsi);
3902
3903 // Compare potential substr
3904 masm.addq(rdx, 8); // prime the loop
3905 masm.addq(rax, 8);
3906 masm.subptr(rsi, 16);
3907 masm.subptr(rdi, 16);
3908
3909 // Scan 16-byte vectors of string and substr
3910 masm.bind(SCAN_SUBSTR);
3911 masm.subq(rax, 8);
3912 masm.subq(rdx, 8);
3913 masm.addptr(rsi, 16);
3914 masm.addptr(rdi, 16);
3915 masm.movdqu(tmp1Reg, Address(rdi, 0));
3916 masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3917 masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0
3918 masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0
3919
3920 // Compute substr offset
3921 masm.movptr(rsi, Address(rsp, 16));
3922 masm.subptr(rbx, rsi);
3923 masm.shrl(rbx, 1);
3924 masm.jmpb(CLEANUP);
3925
3926 masm.bind(RET_NEG_ONE);
3927 masm.movl(rbx, -1);
3928 masm.jmpb(DONE);
3929
3930 masm.bind(RET_NOT_FOUND);
3931 masm.movl(rbx, -1);
3932
3933 masm.bind(CLEANUP);
3934 masm.addptr(rsp, 24);
3935
3936 masm.bind(DONE);
3937 %}
3938
3939 enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
3940 rbx_RegI tmp3, rcx_RegI tmp4, rax_RegI result) %{ // operand names mirror instruct string_equals (tmp1..tmp4, each declared once)
3941 Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
3942 MacroAssembler masm(&cbuf);
3943 // Inline String equality test. Works on the fixed registers RDI/RSI (the two strings), RBX/RCX (scratch) and RAX (result: 1 if equal, 0 otherwise).
3944 XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg);
3945 XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg);
3946 // Byte offsets of the String fields read below, and of the first element of a char[]
3947 int value_offset = java_lang_String::value_offset_in_bytes();
3948 int offset_offset = java_lang_String::offset_offset_in_bytes();
3949 int count_offset = java_lang_String::count_offset_in_bytes();
3950 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3951
3952 // does source == target string?
3953 masm.cmpptr(rdi, rsi);
3954 masm.jcc(Assembler::equal, RET_TRUE);
3955
3956 // get and compare counts
3957 masm.movl(rcx, Address(rdi, count_offset));
3958 masm.movl(rax, Address(rsi, count_offset));
3959 masm.cmpl(rcx, rax);
3960 masm.jcc(Assembler::notEqual, RET_FALSE);
3961 masm.testl(rax, rax);
3962 masm.jcc(Assembler::zero, RET_TRUE); // both counts are zero: equal without touching the arrays
3963
3964 // get source string offset and value
3965 masm.load_heap_oop(rbx, Address(rsi, value_offset));
3966 masm.movl(rax, Address(rsi, offset_offset));
3967 masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset)); // rsi = address of first char of str2
3968
3969 // get compare string offset and value
3970 masm.load_heap_oop(rbx, Address(rdi, value_offset));
3971 masm.movl(rax, Address(rdi, offset_offset));
3972 masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset)); // rdi = address of first char of str1
3973
3974 // Set byte count
3975 masm.shll(rcx, 1); // rcx = count * 2 (each char is 2 bytes)
3976 masm.movl(rax, rcx); // rax keeps a copy; it is masked down to the tail size below
3977
3978 if (UseSSE42Intrinsics) {
3979 // With SSE4.2, use double quad vector compare
3980 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3981 // Compare 16-byte vectors
3982 masm.andl(rcx, 0xfffffff0); // vector count (in bytes)
3983 masm.andl(rax, 0x0000000e); // tail count (in bytes)
3984 masm.testl(rcx, rcx);
3985 masm.jccb(Assembler::zero, COMPARE_TAIL);
3986 masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3987 masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3988 masm.negptr(rcx);
3989 // rdi/rsi now point just past the 16-byte region; rcx is a negative byte index that counts up to 0
3990 masm.bind(COMPARE_WIDE_VECTORS);
3991 masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
3992 masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
3993 masm.pxor(tmp1Reg, tmp2Reg);
3994 masm.ptest(tmp1Reg, tmp1Reg); // non-zero XOR result means the two vectors differ
3995 masm.jccb(Assembler::notZero, RET_FALSE);
3996 masm.addptr(rcx, 16);
3997 masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3998 masm.bind(COMPARE_TAIL);
3999 masm.movl(rcx, rax);
4000 // Fallthru to tail compare
4001 }
4002
4003 // Compare 4-byte vectors
4004 masm.andl(rcx, 0xfffffffc); // vector count (in bytes)
4005 masm.andl(rax, 0x00000002); // tail char (in bytes)
4006 masm.testl(rcx, rcx);
4007 masm.jccb(Assembler::zero, COMPARE_CHAR);
4008 masm.lea(rdi, Address(rdi, rcx, Address::times_1));
4009 masm.lea(rsi, Address(rsi, rcx, Address::times_1));
4010 masm.negptr(rcx);
4011 // same scheme as above: pointers past the region, negative index in rcx stepping by 4
4012 masm.bind(COMPARE_VECTORS);
4013 masm.movl(rbx, Address(rdi, rcx, Address::times_1));
4014 masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
4015 masm.jccb(Assembler::notEqual, RET_FALSE);
4016 masm.addptr(rcx, 4);
4017 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4018
4019 // Compare trailing char (final 2 bytes), if any
4020 masm.bind(COMPARE_CHAR);
4021 masm.testl(rax, rax);
4022 masm.jccb(Assembler::zero, RET_TRUE);
4023 masm.load_unsigned_short(rbx, Address(rdi, 0));
4024 masm.load_unsigned_short(rcx, Address(rsi, 0));
4025 masm.cmpl(rbx, rcx);
4026 masm.jccb(Assembler::notEqual, RET_FALSE);
4027 // fall through: everything compared equal
4028 masm.bind(RET_TRUE);
4029 masm.movl(rax, 1); // return true
4030 masm.jmpb(DONE);
4031
4032 masm.bind(RET_FALSE);
4033 masm.xorl(rax, rax); // return false
4034
4035 masm.bind(DONE);
4036 %}
4037
4038 enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2,
4039 rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
4040 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
4041 MacroAssembler masm(&cbuf);
4042
4043 XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg);
4044 XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg);
4045 Register ary1Reg = as_Register($ary1$$reg);
4046 Register ary2Reg = as_Register($ary2$$reg);
4047 Register tmp3Reg = as_Register($tmp3$$reg);
4048 Register tmp4Reg = as_Register($tmp4$$reg);
4049 Register resultReg = as_Register($result$$reg);
3794 4050
3795 int length_offset = arrayOopDesc::length_offset_in_bytes(); 4051 int length_offset = arrayOopDesc::length_offset_in_bytes();
3796 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 4052 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3797 4053
3798 // Check the input args 4054 // Check the input args
3799 masm.cmpq(ary1Reg, ary2Reg); 4055 masm.cmpq(ary1Reg, ary2Reg);
3800 masm.jcc(Assembler::equal, TRUE_LABEL); 4056 masm.jcc(Assembler::equal, TRUE_LABEL);
3801 masm.testq(ary1Reg, ary1Reg); 4057 masm.testq(ary1Reg, ary1Reg);
3802 masm.jcc(Assembler::zero, FALSE_LABEL); 4058 masm.jcc(Assembler::zero, FALSE_LABEL);
3803 masm.testq(ary2Reg, ary2Reg); 4059 masm.testq(ary2Reg, ary2Reg);
3804 masm.jcc(Assembler::zero, FALSE_LABEL); 4060 masm.jcc(Assembler::zero, FALSE_LABEL);
3805 4061
3806 // Check the lengths 4062 // Check the lengths
3807 masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); 4063 masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
3808 masm.movl(resultReg, Address(ary2Reg, length_offset)); 4064 masm.movl(resultReg, Address(ary2Reg, length_offset));
3809 masm.cmpl(tmp2Reg, resultReg); 4065 masm.cmpl(tmp4Reg, resultReg);
3810 masm.jcc(Assembler::notEqual, FALSE_LABEL); 4066 masm.jcc(Assembler::notEqual, FALSE_LABEL);
3811 masm.testl(resultReg, resultReg); 4067 masm.testl(resultReg, resultReg);
3812 masm.jcc(Assembler::zero, TRUE_LABEL); 4068 masm.jcc(Assembler::zero, TRUE_LABEL);
3813 4069
3814 // Get the number of 4 byte vectors to compare 4070 //load array address
3815 masm.shrl(resultReg, 1); 4071 masm.lea(ary1Reg, Address(ary1Reg, base_offset));
3816 4072 masm.lea(ary2Reg, Address(ary2Reg, base_offset));
3817 // Check for odd-length arrays 4073
3818 masm.andl(tmp2Reg, 1); 4074 //set byte count
3819 masm.testl(tmp2Reg, tmp2Reg); 4075 masm.shll(tmp4Reg, 1);
3820 masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); 4076 masm.movl(resultReg,tmp4Reg);
3821 4077
3822 // Compare 2-byte "tail" at end of arrays 4078 if (UseSSE42Intrinsics){
3823 masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); 4079 // With SSE4.2, use double quad vector compare
3824 masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); 4080 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3825 masm.cmpl(tmp1Reg, tmp2Reg); 4081 // Compare 16-byte vectors
3826 masm.jcc(Assembler::notEqual, FALSE_LABEL); 4082 masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes)
4083 masm.andl(resultReg, 0x0000000e); // tail count (in bytes)
4084 masm.testl(tmp4Reg, tmp4Reg);
4085 masm.jccb(Assembler::zero, COMPARE_TAIL);
4086 masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4087 masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4088 masm.negptr(tmp4Reg);
4089
4090 masm.bind(COMPARE_WIDE_VECTORS);
4091 masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4092 masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4093 masm.pxor(tmp1Reg, tmp2Reg);
4094 masm.ptest(tmp1Reg, tmp1Reg);
4095
4096 masm.jccb(Assembler::notZero, FALSE_LABEL);
4097 masm.addptr(tmp4Reg, 16);
4098 masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4099 masm.bind(COMPARE_TAIL);
4100 masm.movl(tmp4Reg, resultReg);
4101 // Fallthru to tail compare
4102 }
4103
4104 // Compare 4-byte vectors
4105 masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes)
4106 masm.andl(resultReg, 0x00000002); // tail char (in bytes)
4107 masm.testl(tmp4Reg, tmp4Reg); // if tmp4 == 0, only compare char
4108 masm.jccb(Assembler::zero, COMPARE_CHAR);
4109 masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4110 masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4111 masm.negptr(tmp4Reg);
4112
4113 masm.bind(COMPARE_VECTORS);
4114 masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4115 masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4116 masm.jccb(Assembler::notEqual, FALSE_LABEL);
4117 masm.addptr(tmp4Reg, 4);
4118 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4119
4120 // Compare trailing char (final 2 bytes), if any
4121 masm.bind(COMPARE_CHAR);
3827 masm.testl(resultReg, resultReg); 4122 masm.testl(resultReg, resultReg);
3828 masm.jcc(Assembler::zero, TRUE_LABEL); 4123 masm.jccb(Assembler::zero, TRUE_LABEL);
3829 4124 masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
3830 // Setup compare loop 4125 masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
3831 masm.bind(COMPARE_LOOP_HDR); 4126 masm.cmpl(tmp3Reg, tmp4Reg);
3832 // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays 4127 masm.jccb(Assembler::notEqual, FALSE_LABEL);
3833 masm.leaq(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3834 masm.leaq(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3835 masm.negq(resultReg);
3836
3837 // 4-byte-wide compare loop
3838 masm.bind(COMPARE_LOOP);
3839 masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
3840 masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
3841 masm.cmpl(ary1Reg, ary2Reg);
3842 masm.jcc(Assembler::notEqual, FALSE_LABEL);
3843 masm.incrementq(resultReg);
3844 masm.jcc(Assembler::notZero, COMPARE_LOOP);
3845 4128
3846 masm.bind(TRUE_LABEL); 4129 masm.bind(TRUE_LABEL);
3847 masm.movl(resultReg, 1); // return true 4130 masm.movl(resultReg, 1); // return true
3848 masm.jmp(DONE_LABEL); 4131 masm.jmpb(DONE);
3849 4132
3850 masm.bind(FALSE_LABEL); 4133 masm.bind(FALSE_LABEL);
3851 masm.xorl(resultReg, resultReg); // return false 4134 masm.xorl(resultReg, resultReg); // return false
3852 4135
3853 // That's it 4136 // That's it
3854 masm.bind(DONE_LABEL); 4137 masm.bind(DONE);
3855 %} 4138 %}
3856 4139
3857 enc_class enc_rethrow() 4140 enc_class enc_rethrow()
3858 %{ 4141 %{
3859 cbuf.set_inst_mark(); 4142 cbuf.set_inst_mark();
5085 format %{ %} 5368 format %{ %}
5086 interface(REG_INTER); 5369 interface(REG_INTER);
5087 %} 5370 %}
5088 5371
5089 // Double register operands 5372 // Double register operands
5090 operand regD() 5373 operand regD()
5091 %{ 5374 %{
5092 constraint(ALLOC_IN_RC(double_reg)); 5375 constraint(ALLOC_IN_RC(double_reg));
5093 match(RegD); 5376 match(RegD);
5094 5377
5095 format %{ %} 5378 format %{ %}
11538 ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax 11821 ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11539 Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos 11822 Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11540 ins_pipe(pipe_slow); 11823 ins_pipe(pipe_slow);
11541 %} 11824 %}
11542 11825
11543 instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1, 11826 instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
11544 rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) 11827 rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
11545 %{ 11828 %{
11546 match(Set result (StrComp str1 str2)); 11829 match(Set result (StrComp str1 str2));
11547 effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr); 11830 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
11548 //ins_cost(300); 11831 //ins_cost(300);
11549 11832
11550 format %{ "String Compare $str1, $str2 -> $result // XXX KILL RAX, RBX" %} 11833 format %{ "String Compare $str1, $str2 -> $result // XXX KILL RAX, RBX" %}
11551 ins_encode( enc_String_Compare() ); 11834 ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11552 ins_pipe( pipe_slow ); 11835 ins_pipe( pipe_slow );
11553 %} 11836 %}
11554 11837
11838 instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
11839 rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr)
11840 %{ // Rule for StrIndexOf; only selectable when UseSSE42Intrinsics is set (see predicate)
11841 predicate(UseSSE42Intrinsics);
11842 match(Set result (StrIndexOf str1 str2));
11843 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
11844 // Operands are handed to enc_String_IndexOf in the same order as its parameter list
11845 format %{ "String IndexOf $str1,$str2 -> $result // KILL RAX, RCX, RDX" %}
11846 ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11847 ins_pipe( pipe_slow );
11848 %}
11849
11850 // fast string equals: matches StrEquals and emits enc_String_Equals, result in RAX
11851 instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3,
11852 rcx_RegI tmp4, rax_RegI result, rFlagsReg cr)
11853 %{
11854 match(Set result (StrEquals str1 str2));
11855 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
11856 // tmp1/tmp2 are the XMM temporaries used by the encoding; tmp3/tmp4 pin RBX/RCX, which the encoding overwrites
11857 format %{ "String Equals $str1,$str2 -> $result // KILL RBX, RCX" %}
11858 ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11859 ins_pipe( pipe_slow );
11860 %}
11861
11555 // fast array equals 11862 // fast array equals
11556 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, 11863 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3,
11557 rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) %{ 11864 rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
11865 %{
11558 match(Set result (AryEq ary1 ary2)); 11866 match(Set result (AryEq ary1 ary2));
11559 effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr); 11867 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11560 //ins_cost(300); 11868 //ins_cost(300);
11561 11869
11562 format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %} 11870 format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %}
11563 ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); 11871 ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
11564 ins_pipe( pipe_slow ); 11872 ins_pipe( pipe_slow );
11565 %} 11873 %}
11566 11874
11567 //----------Control Flow Instructions------------------------------------------ 11875 //----------Control Flow Instructions------------------------------------------
11568 // Signed compare Instructions 11876 // Signed compare Instructions