comparison src/cpu/x86/vm/x86_32.ad @ 681:fbde8ec322d0

6761600: Use sse 4.2 in intrinsics Summary: Use SSE 4.2 in intrinsics for String.{compareTo/equals/indexOf} and Arrays.equals. Reviewed-by: kvn, never, jrose
author cfang
date Tue, 31 Mar 2009 14:07:08 -0700
parents d0994e5bebce
children 93c14e5562c4
comparison
equal deleted inserted replaced
676:d3676b4cb78c 681:fbde8ec322d0
3692 // Avoid branch to branch on AMD processors 3692 // Avoid branch to branch on AMD processors
3693 if (EmitSync & 32768) { masm.nop() ; } 3693 if (EmitSync & 32768) { masm.nop() ; }
3694 } 3694 }
3695 %} 3695 %}
3696 3696
3697 enc_class enc_String_Compare() %{ 3697 enc_class enc_String_Compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
3698 eAXRegI tmp3, eBXRegI tmp4, eCXRegI result) %{
3698 Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL, 3699 Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3699 POP_LABEL, DONE_LABEL, CONT_LABEL, 3700 POP_LABEL, DONE_LABEL, CONT_LABEL,
3700 WHILE_HEAD_LABEL; 3701 WHILE_HEAD_LABEL;
3701 MacroAssembler masm(&cbuf); 3702 MacroAssembler masm(&cbuf);
3703
3704 XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg);
3705 XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg);
3702 3706
3703 // Get the first character position in both strings 3707 // Get the first character position in both strings
3704 // [8] char array, [12] offset, [16] count 3708 // [8] char array, [12] offset, [16] count
3705 int value_offset = java_lang_String::value_offset_in_bytes(); 3709 int value_offset = java_lang_String::value_offset_in_bytes();
3706 int offset_offset = java_lang_String::offset_offset_in_bytes(); 3710 int offset_offset = java_lang_String::offset_offset_in_bytes();
3714 masm.movl(rcx, Address(rdi, offset_offset)); 3718 masm.movl(rcx, Address(rdi, offset_offset));
3715 masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset)); 3719 masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3716 3720
3717 // Compute the minimum of the string lengths(rsi) and the 3721 // Compute the minimum of the string lengths(rsi) and the
3718 // difference of the string lengths (stack) 3722 // difference of the string lengths (stack)
3719
3720 3723
3721 if (VM_Version::supports_cmov()) { 3724 if (VM_Version::supports_cmov()) {
3722 masm.movl(rdi, Address(rdi, count_offset)); 3725 masm.movl(rdi, Address(rdi, count_offset));
3723 masm.movl(rsi, Address(rsi, count_offset)); 3726 masm.movl(rsi, Address(rsi, count_offset));
3724 masm.movl(rcx, rdi); 3727 masm.movl(rcx, rdi);
3729 masm.movl(rdi, Address(rdi, count_offset)); 3732 masm.movl(rdi, Address(rdi, count_offset));
3730 masm.movl(rcx, Address(rsi, count_offset)); 3733 masm.movl(rcx, Address(rsi, count_offset));
3731 masm.movl(rsi, rdi); 3734 masm.movl(rsi, rdi);
3732 masm.subl(rdi, rcx); 3735 masm.subl(rdi, rcx);
3733 masm.push(rdi); 3736 masm.push(rdi);
3734 masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL); 3737 masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL);
3735 masm.movl(rsi, rcx); 3738 masm.movl(rsi, rcx);
3736 // rsi holds min, rcx is unused 3739 // rsi holds min, rcx is unused
3737 } 3740 }
3738 3741
3739 // Is the minimum length zero? 3742 // Is the minimum length zero?
3754 { 3757 {
3755 // Check after comparing first character to see if strings are equivalent 3758 // Check after comparing first character to see if strings are equivalent
3756 Label LSkip2; 3759 Label LSkip2;
3757 // Check if the strings start at same location 3760 // Check if the strings start at same location
3758 masm.cmpptr(rbx,rax); 3761 masm.cmpptr(rbx,rax);
3759 masm.jcc(Assembler::notEqual, LSkip2); 3762 masm.jccb(Assembler::notEqual, LSkip2);
3760 3763
3761 // Check if the length difference is zero (from stack) 3764 // Check if the length difference is zero (from stack)
3762 masm.cmpl(Address(rsp, 0), 0x0); 3765 masm.cmpl(Address(rsp, 0), 0x0);
3763 masm.jcc(Assembler::equal, LENGTH_DIFF_LABEL); 3766 masm.jcc(Assembler::equal, LENGTH_DIFF_LABEL);
3764 3767
3765 // Strings might not be equivalent 3768 // Strings might not be equivalent
3766 masm.bind(LSkip2); 3769 masm.bind(LSkip2);
3767 } 3770 }
3768 3771
3769 // Shift rax, and rbx, to the end of the arrays, negate min 3772 // Advance to next character
3770 masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); 3773 masm.addptr(rax, 2);
3771 masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); 3774 masm.addptr(rbx, 2);
3775
3776 if (UseSSE42Intrinsics) {
3777 // With SSE4.2, use double quad vector compare
3778 Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3779 // Setup to compare 16-byte vectors
3780 masm.movl(rdi, rsi);
3781 masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
3782 masm.andl(rdi, 0x00000007); // rdi holds the tail count
3783 masm.testl(rsi, rsi);
3784 masm.jccb(Assembler::zero, COMPARE_TAIL);
3785
3786 masm.lea(rax, Address(rax, rsi, Address::times_2));
3787 masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3788 masm.negl(rsi);
3789
3790 masm.bind(COMPARE_VECTORS);
3791 masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
3792 masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
3793 masm.pxor(tmp1Reg, tmp2Reg);
3794 masm.ptest(tmp1Reg, tmp1Reg);
3795 masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
3796 masm.addl(rsi, 8);
3797 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3798 masm.jmpb(COMPARE_TAIL);
3799
3800 // Mismatched characters in the vectors
3801 masm.bind(VECTOR_NOT_EQUAL);
3802 masm.lea(rax, Address(rax, rsi, Address::times_2));
3803 masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3804 masm.movl(rdi, 8);
3805
3806 // Compare tail (< 8 chars), or rescan last vectors to
3807 // find 1st mismatched characters
3808 masm.bind(COMPARE_TAIL);
3809 masm.testl(rdi, rdi);
3810 masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3811 masm.movl(rsi, rdi);
3812 // Fallthru to tail compare
3813 }
3814
3815 //Shift rax, and rbx, to the end of the arrays, negate min
3816 masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
3817 masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
3772 masm.negl(rsi); 3818 masm.negl(rsi);
3773 3819
3774 // Compare the rest of the characters 3820 // Compare the rest of the characters
3775 masm.bind(WHILE_HEAD_LABEL); 3821 masm.bind(WHILE_HEAD_LABEL);
3776 masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); 3822 masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
3777 masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); 3823 masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
3778 masm.subl(rcx, rdi); 3824 masm.subl(rcx, rdi);
3779 masm.jcc(Assembler::notZero, POP_LABEL); 3825 masm.jccb(Assembler::notZero, POP_LABEL);
3780 masm.incrementl(rsi); 3826 masm.incrementl(rsi);
3781 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); 3827 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3782 3828
3783 // Strings are equal up to min length. Return the length difference. 3829 // Strings are equal up to min length. Return the length difference.
3784 masm.bind(LENGTH_DIFF_LABEL); 3830 masm.bind(LENGTH_DIFF_LABEL);
3785 masm.pop(rcx); 3831 masm.pop(rcx);
3786 masm.jmp(DONE_LABEL); 3832 masm.jmpb(DONE_LABEL);
3787 3833
3788 // Discard the stored length difference 3834 // Discard the stored length difference
3789 masm.bind(POP_LABEL); 3835 masm.bind(POP_LABEL);
3790 masm.addptr(rsp, 4); 3836 masm.addptr(rsp, 4);
3791 3837
3792 // That's it 3838 // That's it
3793 masm.bind(DONE_LABEL); 3839 masm.bind(DONE_LABEL);
3794 %} 3840 %}
3795 3841
3796 enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{ 3842 enc_class enc_String_Equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
3797 Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; 3843 eBXRegI tmp3, eCXRegI tmp4, eAXRegI result) %{
3844 Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
3798 MacroAssembler masm(&cbuf); 3845 MacroAssembler masm(&cbuf);
3799 3846
3800 Register ary1Reg = as_Register($ary1$$reg); 3847 XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg);
3801 Register ary2Reg = as_Register($ary2$$reg); 3848 XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg);
3802 Register tmp1Reg = as_Register($tmp1$$reg); 3849
3803 Register tmp2Reg = as_Register($tmp2$$reg); 3850 int value_offset = java_lang_String::value_offset_in_bytes();
3804 Register resultReg = as_Register($result$$reg); 3851 int offset_offset = java_lang_String::offset_offset_in_bytes();
3852 int count_offset = java_lang_String::count_offset_in_bytes();
3853 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3854
3855 // does source == target string?
3856 masm.cmpptr(rdi, rsi);
3857 masm.jcc(Assembler::equal, RET_TRUE);
3858
3859 // get and compare counts
3860 masm.movl(rcx, Address(rdi, count_offset));
3861 masm.movl(rax, Address(rsi, count_offset));
3862 masm.cmpl(rcx, rax);
3863 masm.jcc(Assembler::notEqual, RET_FALSE);
3864 masm.testl(rax, rax);
3865 masm.jcc(Assembler::zero, RET_TRUE);
3866
3867 // get source string offset and value
3868 masm.movptr(rbx, Address(rsi, value_offset));
3869 masm.movl(rax, Address(rsi, offset_offset));
3870 masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset));
3871
3872 // get compare string offset and value
3873 masm.movptr(rbx, Address(rdi, value_offset));
3874 masm.movl(rax, Address(rdi, offset_offset));
3875 masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset));
3876
3877 // Set byte count
3878 masm.shll(rcx, 1);
3879 masm.movl(rax, rcx);
3880
3881 if (UseSSE42Intrinsics) {
3882 // With SSE4.2, use double quad vector compare
3883 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3884 // Compare 16-byte vectors
3885 masm.andl(rcx, 0xfffffff0); // vector count (in bytes)
3886 masm.andl(rax, 0x0000000e); // tail count (in bytes)
3887 masm.testl(rcx, rcx);
3888 masm.jccb(Assembler::zero, COMPARE_TAIL);
3889 masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3890 masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3891 masm.negl(rcx);
3892
3893 masm.bind(COMPARE_WIDE_VECTORS);
3894 masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
3895 masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
3896 masm.pxor(tmp1Reg, tmp2Reg);
3897 masm.ptest(tmp1Reg, tmp1Reg);
3898 masm.jccb(Assembler::notZero, RET_FALSE);
3899 masm.addl(rcx, 16);
3900 masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3901 masm.bind(COMPARE_TAIL);
3902 masm.movl(rcx, rax);
3903 // Fallthru to tail compare
3904 }
3905
3906 // Compare 4-byte vectors
3907 masm.andl(rcx, 0xfffffffc); // vector count (in bytes)
3908 masm.andl(rax, 0x00000002); // tail char (in bytes)
3909 masm.testl(rcx, rcx);
3910 masm.jccb(Assembler::zero, COMPARE_CHAR);
3911 masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3912 masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3913 masm.negl(rcx);
3914
3915 masm.bind(COMPARE_VECTORS);
3916 masm.movl(rbx, Address(rdi, rcx, Address::times_1));
3917 masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
3918 masm.jccb(Assembler::notEqual, RET_FALSE);
3919 masm.addl(rcx, 4);
3920 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3921
3922 // Compare trailing char (final 2 bytes), if any
3923 masm.bind(COMPARE_CHAR);
3924 masm.testl(rax, rax);
3925 masm.jccb(Assembler::zero, RET_TRUE);
3926 masm.load_unsigned_short(rbx, Address(rdi, 0));
3927 masm.load_unsigned_short(rcx, Address(rsi, 0));
3928 masm.cmpl(rbx, rcx);
3929 masm.jccb(Assembler::notEqual, RET_FALSE);
3930
3931 masm.bind(RET_TRUE);
3932 masm.movl(rax, 1); // return true
3933 masm.jmpb(DONE);
3934
3935 masm.bind(RET_FALSE);
3936 masm.xorl(rax, rax); // return false
3937
3938 masm.bind(DONE);
3939 %}
3940
3941 enc_class enc_String_IndexOf(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
3942 eCXRegI tmp3, eDXRegI tmp4, eBXRegI result) %{
3943 // SSE4.2 version. Searches the string str1 (ESI) for the substring str2 (EDI); EBX receives the char index of the match, or -1 if none is found.
3944 Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
3945 SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
3946 MacroAssembler masm(&cbuf);
3947
3948 XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg);
3949
3950 // Get the first character position in both strings
3951 // [8] char array, [12] offset, [16] count
3952 int value_offset = java_lang_String::value_offset_in_bytes();
3953 int offset_offset = java_lang_String::offset_offset_in_bytes();
3954 int count_offset = java_lang_String::count_offset_in_bytes();
3955 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3956
3957 // Get counts for string and substr (EDX = string count, EAX = substr count)
3958 masm.movl(rdx, Address(rsi, count_offset));
3959 masm.movl(rax, Address(rdi, count_offset));
3960 // Check for substr count > string count
3961 masm.cmpl(rax, rdx);
3962 masm.jcc(Assembler::greater, RET_NEG_ONE);
3963
3964 // Start the indexOf operation
3965 // Get start addr of string (saved on the stack so the final index can be computed)
3966 masm.movptr(rbx, Address(rsi, value_offset));
3967 masm.movl(rcx, Address(rsi, offset_offset));
3968 masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
3969 masm.push(rsi);
3970
3971 // Get start addr of substr (address and count are saved so LOAD_SUBSTR can restore them)
3972 masm.movptr(rbx, Address(rdi, value_offset));
3973 masm.movl(rcx, Address(rdi, offset_offset));
3974 masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
3975 masm.push(rdi);
3976 masm.push(rax);
3977 masm.jmpb(PREP_FOR_SCAN);
3978
3979 // Substr count saved at sp
3980 // Substr saved at sp+4
3981 // String saved at sp+8
3982
3983 // Prep to load substr for scan: restore the substr address and count from the stack
3984 masm.bind(LOAD_SUBSTR);
3985 masm.movptr(rdi, Address(rsp, 4));
3986 masm.movl(rax, Address(rsp, 0));
3987
3988 // Load substr. NOTE(review): this loads a full 16 bytes regardless of the substr count; confirm the over-read is always safe.
3989 masm.bind(PREP_FOR_SCAN);
3990 masm.movdqu(tmp1Reg, Address(rdi, 0));
3991 masm.addl(rdx, 8); // prime the loop: pre-bias by one vector, undone on entry to SCAN_TO_SUBSTR
3992 masm.subptr(rsi, 16);
3993
3994 // Scan string for substr in 16-byte vectors (8 chars per step)
3995 masm.bind(SCAN_TO_SUBSTR);
3996 masm.subl(rdx, 8);
3997 masm.addptr(rsi, 16);
3998 masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); // imm8 0x0d: unsigned words, equal-ordered (substring) compare; lengths in EAX/EDX, index returned in ECX
3999 masm.jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0
4000 masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
4001
4002 // Fallthru: found a potential substr
4003
4004 // Make sure string is still long enough
4005 masm.subl(rdx, rcx);
4006 masm.cmpl(rdx, rax);
4007 masm.jccb(Assembler::negative, RET_NOT_FOUND);
4008 // Compute start addr of the candidate match (ECX is a char index into the current vector) and keep it in EBX
4009 masm.lea(rsi, Address(rsi, rcx, Address::times_2));
4010 masm.movptr(rbx, rsi);
4011
4012 // Compare potential substr
4013 masm.addl(rdx, 8); // prime the loop: both counts and both pointers are pre-biased by one vector
4014 masm.addl(rax, 8);
4015 masm.subptr(rsi, 16);
4016 masm.subptr(rdi, 16);
4017
4018 // Scan 16-byte vectors of string and substr
4019 masm.bind(SCAN_SUBSTR);
4020 masm.subl(rax, 8);
4021 masm.subl(rdx, 8);
4022 masm.addptr(rsi, 16);
4023 masm.addptr(rdi, 16);
4024 masm.movdqu(tmp1Reg, Address(rdi, 0));
4025 masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
4026 masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0
4027 masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0
4028
4029 // Compute substr offset: (match addr in EBX - string start addr) / 2 bytes per char
4030 masm.movptr(rsi, Address(rsp, 8));
4031 masm.subptr(rbx, rsi);
4032 masm.shrl(rbx, 1);
4033 masm.jmpb(CLEANUP);
4034 // Reached only from the count check above, before anything is pushed, so it bypasses CLEANUP
4035 masm.bind(RET_NEG_ONE);
4036 masm.movl(rbx, -1);
4037 masm.jmpb(DONE);
4038
4039 masm.bind(RET_NOT_FOUND);
4040 masm.movl(rbx, -1);
4041 // Drop the three 4-byte slots pushed above (string addr, substr addr, substr count)
4042 masm.bind(CLEANUP);
4043 masm.addptr(rsp, 12);
4044
4045 masm.bind(DONE);
4046 %}
4047
4048 enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2,
4049 eBXRegI tmp3, eDXRegI tmp4, eAXRegI result) %{
4050 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
4051 MacroAssembler masm(&cbuf);
4052
4053 XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg);
4054 XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg);
4055 Register ary1Reg = as_Register($ary1$$reg);
4056 Register ary2Reg = as_Register($ary2$$reg);
4057 Register tmp3Reg = as_Register($tmp3$$reg);
4058 Register tmp4Reg = as_Register($tmp4$$reg);
4059 Register resultReg = as_Register($result$$reg);
3805 4060
3806 int length_offset = arrayOopDesc::length_offset_in_bytes(); 4061 int length_offset = arrayOopDesc::length_offset_in_bytes();
3807 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 4062 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3808 4063
3809 // Check the input args 4064 // Check the input args
3810 masm.cmpl(ary1Reg, ary2Reg); 4065 masm.cmpptr(ary1Reg, ary2Reg);
3811 masm.jcc(Assembler::equal, TRUE_LABEL); 4066 masm.jcc(Assembler::equal, TRUE_LABEL);
3812 masm.testl(ary1Reg, ary1Reg); 4067 masm.testptr(ary1Reg, ary1Reg);
3813 masm.jcc(Assembler::zero, FALSE_LABEL); 4068 masm.jcc(Assembler::zero, FALSE_LABEL);
3814 masm.testl(ary2Reg, ary2Reg); 4069 masm.testptr(ary2Reg, ary2Reg);
3815 masm.jcc(Assembler::zero, FALSE_LABEL); 4070 masm.jcc(Assembler::zero, FALSE_LABEL);
3816 4071
3817 // Check the lengths 4072 // Check the lengths
3818 masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); 4073 masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
3819 masm.movl(resultReg, Address(ary2Reg, length_offset)); 4074 masm.movl(resultReg, Address(ary2Reg, length_offset));
3820 masm.cmpl(tmp2Reg, resultReg); 4075 masm.cmpl(tmp4Reg, resultReg);
3821 masm.jcc(Assembler::notEqual, FALSE_LABEL); 4076 masm.jcc(Assembler::notEqual, FALSE_LABEL);
3822 masm.testl(resultReg, resultReg); 4077 masm.testl(resultReg, resultReg);
3823 masm.jcc(Assembler::zero, TRUE_LABEL); 4078 masm.jcc(Assembler::zero, TRUE_LABEL);
3824 4079
3825 // Get the number of 4 byte vectors to compare 4080 // Load array addrs
3826 masm.shrl(resultReg, 1); 4081 masm.lea(ary1Reg, Address(ary1Reg, base_offset));
3827 4082 masm.lea(ary2Reg, Address(ary2Reg, base_offset));
3828 // Check for odd-length arrays 4083
3829 masm.andl(tmp2Reg, 1); 4084 // Set byte count
3830 masm.testl(tmp2Reg, tmp2Reg); 4085 masm.shll(tmp4Reg, 1);
3831 masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); 4086 masm.movl(resultReg, tmp4Reg);
3832 4087
3833 // Compare 2-byte "tail" at end of arrays 4088 if (UseSSE42Intrinsics) {
3834 masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); 4089 // With SSE4.2, use double quad vector compare
3835 masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); 4090 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3836 masm.cmpl(tmp1Reg, tmp2Reg); 4091 // Compare 16-byte vectors
3837 masm.jcc(Assembler::notEqual, FALSE_LABEL); 4092 masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes)
4093 masm.andl(resultReg, 0x0000000e); // tail count (in bytes)
4094 masm.testl(tmp4Reg, tmp4Reg);
4095 masm.jccb(Assembler::zero, COMPARE_TAIL);
4096 masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4097 masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4098 masm.negl(tmp4Reg);
4099
4100 masm.bind(COMPARE_WIDE_VECTORS);
4101 masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4102 masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4103 masm.pxor(tmp1Reg, tmp2Reg);
4104 masm.ptest(tmp1Reg, tmp1Reg);
4105
4106 masm.jccb(Assembler::notZero, FALSE_LABEL);
4107 masm.addl(tmp4Reg, 16);
4108 masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4109 masm.bind(COMPARE_TAIL);
4110 masm.movl(tmp4Reg, resultReg);
4111 // Fallthru to tail compare
4112 }
4113
4114 // Compare 4-byte vectors
4115 masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes)
4116 masm.andl(resultReg, 0x00000002); // tail char (in bytes)
4117 masm.testl(tmp4Reg, tmp4Reg);
4118 masm.jccb(Assembler::zero, COMPARE_CHAR);
4119 masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4120 masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4121 masm.negl(tmp4Reg);
4122
4123 masm.bind(COMPARE_VECTORS);
4124 masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4125 masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4126 masm.jccb(Assembler::notEqual, FALSE_LABEL);
4127 masm.addl(tmp4Reg, 4);
4128 masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4129
4130 // Compare trailing char (final 2 bytes), if any
4131 masm.bind(COMPARE_CHAR);
3838 masm.testl(resultReg, resultReg); 4132 masm.testl(resultReg, resultReg);
3839 masm.jcc(Assembler::zero, TRUE_LABEL); 4133 masm.jccb(Assembler::zero, TRUE_LABEL);
3840 4134 masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
3841 // Setup compare loop 4135 masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
3842 masm.bind(COMPARE_LOOP_HDR); 4136 masm.cmpl(tmp3Reg, tmp4Reg);
3843 // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays 4137 masm.jccb(Assembler::notEqual, FALSE_LABEL);
3844 masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3845 masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3846 masm.negl(resultReg);
3847
3848 // 4-byte-wide compare loop
3849 masm.bind(COMPARE_LOOP);
3850 masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
3851 masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
3852 masm.cmpl(ary1Reg, ary2Reg);
3853 masm.jcc(Assembler::notEqual, FALSE_LABEL);
3854 masm.increment(resultReg);
3855 masm.jcc(Assembler::notZero, COMPARE_LOOP);
3856 4138
3857 masm.bind(TRUE_LABEL); 4139 masm.bind(TRUE_LABEL);
3858 masm.movl(resultReg, 1); // return true 4140 masm.movl(resultReg, 1); // return true
3859 masm.jmp(DONE_LABEL); 4141 masm.jmpb(DONE);
3860 4142
3861 masm.bind(FALSE_LABEL); 4143 masm.bind(FALSE_LABEL);
3862 masm.xorl(resultReg, resultReg); // return false 4144 masm.xorl(resultReg, resultReg); // return false
3863 4145
3864 // That's it 4146 // That's it
3865 masm.bind(DONE_LABEL); 4147 masm.bind(DONE);
3866 %} 4148 %}
3867 4149
3868 enc_class enc_pop_rdx() %{ 4150 enc_class enc_pop_rdx() %{
3869 emit_opcode(cbuf,0x5A); 4151 emit_opcode(cbuf,0x5A);
3870 %} 4152 %}
12072 format %{ "PXOR $dst,$dst\t! replicate2F" %} 12354 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12073 ins_encode( pxor(dst, dst)); 12355 ins_encode( pxor(dst, dst));
12074 ins_pipe( fpu_reg_reg ); 12356 ins_pipe( fpu_reg_reg );
12075 %} 12357 %}
12076 12358
12077
12078
12079 // ======================================================================= 12359 // =======================================================================
12080 // fast clearing of an array 12360 // fast clearing of an array
12081
12082 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 12361 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12083 match(Set dummy (ClearArray cnt base)); 12362 match(Set dummy (ClearArray cnt base));
12084 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 12363 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12085 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t" 12364 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12086 "XOR EAX,EAX\n\t" 12365 "XOR EAX,EAX\n\t"
12090 OpcRegReg(0x33,EAX,EAX), 12369 OpcRegReg(0x33,EAX,EAX),
12091 Opcode(0xF3), Opcode(0xAB) ); 12370 Opcode(0xF3), Opcode(0xAB) );
12092 ins_pipe( pipe_slow ); 12371 ins_pipe( pipe_slow );
12093 %} 12372 %}
12094 12373
12095 instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ 12374 instruct string_compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
12375 eAXRegI tmp3, eBXRegI tmp4, eCXRegI result, eFlagsReg cr) %{
12096 match(Set result (StrComp str1 str2)); 12376 match(Set result (StrComp str1 str2));
12097 effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr); 12377 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
12098 //ins_cost(300); 12378 //ins_cost(300);
12099 12379
12100 format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %} 12380 format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %}
12101 ins_encode( enc_String_Compare() ); 12381 ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
12382 ins_pipe( pipe_slow );
12383 %}
12384
12385 // fast string equals: matches StrEquals and emits enc_String_Equals; tmp1/tmp2 are XMM temporaries, EBX/ECX are killed, the result is produced in EAX
12386 instruct string_equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
12387 eBXRegI tmp3, eCXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
12388 match(Set result (StrEquals str1 str2));
12389 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
12390 // Operands must be passed in enc_String_Equals's declared order (str1, str2, tmp1, tmp2, tmp3, tmp4, result) so $tmp1/$tmp2 in the encoding resolve to the XMM temporaries reserved above
12391 format %{ "String Equals $str1,$str2 -> $result // KILL EBX, ECX" %}
12392 ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
12393 ins_pipe( pipe_slow );
12394 %}
12395
12396 instruct string_indexof(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
12397 eCXRegI tmp3, eDXRegI tmp4, eBXRegI result, eFlagsReg cr) %{
12398 predicate(UseSSE42Intrinsics);
12399 match(Set result (StrIndexOf str1 str2));
12400 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
12401 // Guarded by UseSSE42Intrinsics because enc_String_IndexOf emits PCMPESTRI; operands are passed in the encoding's declared parameter order
12402 format %{ "String IndexOf $str1,$str2 -> $result // KILL EAX, ECX, EDX" %}
12403 ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
12102 ins_pipe( pipe_slow ); 12404 ins_pipe( pipe_slow );
12103 %} 12405 %}
12104 12406
12105 // fast array equals 12407 // fast array equals
12106 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ 12408 instruct array_equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, eBXRegI tmp3,
12409 eDXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
12107 match(Set result (AryEq ary1 ary2)); 12410 match(Set result (AryEq ary1 ary2));
12108 effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr); 12411 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12109 //ins_cost(300); 12412 //ins_cost(300);
12110 12413
12111 format %{ "Array Equals $ary1,$ary2 -> $result // KILL EAX, EBX" %} 12414 format %{ "Array Equals $ary1,$ary2 -> $result // KILL EBX, EDX" %}
12112 ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); 12415 ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
12113 ins_pipe( pipe_slow ); 12416 ins_pipe( pipe_slow );
12114 %} 12417 %}
12115 12418
12116 //----------Control Flow Instructions------------------------------------------ 12419 //----------Control Flow Instructions------------------------------------------
12117 // Signed compare Instructions 12420 // Signed compare Instructions