comparison src/cpu/sparc/vm/sparc.ad @ 681:fbde8ec322d0

6761600: Use sse 4.2 in intrinsics Summary: Use SSE 4.2 in intrinsics for String.{compareTo/equals/indexOf} and Arrays.equals. Reviewed-by: kvn, never, jrose
author cfang
date Tue, 31 Mar 2009 14:07:08 -0700
parents bd441136a5ce
children 6b2273dd6fa9
comparison
equal deleted inserted replaced
676:d3676b4cb78c 681:fbde8ec322d0
2998 2998
2999 // If strings are equal up to min length, return the length difference. 2999 // If strings are equal up to min length, return the length difference.
3000 __ mov(O7, result_reg); 3000 __ mov(O7, result_reg);
3001 3001
3002 // Otherwise, return the difference between the first mismatched chars. 3002 // Otherwise, return the difference between the first mismatched chars.
3003 __ bind(Ldone);
3004 %}
3005
3006 enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ // Emits an inline String equality test: result = 1 if equal, 0 otherwise. Clobbers str1, str2, tmp1, tmp2, O7 and the condition codes.
3007 Label Lword, Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone;
3008 MacroAssembler _masm(&cbuf);
3009
3010 Register str1_reg = reg_to_register_object($str1$$reg);
3011 Register str2_reg = reg_to_register_object($str2$$reg);
3012 Register tmp1_reg = reg_to_register_object($tmp1$$reg);
3013 Register tmp2_reg = reg_to_register_object($tmp2$$reg);
3014 Register result_reg = reg_to_register_object($result$$reg);
3015
3016 // Get the first character position in both strings
3017 // [8] char array, [12] offset, [16] count
3018 int value_offset = java_lang_String:: value_offset_in_bytes();
3019 int offset_offset = java_lang_String::offset_offset_in_bytes();
3020 int count_offset = java_lang_String:: count_offset_in_bytes();
3021
3022 // load str1 (jchar*) base address into tmp1_reg; result_reg is used as scratch for the offset
3023 __ load_heap_oop(Address(str1_reg, 0, value_offset), tmp1_reg);
3024 __ ld(Address(str1_reg, 0, offset_offset), result_reg);
3025 __ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg);
3026 __ ld(Address(str1_reg, 0, count_offset), str1_reg); // hoisted; str1_reg now holds str1's count, not the oop
3027 __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
3028 __ load_heap_oop(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted
3029 __ add(result_reg, tmp1_reg, tmp1_reg);
3030
3031 // load str2 (jchar*) base address into tmp2_reg
3032 // __ ld_ptr(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted
3033 __ ld(Address(str2_reg, 0, offset_offset), result_reg);
3034 __ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg);
3035 __ ld(Address(str2_reg, 0, count_offset), str2_reg); // hoisted; str2_reg now holds str2's count, not the oop
3036 __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
3037 __ cmp(str1_reg, str2_reg); // hoisted; compares the two counts
3038 __ add(result_reg, tmp2_reg, tmp2_reg);
3039
3040 __ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg); // char count -> byte count
3041 __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
3042 __ delayed()->mov(G0, result_reg); // counts differ: not equal
3043
3044 __ br_zero(Assembler::equal, true, Assembler::pn, str1_reg, Ldone);
3045 __ delayed()->add(G0, 1, result_reg); // both strings are empty: equal
3046
3047 __ cmp(tmp1_reg, tmp2_reg); // same first-character address?
3048 __ brx(Assembler::equal, true, Assembler::pn, Ldone);
3049 __ delayed()->add(G0, 1, result_reg);
3050
3051 // rename registers: the counts and the result register are reused as loop state
3052 Register limit_reg = str1_reg;
3053 Register chr2_reg = str2_reg;
3054 Register chr1_reg = result_reg;
3055 // tmp{12} are the base pointers
3056
3057 //check for alignment and position the pointers to the ends
3058 __ or3(tmp1_reg, tmp2_reg, chr1_reg);
3059 __ andcc(chr1_reg, 0x3, chr1_reg); // notZero means at least one not 4-byte aligned
3060 __ br(Assembler::notZero, false, Assembler::pn, Lchar);
3061 __ delayed()->nop();
3062
3063 __ bind(Lword);
3064 __ and3(limit_reg, 0x2, O7); //remember the remainder (either 0 or 2)
3065 __ andn(limit_reg, 0x3, limit_reg);
3066 __ br_zero(Assembler::zero, false, Assembler::pn, limit_reg, Lpost_word);
3067 __ delayed()->nop();
3068
3069 __ add(tmp1_reg, limit_reg, tmp1_reg);
3070 __ add(tmp2_reg, limit_reg, tmp2_reg);
3071 __ neg(limit_reg);
3072
3073 __ lduw(tmp1_reg, limit_reg, chr1_reg);
3074 __ bind(Lword_loop);
3075 __ lduw(tmp2_reg, limit_reg, chr2_reg);
3076 __ cmp(chr1_reg, chr2_reg);
3077 __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
3078 __ delayed()->mov(G0, result_reg);
3079 __ inccc(limit_reg, 2*sizeof(jchar));
3080 // annul LDUW if branch is not taken to prevent access past end of string
3081 __ br(Assembler::notZero, true, Assembler::pt, Lword_loop); // annul bit set: delay slot executes only when the branch is taken
3082 __ delayed()->lduw(tmp1_reg, limit_reg, chr1_reg); // hoisted
3083
3084 __ bind(Lpost_word);
3085 __ br_zero(Assembler::zero, true, Assembler::pt, O7, Ldone);
3086 __ delayed()->add(G0, 1, result_reg); // no trailing char left: equal
3087
3088 __ lduh(tmp1_reg, 0, chr1_reg);
3089 __ lduh(tmp2_reg, 0, chr2_reg);
3090 __ cmp (chr1_reg, chr2_reg);
3091 __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
3092 __ delayed()->mov(G0, result_reg);
3093 __ ba(false,Ldone);
3094 __ delayed()->add(G0, 1, result_reg);
3095
3096 __ bind(Lchar);
3097 __ add(tmp1_reg, limit_reg, tmp1_reg);
3098 __ add(tmp2_reg, limit_reg, tmp2_reg);
3099 __ neg(limit_reg); //negate count
3100
3101 __ lduh(tmp1_reg, limit_reg, chr1_reg);
3102 __ bind(Lchar_loop);
3103 __ lduh(tmp2_reg, limit_reg, chr2_reg);
3104 __ cmp(chr1_reg, chr2_reg);
3105 __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
3106 __ delayed()->mov(G0, result_reg); //not equal
3107 __ inccc(limit_reg, sizeof(jchar));
3108 // annul LDUH if branch is not taken to prevent access past end of string
3109 __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop); // annul bit set: delay slot executes only when the branch is taken
3110 __ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted
3111
3112 __ add(G0, 1, result_reg); //equal
3113
3114 __ bind(Ldone);
3115 %}
3116
3117 enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ // Emits an inline char-array equality test: result = 1 if equal, 0 otherwise. Clobbers ary1, ary2, tmp1, tmp2 and the condition codes.
3118 Label Lvector, Ldone, Lloop;
3119 MacroAssembler _masm(&cbuf);
3120
3121 Register ary1_reg = reg_to_register_object($ary1$$reg);
3122 Register ary2_reg = reg_to_register_object($ary2$$reg);
3123 Register tmp1_reg = reg_to_register_object($tmp1$$reg);
3124 Register tmp2_reg = reg_to_register_object($tmp2$$reg);
3125 Register result_reg = reg_to_register_object($result$$reg);
3126
3127 int length_offset = arrayOopDesc::length_offset_in_bytes();
3128 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3129
3130 // return true if the same array
3131 __ cmp(ary1_reg, ary2_reg);
3132 __ br(Assembler::equal, true, Assembler::pn, Ldone);
3133 __ delayed()->add(G0, 1, result_reg); // equal
3134
3135 __ br_null(ary1_reg, true, Assembler::pn, Ldone);
3136 __ delayed()->mov(G0, result_reg); // not equal
3137
3138 __ br_null(ary2_reg, true, Assembler::pn, Ldone);
3139 __ delayed()->mov(G0, result_reg); // not equal
3140
3141 //load the lengths of arrays
3142 __ ld(Address(ary1_reg, 0, length_offset), tmp1_reg);
3143 __ ld(Address(ary2_reg, 0, length_offset), tmp2_reg);
3144
3145 // return false if the two arrays are not equal length
3146 __ cmp(tmp1_reg, tmp2_reg);
3147 __ br(Assembler::notEqual, true, Assembler::pn, Ldone);
3148 __ delayed()->mov(G0, result_reg); // not equal
3149
3150 __ br_zero(Assembler::zero, true, Assembler::pn, tmp1_reg, Ldone);
3151 __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
3152
3153 // advance both array registers to the first element
3154 __ add(ary1_reg, base_offset, ary1_reg);
3155 __ add(ary2_reg, base_offset, ary2_reg);
3156
3157 // renaming registers
3158 Register chr1_reg = tmp2_reg; // for characters in ary1
3159 Register chr2_reg = result_reg; // for characters in ary2
3160 Register limit_reg = tmp1_reg; // length
3161
3162 // set byte count
3163 __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg);
3164 __ andcc(limit_reg, 0x2, chr1_reg); //trailing character ?
3165 __ br(Assembler::zero, false, Assembler::pt, Lvector);
3166 __ delayed()->nop();
3167
3168 //compare the trailing char
3169 __ sub(limit_reg, sizeof(jchar), limit_reg);
3170 __ lduh(ary1_reg, limit_reg, chr1_reg);
3171 __ lduh(ary2_reg, limit_reg, chr2_reg);
3172 __ cmp(chr1_reg, chr2_reg);
3173 __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
3174 __ delayed()->mov(G0, result_reg); // not equal
3175
3176 // only one char ?
3177 __ br_zero(Assembler::zero, true, Assembler::pn, limit_reg, Ldone);
3178 __ delayed()->add(G0, 1, result_reg); // the single char matched: equal
3179
3180 __ bind(Lvector);
3181 // Shift ary1_reg and ary2_reg to the end of the arrays, negate limit
3182 __ add(ary1_reg, limit_reg, ary1_reg);
3183 __ add(ary2_reg, limit_reg, ary2_reg);
3184 __ neg(limit_reg, limit_reg);
3185
3186 __ lduw(ary1_reg, limit_reg, chr1_reg);
3187 __ bind(Lloop);
3188 __ lduw(ary2_reg, limit_reg, chr2_reg);
3189 __ cmp(chr1_reg, chr2_reg);
3190 __ br(Assembler::notEqual, false, Assembler::pt, Ldone);
3191 __ delayed()->mov(G0, result_reg); // not equal; no annul here, so this also runs when not taken, which is harmless because result_reg (chr2_reg) is reloaded at Lloop and set to 1 after the loop
3192 __ inccc(limit_reg, 2*sizeof(jchar));
3193 // annul LDUW if branch is not taken to prevent access past end of array
3194 __ br(Assembler::notZero, true, Assembler::pt, Lloop); // annul bit set: delay slot executes only when the branch is taken
3195 __ delayed()->lduw(ary1_reg, limit_reg, chr1_reg); // hoisted
3196
3197 __ add(G0, 1, result_reg); // equals
3198
3003 __ bind(Ldone); 3199 __ bind(Ldone);
3004 %} 3200 %}
3005 3201
3006 enc_class enc_rethrow() %{ 3202 enc_class enc_rethrow() %{
3007 cbuf.set_inst_mark(); 3203 cbuf.set_inst_mark();
9013 format %{ "String Compare $str1,$str2 -> $result" %} 9209 format %{ "String Compare $str1,$str2 -> $result" %}
9014 ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, result) ); 9210 ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, result) );
9015 ins_pipe(long_memory_op); 9211 ins_pipe(long_memory_op);
9016 %} 9212 %}
9017 9213
9214 instruct string_equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
9215 o7RegI tmp3, flagsReg ccr) %{ // Matcher rule: implements StrEquals with the enc_String_Equals encoding.
9216 match(Set result (StrEquals str1 str2));
9217 effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3); // tmp3 is O7, which enc_String_Equals uses as scratch without taking it as an operand
9218 ins_cost(300);
9219 format %{ "String Equals $str1,$str2 -> $result" %}
9220 ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, result) );
9221 ins_pipe(long_memory_op);
9222 %}
9223
9224 instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
9225 flagsReg ccr) %{ // Matcher rule: implements AryEq with the enc_Array_Equals encoding.
9226 match(Set result (AryEq ary1 ary2));
9227 effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr); // USE_KILL: the encoding overwrites the input array registers while comparing
9228 ins_cost(300);
9229 format %{ "Array Equals $ary1,$ary2 -> $result" %}
9230 ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result));
9231 ins_pipe(long_memory_op);
9232 %}
9018 9233
9019 //---------- Population Count Instructions ------------------------------------- 9234 //---------- Population Count Instructions -------------------------------------
9020 9235
9021 instruct popCountI(iRegI dst, iRegI src) %{ 9236 instruct popCountI(iRegI dst, iRegI src) %{
9022 predicate(UsePopCountInstruction); 9237 predicate(UsePopCountInstruction);