Mercurial > hg > truffle
comparison src/cpu/sparc/vm/sparc.ad @ 681:fbde8ec322d0
6761600: Use sse 4.2 in intrinsics
Summary: Use SSE 4.2 in intrinsics for String.{compareTo/equals/indexOf} and Arrays.equals.
Reviewed-by: kvn, never, jrose
author | cfang |
---|---|
date | Tue, 31 Mar 2009 14:07:08 -0700 |
parents | bd441136a5ce |
children | 6b2273dd6fa9 |
comparison
equal
deleted
inserted
replaced
676:d3676b4cb78c | 681:fbde8ec322d0 |
---|---|
2998 | 2998 |
2999 // If strings are equal up to min length, return the length difference. | 2999 // If strings are equal up to min length, return the length difference. |
3000 __ mov(O7, result_reg); | 3000 __ mov(O7, result_reg); |
3001 | 3001 |
3002 // Otherwise, return the difference between the first mismatched chars. | 3002 // Otherwise, return the difference between the first mismatched chars. |
3003 __ bind(Ldone); | |
3004 %} | |
3005 | |
3006 enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ | |
3007 Label Lword, Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone; | |
3008 MacroAssembler _masm(&cbuf); | |
3009 | |
3010 Register str1_reg = reg_to_register_object($str1$$reg); | |
3011 Register str2_reg = reg_to_register_object($str2$$reg); | |
3012 Register tmp1_reg = reg_to_register_object($tmp1$$reg); | |
3013 Register tmp2_reg = reg_to_register_object($tmp2$$reg); | |
3014 Register result_reg = reg_to_register_object($result$$reg); | |
3015 | |
3016 // Get the first character position in both strings | |
3017 // [8] char array, [12] offset, [16] count | |
3018 int value_offset = java_lang_String:: value_offset_in_bytes(); | |
3019 int offset_offset = java_lang_String::offset_offset_in_bytes(); | |
3020 int count_offset = java_lang_String:: count_offset_in_bytes(); | |
3021 | |
3022 // load str1 (jchar*) base address into tmp1_reg | |
3023 __ load_heap_oop(Address(str1_reg, 0, value_offset), tmp1_reg); | |
3024 __ ld(Address(str1_reg, 0, offset_offset), result_reg); | |
3025 __ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg); | |
3026 __ ld(Address(str1_reg, 0, count_offset), str1_reg); // hoisted | |
3027 __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg); | |
3028 __ load_heap_oop(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted | |
3029 __ add(result_reg, tmp1_reg, tmp1_reg); | |
3030 | |
3031 // load str2 (jchar*) base address into tmp2_reg | |
3032 // __ ld_ptr(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted | |
3033 __ ld(Address(str2_reg, 0, offset_offset), result_reg); | |
3034 __ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg); | |
3035 __ ld(Address(str2_reg, 0, count_offset), str2_reg); // hoisted | |
3036 __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg); | |
3037 __ cmp(str1_reg, str2_reg); // hoisted | |
3038 __ add(result_reg, tmp2_reg, tmp2_reg); | |
3039 | |
3040 __ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg); | |
3041 __ br(Assembler::notEqual, true, Assembler::pt, Ldone); | |
3042 __ delayed()->mov(G0, result_reg); // not equal | |
3043 | |
3044 __ br_zero(Assembler::equal, true, Assembler::pn, str1_reg, Ldone); | |
3045 __ delayed()->add(G0, 1, result_reg); //equals | |
3046 | |
3047 __ cmp(tmp1_reg, tmp2_reg); //same string ? | |
3048 __ brx(Assembler::equal, true, Assembler::pn, Ldone); | |
3049 __ delayed()->add(G0, 1, result_reg); | |
3050 | |
3051 //rename registers | |
3052 Register limit_reg = str1_reg; | |
3053 Register chr2_reg = str2_reg; | |
3054 Register chr1_reg = result_reg; | |
3055 // tmp{12} are the base pointers | |
3056 | |
3057 //check for alignment and position the pointers to the ends | |
3058 __ or3(tmp1_reg, tmp2_reg, chr1_reg); | |
3059 __ andcc(chr1_reg, 0x3, chr1_reg); // notZero means at least one not 4-byte aligned | |
3060 __ br(Assembler::notZero, false, Assembler::pn, Lchar); | |
3061 __ delayed()->nop(); | |
3062 | |
3063 __ bind(Lword); | |
3064 __ and3(limit_reg, 0x2, O7); //remember the remainder (either 0 or 2) | |
3065 __ andn(limit_reg, 0x3, limit_reg); | |
3066 __ br_zero(Assembler::zero, false, Assembler::pn, limit_reg, Lpost_word); | |
3067 __ delayed()->nop(); | |
3068 | |
3069 __ add(tmp1_reg, limit_reg, tmp1_reg); | |
3070 __ add(tmp2_reg, limit_reg, tmp2_reg); | |
3071 __ neg(limit_reg); | |
3072 | |
3073 __ lduw(tmp1_reg, limit_reg, chr1_reg); | |
3074 __ bind(Lword_loop); | |
3075 __ lduw(tmp2_reg, limit_reg, chr2_reg); | |
3076 __ cmp(chr1_reg, chr2_reg); | |
3077 __ br(Assembler::notEqual, true, Assembler::pt, Ldone); | |
3078 __ delayed()->mov(G0, result_reg); | |
3079 __ inccc(limit_reg, 2*sizeof(jchar)); | |
3080 // annul LDUW if branch is not taken to prevent access past end of string | |
3081 __ br(Assembler::notZero, true, Assembler::pt, Lword_loop); //annul delay slot if not taken | |
3082 __ delayed()->lduw(tmp1_reg, limit_reg, chr1_reg); // hoisted | |
3083 | |
3084 __ bind(Lpost_word); | |
3085 __ br_zero(Assembler::zero, true, Assembler::pt, O7, Ldone); | |
3086 __ delayed()->add(G0, 1, result_reg); | |
3087 | |
3088 __ lduh(tmp1_reg, 0, chr1_reg); | |
3089 __ lduh(tmp2_reg, 0, chr2_reg); | |
3090 __ cmp (chr1_reg, chr2_reg); | |
3091 __ br(Assembler::notEqual, true, Assembler::pt, Ldone); | |
3092 __ delayed()->mov(G0, result_reg); | |
3093 __ ba(false,Ldone); | |
3094 __ delayed()->add(G0, 1, result_reg); | |
3095 | |
3096 __ bind(Lchar); | |
3097 __ add(tmp1_reg, limit_reg, tmp1_reg); | |
3098 __ add(tmp2_reg, limit_reg, tmp2_reg); | |
3099 __ neg(limit_reg); //negate count | |
3100 | |
3101 __ lduh(tmp1_reg, limit_reg, chr1_reg); | |
3102 __ bind(Lchar_loop); | |
3103 __ lduh(tmp2_reg, limit_reg, chr2_reg); | |
3104 __ cmp(chr1_reg, chr2_reg); | |
3105 __ br(Assembler::notEqual, true, Assembler::pt, Ldone); | |
3106 __ delayed()->mov(G0, result_reg); //not equal | |
3107 __ inccc(limit_reg, sizeof(jchar)); | |
3108 // annul LDUH if branch is not taken to prevent access past end of string | |
3109 __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop); //annul delay slot if not taken | |
3110 __ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted | |
3111 | |
3112 __ add(G0, 1, result_reg); //equal | |
3113 | |
3114 __ bind(Ldone); | |
3115 %} | |
3116 | |
3117 enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ | |
3118 Label Lvector, Ldone, Lloop; | |
3119 MacroAssembler _masm(&cbuf); | |
3120 | |
3121 Register ary1_reg = reg_to_register_object($ary1$$reg); | |
3122 Register ary2_reg = reg_to_register_object($ary2$$reg); | |
3123 Register tmp1_reg = reg_to_register_object($tmp1$$reg); | |
3124 Register tmp2_reg = reg_to_register_object($tmp2$$reg); | |
3125 Register result_reg = reg_to_register_object($result$$reg); | |
3126 | |
3127 int length_offset = arrayOopDesc::length_offset_in_bytes(); | |
3128 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); | |
3129 | |
3130 // return true if the same array | |
3131 __ cmp(ary1_reg, ary2_reg); | |
3132 __ br(Assembler::equal, true, Assembler::pn, Ldone); | |
3133 __ delayed()->add(G0, 1, result_reg); // equal | |
3134 | |
3135 __ br_null(ary1_reg, true, Assembler::pn, Ldone); | |
3136 __ delayed()->mov(G0, result_reg); // not equal | |
3137 | |
3138 __ br_null(ary2_reg, true, Assembler::pn, Ldone); | |
3139 __ delayed()->mov(G0, result_reg); // not equal | |
3140 | |
3141 //load the lengths of arrays | |
3142 __ ld(Address(ary1_reg, 0, length_offset), tmp1_reg); | |
3143 __ ld(Address(ary2_reg, 0, length_offset), tmp2_reg); | |
3144 | |
3145 // return false if the two arrays are not equal length | |
3146 __ cmp(tmp1_reg, tmp2_reg); | |
3147 __ br(Assembler::notEqual, true, Assembler::pn, Ldone); | |
3148 __ delayed()->mov(G0, result_reg); // not equal | |
3149 | |
3150 __ br_zero(Assembler::zero, true, Assembler::pn, tmp1_reg, Ldone); | |
3151 __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal | |
3152 | |
3153 // load array addresses | |
3154 __ add(ary1_reg, base_offset, ary1_reg); | |
3155 __ add(ary2_reg, base_offset, ary2_reg); | |
3156 | |
3157 // renaming registers | |
3158 Register chr1_reg = tmp2_reg; // for characters in ary1 | |
3159 Register chr2_reg = result_reg; // for characters in ary2 | |
3160 Register limit_reg = tmp1_reg; // length | |
3161 | |
3162 // set byte count | |
3163 __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); | |
3164 __ andcc(limit_reg, 0x2, chr1_reg); //trailing character ? | |
3165 __ br(Assembler::zero, false, Assembler::pt, Lvector); | |
3166 __ delayed()->nop(); | |
3167 | |
3168 //compare the trailing char | |
3169 __ sub(limit_reg, sizeof(jchar), limit_reg); | |
3170 __ lduh(ary1_reg, limit_reg, chr1_reg); | |
3171 __ lduh(ary2_reg, limit_reg, chr2_reg); | |
3172 __ cmp(chr1_reg, chr2_reg); | |
3173 __ br(Assembler::notEqual, true, Assembler::pt, Ldone); | |
3174 __ delayed()->mov(G0, result_reg); // not equal | |
3175 | |
3176 // only one char ? | |
3177 __ br_zero(Assembler::zero, true, Assembler::pn, limit_reg, Ldone); | |
3178 __ delayed()->add(G0, 1, result_reg); // single-char arrays with matching char are equal | |
3179 | |
3180 __ bind(Lvector); | |
3181 // Shift ary1_reg and ary2_reg to the end of the arrays, negate limit | |
3182 __ add(ary1_reg, limit_reg, ary1_reg); | |
3183 __ add(ary2_reg, limit_reg, ary2_reg); | |
3184 __ neg(limit_reg, limit_reg); | |
3185 | |
3186 __ lduw(ary1_reg, limit_reg, chr1_reg); | |
3187 __ bind(Lloop); | |
3188 __ lduw(ary2_reg, limit_reg, chr2_reg); | |
3189 __ cmp(chr1_reg, chr2_reg); | |
3190 __ br(Assembler::notEqual, false, Assembler::pt, Ldone); | |
3191 __ delayed()->mov(G0, result_reg); // not equal | |
3192 __ inccc(limit_reg, 2*sizeof(jchar)); | |
3193 // annul LDUW if branch is not taken to prevent access past end of array | |
3194 __ br(Assembler::notZero, true, Assembler::pt, Lloop); //annul delay slot if not taken | |
3195 __ delayed()->lduw(ary1_reg, limit_reg, chr1_reg); // hoisted | |
3196 | |
3197 __ add(G0, 1, result_reg); // equals | |
3198 | |
3003 __ bind(Ldone); | 3199 __ bind(Ldone); |
3004 %} | 3200 %} |
3005 | 3201 |
3006 enc_class enc_rethrow() %{ | 3202 enc_class enc_rethrow() %{ |
3007 cbuf.set_inst_mark(); | 3203 cbuf.set_inst_mark(); |
9013 format %{ "String Compare $str1,$str2 -> $result" %} | 9209 format %{ "String Compare $str1,$str2 -> $result" %} |
9014 ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, result) ); | 9210 ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, result) ); |
9015 ins_pipe(long_memory_op); | 9211 ins_pipe(long_memory_op); |
9016 %} | 9212 %} |
9017 | 9213 |
9214 instruct string_equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result, | |
9215 o7RegI tmp3, flagsReg ccr) %{ | |
9216 match(Set result (StrEquals str1 str2)); | |
9217 effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3); | |
9218 ins_cost(300); | |
9219 format %{ "String Equals $str1,$str2 -> $result" %} | |
9220 ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, result) ); | |
9221 ins_pipe(long_memory_op); | |
9222 %} | |
9223 | |
9224 instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result, | |
9225 flagsReg ccr) %{ | |
9226 match(Set result (AryEq ary1 ary2)); | |
9227 effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr); | |
9228 ins_cost(300); | |
9229 format %{ "Array Equals $ary1,$ary2 -> $result" %} | |
9230 ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result)); | |
9231 ins_pipe(long_memory_op); | |
9232 %} | |
9018 | 9233 |
9019 //---------- Population Count Instructions ------------------------------------- | 9234 //---------- Population Count Instructions ------------------------------------- |
9020 | 9235 |
9021 instruct popCountI(iRegI dst, iRegI src) %{ | 9236 instruct popCountI(iRegI dst, iRegI src) %{ |
9022 predicate(UsePopCountInstruction); | 9237 predicate(UsePopCountInstruction); |