Mercurial > hg > graal-jvmci-8
comparison src/cpu/x86/vm/macroAssembler_x86.cpp @ 7637:b30b3c2a0cf2
6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86
Summary: Use SSE4.2 and AVX2 instructions for encodeArray intrinsic.
Reviewed-by: roland
author | kvn |
---|---|
date | Tue, 22 Jan 2013 15:34:16 -0800 |
parents | 038dd2875b94 |
children | 8391fdd36e1f |
comparison
equal
deleted
inserted
replaced
7636:a7114d3d712e | 7637:b30b3c2a0cf2 |
---|---|
6207 } else { | 6207 } else { |
6208 BIND(L_fill_2_bytes); | 6208 BIND(L_fill_2_bytes); |
6209 } | 6209 } |
6210 BIND(L_exit); | 6210 BIND(L_exit); |
6211 } | 6211 } |
6212 | |
6213 // encode char[] to byte[] in ISO_8859_1 | |
// Emits code that copies chars from src to dst, narrowing each char to one | |
// byte, and stops at the first char that has any of bits 8..15 set (such a | |
// char has no ISO-8859-1 encoding). | |
// | |
// src, dst - array pointers; both are advanced to the end of their array | |
// len - number of chars; negated and used as an index that counts up to 0 | |
// tmp1Reg - vector paths only: 0xff00ff00 broadcast to every dword (test mask) | |
// tmp2Reg..tmp4Reg - scratch vector registers | |
// tmp5 - scratch general register (mask constant, then the current char) | |
// result - on exit: number of chars that were encoded (0 for len == 0) | |
// | |
// The copy runs in progressively smaller steps: 32 chars (AVX2 only), 16 chars, | |
// 8 chars, and finally 1 char. A vector step that sees a char above 0xFF stores | |
// nothing and hands over to the next smaller step, so the 1-char loop is the | |
// one that locates the exact stopping position. | |
6214 void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, | |
6215 XMMRegister tmp1Reg, XMMRegister tmp2Reg, | |
6216 XMMRegister tmp3Reg, XMMRegister tmp4Reg, | |
6217 Register tmp5, Register result) { | |
6218 // rsi: src | |
6219 // rdi: dst | |
6220 // rdx: len | |
6221 // rcx: tmp5 | |
6222 // rax: result | |
6223 ShortBranchVerifier sbv(this); | |
6224 assert_different_registers(src, dst, len, tmp5, result); | |
6225 Label L_done, L_copy_1_char, L_copy_1_char_exit; | |
6226 | |
6227 // set result | |
6228 xorl(result, result); | |
6229 // check for zero length | |
6230 testl(len, len); | |
6231 jcc(Assembler::zero, L_done); | |
6232 movl(result, len); | |
6233 | |
6234 // Setup pointers: point src/dst just past the last element and negate len, | |
// so that Address(base, len, scale) walks forward as len counts up to zero. | |
6235 lea(src, Address(src, len, Address::times_2)); // char[] | |
6236 lea(dst, Address(dst, len, Address::times_1)); // byte[] | |
6237 negptr(len); | |
6238 | |
6239 if (UseSSE42Intrinsics || UseAVX >= 2) { | |
6240 Label L_copy_8_chars, L_copy_8_chars_exit; | |
6241 Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit; | |
6242 | |
6243 if (UseAVX >= 2) { | |
6244 Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit; | |
6245 movl(tmp5, 0xff00ff00); // create mask to test for chars above 0xFF in vector | |
6246 movdl(tmp1Reg, tmp5); | |
6247 vpbroadcastd(tmp1Reg, tmp1Reg); | |
6248 jmpb(L_chars_32_check); | |
6249 | |
6250 bind(L_copy_32_chars); | |
6251 vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64)); | |
6252 vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32)); | |
6253 vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true); | |
6254 vptest(tmp2Reg, tmp1Reg); // check for chars above 0xFF in vector | |
6255 jccb(Assembler::notZero, L_copy_32_chars_exit); | |
6256 vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true); | |
// 256-bit vpackuswb packs within each 128-bit lane; 0xD8 selects quadwords | |
// 0,2,1,3 to put the 32 packed bytes back into source order. | |
6257 vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true); | |
6258 vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg); | |
6259 | |
6260 bind(L_chars_32_check); | |
6261 addptr(len, 32); | |
6262 jccb(Assembler::lessEqual, L_copy_32_chars); | |
6263 | |
6264 bind(L_copy_32_chars_exit); | |
// len was pre-incremented by 32; step it back so it is pre-incremented by 16 | |
// for the 16-char loop, and skip that loop when fewer than 16 chars remain. | |
6265 subptr(len, 16); | |
6266 jccb(Assembler::greater, L_copy_16_chars_exit); | |
6267 | |
6268 } else if (UseSSE42Intrinsics) { | |
6269 movl(tmp5, 0xff00ff00); // create mask to test for chars above 0xFF in vector | |
6270 movdl(tmp1Reg, tmp5); | |
6271 pshufd(tmp1Reg, tmp1Reg, 0); | |
6272 jmpb(L_chars_16_check); | |
6273 } | |
6274 | |
6275 bind(L_copy_16_chars); | |
6276 if (UseAVX >= 2) { | |
6277 vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32)); | |
6278 vptest(tmp2Reg, tmp1Reg); | |
6279 jccb(Assembler::notZero, L_copy_16_chars_exit); | |
// The mask register is only a filler second operand here: after the vpermq | |
// the 16 packed chars sit in the low 128 bits, which is all that gets stored. | |
6280 vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true); | |
6281 vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true); | |
6282 } else { | |
6283 if (UseAVX > 0) { | |
6284 movdqu(tmp3Reg, Address(src, len, Address::times_2, -32)); | |
6285 movdqu(tmp4Reg, Address(src, len, Address::times_2, -16)); | |
6286 vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false); | |
6287 } else { | |
6288 movdqu(tmp3Reg, Address(src, len, Address::times_2, -32)); | |
// Seed tmp2Reg from tmp3Reg instead of OR-ing into it: tmp2Reg is not | |
// initialized before this point, and stale bits under the mask would make | |
// ptest report a char above 0xFF that is not there, abandoning this loop | |
// for the slower 8-char and 1-char loops. Result: tmp2Reg = tmp3Reg | tmp4Reg, | |
// the same value the vpor branch above computes. | |
6289 movdqa(tmp2Reg, tmp3Reg); | |
6290 movdqu(tmp4Reg, Address(src, len, Address::times_2, -16)); | |
6291 por(tmp2Reg, tmp4Reg); | |
6292 } | |
6293 ptest(tmp2Reg, tmp1Reg); // check for chars above 0xFF in vector | |
6294 jccb(Assembler::notZero, L_copy_16_chars_exit); | |
6295 packuswb(tmp3Reg, tmp4Reg); | |
6296 } | |
6297 movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg); | |
6298 | |
6299 bind(L_chars_16_check); | |
6300 addptr(len, 16); | |
6301 jccb(Assembler::lessEqual, L_copy_16_chars); | |
6302 | |
6303 bind(L_copy_16_chars_exit); | |
// NOTE(review): on the AVX2 path the legacy-SSE instructions below follow | |
// 256-bit VEX code with no vzeroupper in between - confirm whether an | |
// AVX/SSE transition penalty matters on the targeted CPUs. | |
6304 subptr(len, 8); | |
6305 jccb(Assembler::greater, L_copy_8_chars_exit); | |
6306 | |
6307 bind(L_copy_8_chars); | |
6308 movdqu(tmp3Reg, Address(src, len, Address::times_2, -16)); | |
6309 ptest(tmp3Reg, tmp1Reg); | |
6310 jccb(Assembler::notZero, L_copy_8_chars_exit); | |
// Only the low 8 bytes of the pack result (the 8 chars) are stored by movq. | |
6311 packuswb(tmp3Reg, tmp1Reg); | |
6312 movq(Address(dst, len, Address::times_1, -8), tmp3Reg); | |
6313 addptr(len, 8); | |
6314 jccb(Assembler::lessEqual, L_copy_8_chars); | |
6315 | |
6316 bind(L_copy_8_chars_exit); | |
// Undo the pre-increment: len is again minus the number of chars still to do. | |
6317 subptr(len, 8); | |
6318 jccb(Assembler::zero, L_done); | |
6319 } | |
6320 | |
6321 bind(L_copy_1_char); | |
6322 load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0)); | |
6323 testl(tmp5, 0xff00); // check if char is above 0xFF | |
6324 jccb(Assembler::notZero, L_copy_1_char_exit); | |
6325 movb(Address(dst, len, Address::times_1, 0), tmp5); | |
6326 addptr(len, 1); | |
6327 jccb(Assembler::less, L_copy_1_char); | |
6328 | |
6329 bind(L_copy_1_char_exit); | |
6330 addptr(result, len); // len is negative count of not processed elements | |
6331 bind(L_done); | |
6332 } | |
6333 | |
6212 #undef BIND | 6334 #undef BIND |
6213 #undef BLOCK_COMMENT | 6335 #undef BLOCK_COMMENT |
6214 | 6336 |
6215 | 6337 |
6216 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { | 6338 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { |