comparison src/cpu/x86/vm/macroAssembler_x86.cpp @ 7637:b30b3c2a0cf2

6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86 Summary: Use SSE4.2 and AVX2 instructions for encodeArray intrinsic. Reviewed-by: roland
author kvn
date Tue, 22 Jan 2013 15:34:16 -0800
parents 038dd2875b94
children 8391fdd36e1f
comparison
equal deleted inserted replaced
7636:a7114d3d712e 7637:b30b3c2a0cf2
6207 } else { 6207 } else {
6208 BIND(L_fill_2_bytes); 6208 BIND(L_fill_2_bytes);
6209 } 6209 }
6210 BIND(L_exit); 6210 BIND(L_exit);
6211 } 6211 }
6212
6213 // encode char[] to byte[] in ISO_8859_1
//
// Emits code that copies 16-bit chars from src to bytes at dst, stopping at the
// first char whose high byte is non-zero (i.e. not representable in ISO-8859-1).
//
//   src, dst   - array element addresses; both are overwritten (advanced to the
//                end of the region) by the emitted code
//   len        - number of chars to encode; overwritten (negated and used as a
//                running index that counts up towards zero)
//   tmp1Reg    - holds the 0xff00ff00 mask on the vector paths
//   tmp2Reg..tmp4Reg - vector scratch registers
//   tmp5       - general-purpose scratch (mask constant, then the scalar char)
//   result     - receives the number of chars actually encoded
//                (0 for len == 0, len when everything was encoded)
//
// Bookkeeping convention used by every loop below: src/dst point one past the
// end of the data and len is negative, so Address(src, len, times_2) and
// Address(dst, len, times_1) address the next unprocessed element. Each vector
// loop first adds its chunk size to len and then addresses the chunk with a
// negative displacement; when a loop is left, len is still biased by that chunk
// size and the following subptr re-biases it for the next (smaller) chunk size.
6214 void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
6215 XMMRegister tmp1Reg, XMMRegister tmp2Reg,
6216 XMMRegister tmp3Reg, XMMRegister tmp4Reg,
6217 Register tmp5, Register result) {
6218 // rsi: src
6219 // rdi: dst
6220 // rdx: len
6221 // rcx: tmp5
6222 // rax: result
6223 ShortBranchVerifier sbv(this); // presumably checks that the jccb/jmpb short branches below stay in range
6224 assert_different_registers(src, dst, len, tmp5, result);
6225 Label L_done, L_copy_1_char, L_copy_1_char_exit;
6226
6227 // set result
6228 xorl(result, result);
6229 // check for zero length
6230 testl(len, len);
6231 jcc(Assembler::zero, L_done); // len == 0: leave with result == 0
6232 movl(result, len); // optimistic result: all chars encoded; corrected at L_copy_1_char_exit
6233
6234 // Setup pointers
6235 lea(src, Address(src, len, Address::times_2)); // char[]
6236 lea(dst, Address(dst, len, Address::times_1)); // byte[]
6237 negptr(len); // len now runs from -count up to 0
6238
6239 if (UseSSE42Intrinsics || UseAVX >= 2) {
6240 Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
6241 Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
6242
6243 if (UseAVX >= 2) {
// AVX2 only: 32 chars (64 source bytes -> 32 destination bytes) per iteration.
6244 Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
6245 movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector
6246 movdl(tmp1Reg, tmp5);
6247 vpbroadcastd(tmp1Reg, tmp1Reg);
6248 jmpb(L_chars_32_check); // test the remaining count before the first iteration
6249
6250 bind(L_copy_32_chars);
6251 vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
6252 vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
6253 vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
6254 vptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
6255 jccb(Assembler::notZero, L_copy_32_chars_exit);
6256 vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
// NOTE(review): 0xD8 selects quadwords 0,2,1,3; presumably this undoes the
// per-128-bit-lane interleaving of the 256-bit pack - confirm against the
// Intel instruction reference.
6257 vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
6258 vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
6259
6260 bind(L_chars_32_check);
6261 addptr(len, 32);
6262 jccb(Assembler::lessEqual, L_copy_32_chars); // len <= 0: at least 32 chars were left
6263
6264 bind(L_copy_32_chars_exit);
6265 subptr(len, 16); // change the bias from +32 to +16 for the 16-char step
6266 jccb(Assembler::greater, L_copy_16_chars_exit); // fewer than 16 chars left; otherwise fall into L_copy_16_chars
6267
6268 } else if (UseSSE42Intrinsics) {
6269 movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector
6270 movdl(tmp1Reg, tmp5);
6271 pshufd(tmp1Reg, tmp1Reg, 0); // replicate the low dword into all four dwords
6272 jmpb(L_chars_16_check);
6273 }
6274
// 16 chars (32 source bytes -> 16 destination bytes) per iteration.
6275 bind(L_copy_16_chars);
6276 if (UseAVX >= 2) {
6277 vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
6278 vptest(tmp2Reg, tmp1Reg);
6279 jccb(Assembler::notZero, L_copy_16_chars_exit);
// tmp1Reg (the mask) is only a filler second operand here: just the low
// 16 bytes of tmp3Reg are stored by the movdqu below.
6280 vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
6281 vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
6282 } else {
6283 if (UseAVX > 0) {
6284 movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
6285 movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
6286 vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
6287 } else {
// NOTE(review): assuming por(dst, src) computes dst |= src, tmp2Reg is
// OR-ed into here without having been written earlier in this function on
// the SSE4.2-only path. Stale bits under the mask could only cause an
// early exit to the slower loops (the 8-char and scalar loops re-test the
// data themselves), but this looks unintended - confirm.
6288 movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
6289 por(tmp2Reg, tmp3Reg);
6290 movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
6291 por(tmp2Reg, tmp4Reg);
6292 }
6293 ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
6294 jccb(Assembler::notZero, L_copy_16_chars_exit);
6295 packuswb(tmp3Reg, tmp4Reg);
6296 }
6297 movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg); // store the 16 packed bytes
6298
6299 bind(L_chars_16_check);
6300 addptr(len, 16);
6301 jccb(Assembler::lessEqual, L_copy_16_chars); // len <= 0: at least 16 chars were left
6302
6303 bind(L_copy_16_chars_exit);
6304 subptr(len, 8); // change the bias from +16 to +8 for the 8-char step
6305 jccb(Assembler::greater, L_copy_8_chars_exit); // fewer than 8 chars left
6306
// 8 chars (16 source bytes -> 8 destination bytes) per iteration.
6307 bind(L_copy_8_chars);
6308 movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
6309 ptest(tmp3Reg, tmp1Reg);
6310 jccb(Assembler::notZero, L_copy_8_chars_exit);
6311 packuswb(tmp3Reg, tmp1Reg); // second operand is filler; only the low 8 bytes are stored
6312 movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
6313 addptr(len, 8);
6314 jccb(Assembler::lessEqual, L_copy_8_chars);
6315
6316 bind(L_copy_8_chars_exit);
6317 subptr(len, 8); // remove the bias: len is again minus the number of unprocessed chars
6318 jccb(Assembler::zero, L_done); // nothing left; result already holds the full length
6319 }
6320
// Scalar tail: one char per iteration, also used when no vector path is enabled.
6321 bind(L_copy_1_char);
6322 load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
6323 testl(tmp5, 0xff00); // check if Unicode char
6324 jccb(Assembler::notZero, L_copy_1_char_exit);
6325 movb(Address(dst, len, Address::times_1, 0), tmp5);
6326 addptr(len, 1);
6327 jccb(Assembler::less, L_copy_1_char); // loop while len < 0
6328
6329 bind(L_copy_1_char_exit);
6330 addptr(result, len); // len is negative count of not processed elements
6331 bind(L_done);
6332 }
6333
6212 #undef BIND 6334 #undef BIND
6213 #undef BLOCK_COMMENT 6335 #undef BLOCK_COMMENT
6214 6336
6215 6337
6216 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { 6338 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {