comparison src/cpu/x86/vm/macroAssembler_x86.cpp @ 7475:e2e6bf86682c

8005544: Use 256bit YMM registers in arraycopy stubs on x86
Summary: Use YMM registers in arraycopy and array_fill stubs.
Reviewed-by: roland, twisti
author kvn
date Thu, 03 Jan 2013 16:30:47 -0800
parents 00af3a3a8df4
children ffa87474d7a4
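This changeset gates a new fill path on UseAVX >= 2 && UseUnalignedLoadStores: the 32-bit fill value is broadcast across a 256-bit YMM register with vpbroadcastd (pshufd, kept on the fallback path, only replicates within a 128-bit XMM register), and the hot loop then issues two unaligned 32-byte vmovdqu stores per iteration, filling 64 bytes at a time instead of 32. A rough standalone sketch of the same idea with AVX2 intrinsics follows; it is an illustration of that reading, not the HotSpot stub itself, and fill_words_avx2 and its signature are invented here.

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

// Illustrative only: fill 'count' 32-bit words at 'to' with 'value',
// 64 bytes per main-loop iteration, mirroring the new stub's shape.
static void fill_words_avx2(uint32_t* to, uint32_t value, size_t count) {
  __m256i v = _mm256_set1_epi32((int)value);        // cf. vpbroadcastd
  size_t i = 0;
  for (; i + 16 <= count; i += 16) {                // 64-byte main loop
    _mm256_storeu_si256((__m256i*)(to + i),     v); // cf. vmovdqu [to + 0]
    _mm256_storeu_si256((__m256i*)(to + i + 8), v); // cf. vmovdqu [to + 32]
  }
  if (i + 8 <= count) {                             // 32-byte epilogue
    _mm256_storeu_si256((__m256i*)(to + i), v);
    i += 8;
  }
  for (; i < count; i++) {                          // scalar tail (the stub
    to[i] = value;                                  // uses 8/4/2/1-byte paths)
  }
}

The 32-byte epilogue mirrors the stub's L_check_fill_32_bytes block: once the 64-byte loop under-runs, at most one more YMM store is needed before dropping to the smaller tail paths.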
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp    (7474:00af3a3a8df4)
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp    (7475:e2e6bf86682c)
@@ -6009,33 +6009,57 @@
 }
 BIND(L_fill_32_bytes);
 {
   assert( UseSSE >= 2, "supported cpu only" );
   Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
-  // Fill 32-byte chunks
   movdl(xtmp, value);
-  pshufd(xtmp, xtmp, 0);
-
-  subl(count, 8 << shift);
-  jcc(Assembler::less, L_check_fill_8_bytes);
-  align(16);
-
-  BIND(L_fill_32_bytes_loop);
-
-  if (UseUnalignedLoadStores) {
-    movdqu(Address(to, 0), xtmp);
-    movdqu(Address(to, 16), xtmp);
+  if (UseAVX >= 2 && UseUnalignedLoadStores) {
+    // Fill 64-byte chunks
+    Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
+    vpbroadcastd(xtmp, xtmp);
+
+    subl(count, 16 << shift);
+    jcc(Assembler::less, L_check_fill_32_bytes);
+    align(16);
+
+    BIND(L_fill_64_bytes_loop);
+    vmovdqu(Address(to, 0), xtmp);
+    vmovdqu(Address(to, 32), xtmp);
+    addptr(to, 64);
+    subl(count, 16 << shift);
+    jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
+
+    BIND(L_check_fill_32_bytes);
+    addl(count, 8 << shift);
+    jccb(Assembler::less, L_check_fill_8_bytes);
+    vmovdqu(Address(to, 0), xtmp);
+    addptr(to, 32);
+    subl(count, 8 << shift);
   } else {
-    movq(Address(to, 0), xtmp);
-    movq(Address(to, 8), xtmp);
-    movq(Address(to, 16), xtmp);
-    movq(Address(to, 24), xtmp);
+    // Fill 32-byte chunks
+    pshufd(xtmp, xtmp, 0);
+
+    subl(count, 8 << shift);
+    jcc(Assembler::less, L_check_fill_8_bytes);
+    align(16);
+
+    BIND(L_fill_32_bytes_loop);
+
+    if (UseUnalignedLoadStores) {
+      movdqu(Address(to, 0), xtmp);
+      movdqu(Address(to, 16), xtmp);
+    } else {
+      movq(Address(to, 0), xtmp);
+      movq(Address(to, 8), xtmp);
+      movq(Address(to, 16), xtmp);
+      movq(Address(to, 24), xtmp);
+    }
+
+    addptr(to, 32);
+    subl(count, 8 << shift);
+    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
   }
-
-  addptr(to, 32);
-  subl(count, 8 << shift);
-  jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
   BIND(L_check_fill_8_bytes);
   addl(count, 8 << shift);
   jccb(Assembler::zero, L_exit);
   jmpb(L_fill_8_bytes);
 
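A note on the count arithmetic, which is easy to misread: count is kept biased rather than ever holding a computed remainder. Assuming count is in elements, the fact that the 32-byte path subtracts 8 << shift per 32 bytes stored means (8 << shift) elements correspond to 32 bytes and (16 << shift) to 64. Each subl before a loop over-subtracts one stride, the bottom-tested jcc(greaterEqual, ...) looks at the biased value, and the addl at each L_check_* label steps the bias back, so that by L_check_fill_8_bytes count is once again the exact number of elements left for the tail. A hypothetical scalar model of the AVX2 path under that reading (bytes_filled_by_vector_part is an invented name):

// Hypothetical scalar model of the AVX2 path's count flow (invented name).
// 'count' is in elements; (8 << shift) elements correspond to 32 bytes.
static long bytes_filled_by_vector_part(long count, int shift) {
  const long per32 = 8L << shift;         // elements per 32 bytes
  const long per64 = 16L << shift;        // elements per 64 bytes
  long bytes = 0;
  count -= per64;                         // subl(count, 16 << shift)
  while (count >= 0) {                    // jcc(greaterEqual, L_fill_64_bytes_loop)
    bytes += 64;                          // two 32-byte vmovdqu stores
    count -= per64;
  }
  count += per32;                         // L_check_fill_32_bytes: addl(count, 8 << shift)
  if (count >= 0) {                       // jccb(less, L_check_fill_8_bytes) not taken
    bytes += 32;                          // one 32-byte vmovdqu store
    count -= per32;
  }
  count += per32;                         // L_check_fill_8_bytes: addl(count, 8 << shift)
  // 'count' is now exactly the elements left for the 8/4/2/1-byte tail.
  return bytes;
}

For example, if count is in 4-byte units (shift == 0) and starts at 28, the model runs the 64-byte loop once, takes the 32-byte epilogue once, and leaves count == 4 elements for the tail: 64 + 32 + 4*4 = 112 = 28 * 4 bytes.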