Mercurial > hg > truffle
comparison src/cpu/x86/vm/macroAssembler_x86.cpp @ 7475:e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
Summary: Use YMM registers in arraycopy and array_fill stubs.
Reviewed-by: roland, twisti
author: kvn
date: Thu, 03 Jan 2013 16:30:47 -0800
parents: 00af3a3a8df4
children: ffa87474d7a4
comparison (legend: equal | deleted | inserted | replaced)
comparing revision 7474:00af3a3a8df4 (parent) with revision 7475:e2e6bf86682c (this changeset)
6009 } | 6009 } |
6010 BIND(L_fill_32_bytes); | 6010 BIND(L_fill_32_bytes); |
6011 { | 6011 { |
6012 assert( UseSSE >= 2, "supported cpu only" ); | 6012 assert( UseSSE >= 2, "supported cpu only" ); |
6013 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; | 6013 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; |
6014 // Fill 32-byte chunks | |
6015 movdl(xtmp, value); | 6014 movdl(xtmp, value); |
6016 pshufd(xtmp, xtmp, 0); | 6015 if (UseAVX >= 2 && UseUnalignedLoadStores) { |
6017 | 6016 // Fill 64-byte chunks |
6018 subl(count, 8 << shift); | 6017 Label L_fill_64_bytes_loop, L_check_fill_32_bytes; |
6019 jcc(Assembler::less, L_check_fill_8_bytes); | 6018 vpbroadcastd(xtmp, xtmp); |
6020 align(16); | 6019 |
6021 | 6020 subl(count, 16 << shift); |
6022 BIND(L_fill_32_bytes_loop); | 6021 jcc(Assembler::less, L_check_fill_32_bytes); |
6023 | 6022 align(16); |
6024 if (UseUnalignedLoadStores) { | 6023 |
6025 movdqu(Address(to, 0), xtmp); | 6024 BIND(L_fill_64_bytes_loop); |
6026 movdqu(Address(to, 16), xtmp); | 6025 vmovdqu(Address(to, 0), xtmp); |
6026 vmovdqu(Address(to, 32), xtmp); | |
6027 addptr(to, 64); | |
6028 subl(count, 16 << shift); | |
6029 jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); | |
6030 | |
6031 BIND(L_check_fill_32_bytes); | |
6032 addl(count, 8 << shift); | |
6033 jccb(Assembler::less, L_check_fill_8_bytes); | |
6034 vmovdqu(Address(to, 0), xtmp); | |
6035 addptr(to, 32); | |
6036 subl(count, 8 << shift); | |
6027 } else { | 6037 } else { |
6028 movq(Address(to, 0), xtmp); | 6038 // Fill 32-byte chunks |
6029 movq(Address(to, 8), xtmp); | 6039 pshufd(xtmp, xtmp, 0); |
6030 movq(Address(to, 16), xtmp); | 6040 |
6031 movq(Address(to, 24), xtmp); | 6041 subl(count, 8 << shift); |
6042 jcc(Assembler::less, L_check_fill_8_bytes); | |
6043 align(16); | |
6044 | |
6045 BIND(L_fill_32_bytes_loop); | |
6046 | |
6047 if (UseUnalignedLoadStores) { | |
6048 movdqu(Address(to, 0), xtmp); | |
6049 movdqu(Address(to, 16), xtmp); | |
6050 } else { | |
6051 movq(Address(to, 0), xtmp); | |
6052 movq(Address(to, 8), xtmp); | |
6053 movq(Address(to, 16), xtmp); | |
6054 movq(Address(to, 24), xtmp); | |
6055 } | |
6056 | |
6057 addptr(to, 32); | |
6058 subl(count, 8 << shift); | |
6059 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); | |
6032 } | 6060 } |
6033 | |
6034 addptr(to, 32); | |
6035 subl(count, 8 << shift); | |
6036 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); | |
6037 BIND(L_check_fill_8_bytes); | 6061 BIND(L_check_fill_8_bytes); |
6038 addl(count, 8 << shift); | 6062 addl(count, 8 << shift); |
6039 jccb(Assembler::zero, L_exit); | 6063 jccb(Assembler::zero, L_exit); |
6040 jmpb(L_fill_8_bytes); | 6064 jmpb(L_fill_8_bytes); |
6041 | 6065 |