Mercurial > hg > graal-compiler
comparison src/cpu/x86/vm/assembler_x86.cpp @ 1763:d6f45b55c972
4809552: Optimize Arrays.fill(...)
Reviewed-by: kvn
author | never |
---|---|
date | Fri, 27 Aug 2010 17:33:49 -0700 |
parents | 36519c19beeb |
children | d257356e35f0 |
comparison
equal
deleted
inserted
replaced
1731:ee5cc9e78493 | 1763:d6f45b55c972 |
---|---|
8765 | 8765 |
8766 // That's it | 8766 // That's it |
8767 bind(DONE); | 8767 bind(DONE); |
8768 } | 8768 } |
8769 | 8769 |
// BLOCK_COMMENT(str) forwards to block_comment(str), except in PRODUCT builds where it expands to nothing. | |
8770 #ifdef PRODUCT | |
8771 #define BLOCK_COMMENT(str) /* nothing */ | |
8772 #else | |
8773 #define BLOCK_COMMENT(str) block_comment(str) | |
8774 #endif | |
8775 | |
// BIND(label) binds the label, then emits a block comment holding the label's name followed by ':'. | |
8776 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") | |
// | |
// Emits code that fills memory starting at the address in 'to' with 'value'. | |
// | |
//   t       - element type: T_BYTE, T_SHORT or T_INT (anything else hits ShouldNotReachHere()). | |
//   aligned - when false, and UseUnalignedLoadStores is off, byte/short fills first emit stores | |
//             that advance 'to' to a 4-byte boundary. NOTE(review): when true the caller | |
//             presumably guarantees that alignment already - confirm against callers. | |
//   to      - register holding the destination address; advanced as stores are emitted. | |
//   value   - register holding the fill value; overwritten with the value replicated to 32 bits. | |
//   count   - register holding the number of ELEMENTS (not bytes) to fill; modified. | |
//   rtmp    - scratch register, written only for T_BYTE / T_SHORT while replicating 'value'. | |
//   xtmp    - XMM scratch register, used only on the UseSSE >= 2 path. | |
// | |
// Throughout, (1 << shift) elements occupy 4 bytes, so N bytes correspond to (N/4) << shift elements. | |
// | |
8777 void MacroAssembler::generate_fill(BasicType t, bool aligned, | |
8778 Register to, Register value, Register count, | |
8779 Register rtmp, XMMRegister xtmp) { | |
8780 assert_different_registers(to, value, count, rtmp); | |
8781 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; | |
8782 Label L_fill_2_bytes, L_fill_4_bytes; | |
8783 | |
// shift = log2(number of elements per 4 bytes): byte -> 2, short -> 1, int -> 0 | |
8784 int shift = -1; | |
8785 switch (t) { | |
8786 case T_BYTE: | |
8787 shift = 2; | |
8788 break; | |
8789 case T_SHORT: | |
8790 shift = 1; | |
8791 break; | |
8792 case T_INT: | |
8793 shift = 0; | |
8794 break; | |
8795 default: ShouldNotReachHere(); | |
8796 } | |
8797 | |
// Replicate the element across all 32 bits of 'value' so wider stores write whole groups of elements. | |
// Step 1 (bytes only): mask to 8 bits and copy the byte into bits 8..15. | |
8798 if (t == T_BYTE) { | |
8799 andl(value, 0xff); | |
8800 movl(rtmp, value); | |
8801 shll(rtmp, 8); | |
8802 orl(value, rtmp); | |
8803 } | |
// Shorts: mask to 16 bits (bytes already occupy exactly the low 16 bits after step 1). | |
8804 if (t == T_SHORT) { | |
8805 andl(value, 0xffff); | |
8806 } | |
// Step 2 (bytes and shorts): copy the low 16 bits into the high 16 bits. | |
8807 if (t == T_BYTE || t == T_SHORT) { | |
8808 movl(rtmp, value); | |
8809 shll(rtmp, 16); | |
8810 orl(value, rtmp); | |
8811 } | |
8812 | |
8813 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element | |
8814 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp | |
8815 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { | |
8816 // align destination address (to) at a 4-byte boundary | |
8817 if (t == T_BYTE) { | |
8818 // One byte misalignment happens only for byte arrays | |
8819 testptr(to, 1); | |
8820 jccb(Assembler::zero, L_skip_align1); | |
8821 movb(Address(to, 0), value); | |
8822 increment(to); | |
8823 decrement(count); | |
8824 BIND(L_skip_align1); | |
8825 } | |
8826 // Two bytes misalignment happens only for byte and short (char) arrays | |
8827 testptr(to, 2); | |
8828 jccb(Assembler::zero, L_skip_align2); | |
8829 movw(Address(to, 0), value); | |
8830 addptr(to, 2); | |
// 2 bytes == 1 << (shift-1) elements; shift >= 1 here because t is T_BYTE or T_SHORT | |
8831 subl(count, 1<<(shift-1)); | |
8832 BIND(L_skip_align2); | |
8833 } | |
// Without SSE2 the bulk loops are built from 32-bit general-purpose register stores. | |
8834 if (UseSSE < 2) { | |
8835 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; | |
8836 // Fill 32-byte chunks | |
// count is pre-decremented by 32 bytes' worth of elements; a negative result means < 32 bytes remain | |
8837 subl(count, 8 << shift); | |
8838 jcc(Assembler::less, L_check_fill_8_bytes); | |
8839 align(16); | |
8840 | |
8841 BIND(L_fill_32_bytes_loop); | |
8842 | |
// the C++ loop runs at code-generation time and emits eight 4-byte stores (offsets 0..28) | |
8843 for (int i = 0; i < 32; i += 4) { | |
8844 movl(Address(to, i), value); | |
8845 } | |
8846 | |
8847 addptr(to, 32); | |
8848 subl(count, 8 << shift); | |
8849 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); | |
8850 BIND(L_check_fill_8_bytes); | |
// undo the pre-decrement; zero means everything has been filled | |
8851 addl(count, 8 << shift); | |
8852 jccb(Assembler::zero, L_exit); | |
8853 jmpb(L_fill_8_bytes); | |
8854 | |
8855 // | |
8856 // fewer than 32 bytes remain: fill 8 bytes per iteration (two 4-byte stores) | |
8857 // | |
8858 BIND(L_fill_8_bytes_loop); | |
8859 movl(Address(to, 0), value); | |
8860 movl(Address(to, 4), value); | |
8861 addptr(to, 8); | |
8862 BIND(L_fill_8_bytes); | |
// On loop exit count is negative, but subtracting a multiple of (2 << shift) leaves the lower | |
// bits intact, and those are the only bits the trailing 4/2/1-byte tests below look at. | |
8863 subl(count, 1 << (shift + 1)); | |
8864 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); | |
8865 // fall through to fill 4 bytes | |
8866 } else { | |
8867 Label L_fill_32_bytes; | |
8868 if (!UseUnalignedLoadStores) { | |
8869 // align to 8 bytes, we know we are 4 byte aligned to start | |
8870 testptr(to, 4); | |
8871 jccb(Assembler::zero, L_fill_32_bytes); | |
8872 movl(Address(to, 0), value); | |
8873 addptr(to, 4); | |
// 4 bytes == 1 << shift elements | |
8874 subl(count, 1<<shift); | |
8875 } | |
8876 BIND(L_fill_32_bytes); | |
8877 { | |
8878 assert( UseSSE >= 2, "supported cpu only" ); | |
8879 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; | |
8880 // Fill 32-byte chunks | |
// Move the 32-bit pattern into xtmp; pshufd with selector 0 presumably broadcasts the low | |
// dword to all four dwords (x86 PSHUFD semantics) - confirm against the assembler's pshufd. | |
8881 movdl(xtmp, value); | |
8882 pshufd(xtmp, xtmp, 0); | |
8883 | |
// same pre-decrement scheme as the non-SSE path: negative means < 32 bytes remain | |
8884 subl(count, 8 << shift); | |
8885 jcc(Assembler::less, L_check_fill_8_bytes); | |
8886 align(16); | |
8887 | |
8888 BIND(L_fill_32_bytes_loop); | |
8889 | |
// 32 bytes per iteration: two stores at offsets 0/16 when unaligned stores are allowed, | |
// otherwise four stores at offsets 0/8/16/24 | |
8890 if (UseUnalignedLoadStores) { | |
8891 movdqu(Address(to, 0), xtmp); | |
8892 movdqu(Address(to, 16), xtmp); | |
8893 } else { | |
8894 movq(Address(to, 0), xtmp); | |
8895 movq(Address(to, 8), xtmp); | |
8896 movq(Address(to, 16), xtmp); | |
8897 movq(Address(to, 24), xtmp); | |
8898 } | |
8899 | |
8900 addptr(to, 32); | |
8901 subl(count, 8 << shift); | |
8902 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); | |
8903 BIND(L_check_fill_8_bytes); | |
// undo the pre-decrement; zero means everything has been filled | |
8904 addl(count, 8 << shift); | |
8905 jccb(Assembler::zero, L_exit); | |
8906 jmpb(L_fill_8_bytes); | |
8907 | |
8908 // | |
8909 // fewer than 32 bytes remain: fill 8 bytes per iteration (one movq store) | |
8910 // | |
8911 BIND(L_fill_8_bytes_loop); | |
8912 movq(Address(to, 0), xtmp); | |
8913 addptr(to, 8); | |
8914 BIND(L_fill_8_bytes); | |
// count ends up negative here too; its low bits still describe the < 8 byte tail (see tests below) | |
8915 subl(count, 1 << (shift + 1)); | |
8916 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); | |
8917 } | |
8918 } | |
8919 // fill trailing 4 bytes | |
// Only the bits of count below (2 << shift) are examined from here on; each set bit selects | |
// one of the 4-, 2- and 1-byte tail stores. | |
8920 BIND(L_fill_4_bytes); | |
8921 testl(count, 1<<shift); | |
8922 jccb(Assembler::zero, L_fill_2_bytes); | |
8923 movl(Address(to, 0), value); | |
// The C++ branches below decide at code-generation time which tail stores exist: a 2-byte tail | |
// only for byte/short fills, a 1-byte tail only for byte fills. In the other branches the | |
// remaining labels are bound with no code between them, so every label is always bound. | |
8924 if (t == T_BYTE || t == T_SHORT) { | |
8925 addptr(to, 4); | |
8926 BIND(L_fill_2_bytes); | |
8927 // fill trailing 2 bytes | |
8928 testl(count, 1<<(shift-1)); | |
8929 jccb(Assembler::zero, L_fill_byte); | |
8930 movw(Address(to, 0), value); | |
8931 if (t == T_BYTE) { | |
8932 addptr(to, 2); | |
8933 BIND(L_fill_byte); | |
8934 // fill trailing byte | |
8935 testl(count, 1); | |
8936 jccb(Assembler::zero, L_exit); | |
8937 movb(Address(to, 0), value); | |
8938 } else { | |
8939 BIND(L_fill_byte); | |
8940 } | |
8941 } else { | |
8942 BIND(L_fill_2_bytes); | |
8943 } | |
8944 BIND(L_exit); | |
8945 } | |
// BIND and BLOCK_COMMENT are helpers for generate_fill only; drop them again here. | |
8946 #undef BIND | |
8947 #undef BLOCK_COMMENT | |
8948 | |
8949 | |
8770 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { | 8950 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { |
8771 switch (cond) { | 8951 switch (cond) { |
8772 // Note some conditions are synonyms for others | 8952 // Note some conditions are synonyms for others |
8773 case Assembler::zero: return Assembler::notZero; | 8953 case Assembler::zero: return Assembler::notZero; |
8774 case Assembler::notZero: return Assembler::zero; | 8954 case Assembler::notZero: return Assembler::zero; |