comparison src/cpu/x86/vm/assembler_x86.cpp @ 1763:d6f45b55c972

4809552: Optimize Arrays.fill(...) Reviewed-by: kvn
author never
date Fri, 27 Aug 2010 17:33:49 -0700
parents 36519c19beeb
children d257356e35f0
comparison
equal deleted inserted replaced
1731:ee5cc9e78493 1763:d6f45b55c972
8765 8765
8766 // That's it 8766 // That's it
8767 bind(DONE); 8767 bind(DONE);
8768 } 8768 }
8769 8769
// Helper macros local to generate_fill() below; both are #undef'd right after it.
// BLOCK_COMMENT(str) expands to nothing when PRODUCT is defined and to
// block_comment(str) otherwise.
8770 #ifdef PRODUCT
8771 #define BLOCK_COMMENT(str) /* nothing */
8772 #else
8773 #define BLOCK_COMMENT(str) block_comment(str)
8774 #endif
8775
// BIND(label) binds 'label' and then hands the label's own name followed by ":"
// to BLOCK_COMMENT, so renaming a label changes the emitted block comment.
// NOTE: the expansion is two statements; use BIND only as a stand-alone
// statement, never as the unbraced body of an if/else/loop.
8776 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Emits a fill sequence that stores 'count' elements of type 't' (T_BYTE,
// T_SHORT or T_INT; any other type reaches ShouldNotReachHere()) starting at
// address 'to', each element taken from the low bits of 'value'.
//
//   t       - element type; determines the element size used in all count math
//   aligned - when true, the leading 1-/2-byte alignment stores are not emitted
//             (presumably the caller's guarantee that 'to' is already
//             aligned -- TODO confirm against callers)
//   to      - destination address register; advanced as stores are emitted
//   value   - fill value register; for T_BYTE/T_SHORT it is rewritten in place
//             into a pattern replicated across 32 bits
//   count   - element count register; modified, and may be left negative
//   rtmp    - scratch general register used while replicating 'value'
//   xtmp    - scratch XMM register; only used when UseSSE >= 2
//
// UseSSE and UseUnalignedLoadStores are tested by the C++ code below, i.e. they
// select which instruction sequence is emitted; the emitted code does not
// test them.
8777 void MacroAssembler::generate_fill(BasicType t, bool aligned,
8778 Register to, Register value, Register count,
8779 Register rtmp, XMMRegister xtmp) {
8780 assert_different_registers(to, value, count, rtmp);
8781 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
8782 Label L_fill_2_bytes, L_fill_4_bytes;
8783
// shift is log2 of the number of elements that fit in 4 bytes, so in units of
// 'count': (1 << shift) == 4 bytes, (2 << shift) == 8 bytes,
// (8 << shift) == 32 bytes and, for T_BYTE/T_SHORT only, (1 << (shift-1)) == 2 bytes.
8784 int shift = -1;
8785 switch (t) {
8786 case T_BYTE:
8787 shift = 2;
8788 break;
8789 case T_SHORT:
8790 shift = 1;
8791 break;
8792 case T_INT:
8793 shift = 0;
8794 break;
8795 default: ShouldNotReachHere();
8796 }
8797
// Replicate the element across all 32 bits of 'value' so that the 1-, 2- and
// 4-byte stores below all write whole copies of it.
// T_BYTE: keep the low 8 bits and copy them into bits 8..15.
8798 if (t == T_BYTE) {
8799 andl(value, 0xff);
8800 movl(rtmp, value);
8801 shll(rtmp, 8);
8802 orl(value, rtmp);
8803 }
// T_SHORT: keep only the low 16 bits.
8804 if (t == T_SHORT) {
8805 andl(value, 0xffff);
8806 }
// T_BYTE and T_SHORT: copy the low 16 bits into the high 16 bits.
8807 if (t == T_BYTE || t == T_SHORT) {
8808 movl(rtmp, value);
8809 shll(rtmp, 16);
8810 orl(value, rtmp);
8811 }
8812
// Fewer than 8 bytes' worth of elements: skip alignment and the bulk loops and
// go straight to the tail code at L_fill_4_bytes.
8813 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
8814 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
8815 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
8816 // align destination address ('to') at 4 bytes address boundary
8817 if (t == T_BYTE) {
8818 // One byte misalignment happens only for byte arrays
8819 testptr(to, 1);
8820 jccb(Assembler::zero, L_skip_align1);
8821 movb(Address(to, 0), value);
8822 increment(to);
8823 decrement(count);
8824 BIND(L_skip_align1);
8825 }
8826 // Two bytes misalignment happens only for byte and short (char) arrays
8827 testptr(to, 2);
8828 jccb(Assembler::zero, L_skip_align2);
8829 movw(Address(to, 0), value);
8830 addptr(to, 2);
// 2 bytes were written: that is (1 << (shift-1)) elements (shift >= 1 here).
8831 subl(count, 1<<(shift-1));
8832 BIND(L_skip_align2);
8833 }
// No SSE2: all bulk stores are 32-bit stores of 'value'.
8834 if (UseSSE < 2) {
8835 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
8836 // Fill 32-byte chunks
// count is pre-decremented by 32 bytes' worth; the loop runs while the result
// is still >= 0, so inside the loop count is biased by -32 bytes' worth.
8837 subl(count, 8 << shift);
8838 jcc(Assembler::less, L_check_fill_8_bytes);
8839 align(16);
8840
8841 BIND(L_fill_32_bytes_loop);
8842
// Eight 4-byte stores cover the 32-byte chunk.
8843 for (int i = 0; i < 32; i += 4) {
8844 movl(Address(to, i), value);
8845 }
8846
8847 addptr(to, 32);
8848 subl(count, 8 << shift);
8849 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
8850 BIND(L_check_fill_8_bytes);
// Undo the bias: count is again the number of elements still to be written.
8851 addl(count, 8 << shift);
8852 jccb(Assembler::zero, L_exit);
8853 jmpb(L_fill_8_bytes);
8854
8855 //
8856 // length is too short, just fill qwords
8857 //
8858 BIND(L_fill_8_bytes_loop);
8859 movl(Address(to, 0), value);
8860 movl(Address(to, 4), value);
8861 addptr(to, 8);
8862 BIND(L_fill_8_bytes);
// Subtract 8 bytes' worth and loop while at least that much remained. On
// fall-through count is negative, but the low bits tested by the tail code are
// unchanged because only multiples of (2 << shift) were subtracted.
8863 subl(count, 1 << (shift + 1));
8864 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
8865 // fall through to fill 4 bytes
8866 } else {
// SSE2 available: bulk stores go through xtmp.
8867 Label L_fill_32_bytes;
8868 if (!UseUnalignedLoadStores) {
8869 // align to 8 bytes, we know we are 4 byte aligned to start
// If bit 2 of 'to' is set, one 4-byte store brings it to an 8-byte boundary.
8870 testptr(to, 4);
8871 jccb(Assembler::zero, L_fill_32_bytes);
8872 movl(Address(to, 0), value);
8873 addptr(to, 4);
8874 subl(count, 1<<shift);
8875 }
8876 BIND(L_fill_32_bytes);
8877 {
8878 assert( UseSSE >= 2, "supported cpu only" );
8879 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
8880 // Fill 32-byte chunks
// Move the 32-bit pattern into xtmp and broadcast it to all four dwords
// (pshufd with selector 0 copies dword 0 everywhere).
8881 movdl(xtmp, value);
8882 pshufd(xtmp, xtmp, 0);
8883
// Same biased-count scheme as in the non-SSE path above.
8884 subl(count, 8 << shift);
8885 jcc(Assembler::less, L_check_fill_8_bytes);
8886 align(16);
8887
8888 BIND(L_fill_32_bytes_loop);
8889
// 32 bytes per iteration: two 16-byte unaligned stores when those are
// permitted, otherwise four 8-byte stores.
8890 if (UseUnalignedLoadStores) {
8891 movdqu(Address(to, 0), xtmp);
8892 movdqu(Address(to, 16), xtmp);
8893 } else {
8894 movq(Address(to, 0), xtmp);
8895 movq(Address(to, 8), xtmp);
8896 movq(Address(to, 16), xtmp);
8897 movq(Address(to, 24), xtmp);
8898 }
8899
8900 addptr(to, 32);
8901 subl(count, 8 << shift);
8902 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
8903 BIND(L_check_fill_8_bytes);
// Undo the bias: count is again the number of elements still to be written.
8904 addl(count, 8 << shift);
8905 jccb(Assembler::zero, L_exit);
8906 jmpb(L_fill_8_bytes);
8907
8908 //
8909 // length is too short, just fill qwords
8910 //
8911 BIND(L_fill_8_bytes_loop);
8912 movq(Address(to, 0), xtmp);
8913 addptr(to, 8);
8914 BIND(L_fill_8_bytes);
// As in the non-SSE path: count falls through negative, low bits intact.
8915 subl(count, 1 << (shift + 1));
8916 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
8917 }
8918 }
// Tail: fewer than 8 bytes remain. Each remaining piece is selected by testing
// one bit of count (4 bytes, then 2 bytes, then 1 byte).
8919 // fill trailing 4 bytes
8920 BIND(L_fill_4_bytes);
8921 testl(count, 1<<shift);
8922 jccb(Assembler::zero, L_fill_2_bytes);
8923 movl(Address(to, 0), value);
// 'to' is advanced only for T_BYTE/T_SHORT, where smaller pieces may follow.
8924 if (t == T_BYTE || t == T_SHORT) {
8925 addptr(to, 4);
8926 BIND(L_fill_2_bytes);
8927 // fill trailing 2 bytes
8928 testl(count, 1<<(shift-1));
8929 jccb(Assembler::zero, L_fill_byte);
8930 movw(Address(to, 0), value);
8931 if (t == T_BYTE) {
8932 addptr(to, 2);
8933 BIND(L_fill_byte);
8934 // fill trailing byte
8935 testl(count, 1);
8936 jccb(Assembler::zero, L_exit);
8937 movb(Address(to, 0), value);
8938 } else {
// T_SHORT: no single-byte tail; the label is bound here only so that the jccb
// to L_fill_byte above has a target.
8939 BIND(L_fill_byte);
8940 }
8941 } else {
// T_INT: no sub-word tail; the label is bound here only so that the jccb to
// L_fill_2_bytes above has a target.
8942 BIND(L_fill_2_bytes);
8943 }
8944 BIND(L_exit);
8945 }
// Drop the helper macros so they do not leak into the rest of the file.
8946 #undef BIND
8947 #undef BLOCK_COMMENT
8948
8949
8770 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { 8950 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8771 switch (cond) { 8951 switch (cond) {
8772 // Note some conditions are synonyms for others 8952 // Note some conditions are synonyms for others
8773 case Assembler::zero: return Assembler::notZero; 8953 case Assembler::zero: return Assembler::notZero;
8774 case Assembler::notZero: return Assembler::zero; 8954 case Assembler::notZero: return Assembler::zero;