Mercurial > hg > truffle
comparison src/cpu/x86/vm/x86_32.ad @ 1914:ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
Summary: Use hardware DIV instruction for long division by constant when it is faster than code with multiply.
Reviewed-by: never
author | kvn |
---|---|
date | Tue, 02 Nov 2010 09:00:37 -0700 |
parents | 3e8fbc61cee8 |
children | 2fe998383789 |
comparison
equal
deleted
inserted
replaced
1913:3b2dea75431e | 1914:ae065c367d93 |
---|---|
1506 | 1506 |
1507 bool Matcher::is_spillable_arg( int reg ) { | 1507 bool Matcher::is_spillable_arg( int reg ) { |
1508 return can_be_java_arg(reg); | 1508 return can_be_java_arg(reg); |
1509 } | 1509 } |
1510 | 1510 |
1511 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { | |
1512 // Use hardware integer DIV instruction when | |
1513 // it is faster than code which uses multiply. | |
1514 // Only when the constant divisor fits into 32 bits | |
1515 // (min_jint is excluded so that negating a negative | |
1516 // divisor always yields a valid positive 32-bit value). | |
1517 return VM_Version::has_fast_idiv() && | |
1518 (divisor == (int)divisor && divisor != min_jint); | |
1519 } | |
1520 | |
1511 // Register for DIVI projection of divmodI | 1521 // Register for DIVI projection of divmodI |
1512 RegMask Matcher::divI_proj_mask() { | 1522 RegMask Matcher::divI_proj_mask() { |
1513 return EAX_REG_mask; | 1523 return EAX_REG_mask; |
1514 } | 1524 } |
1515 | 1525 |
1543 if (opc == Op_AndL) { | 1553 if (opc == Op_AndL) { |
1544 Node* o2 = n->in(2); | 1554 Node* o2 = n->in(2); |
1545 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { | 1555 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { |
1546 return true; | 1556 return true; |
1547 } | 1557 } |
1558 } | |
1559 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { | |
1560 return true; | |
1548 } | 1561 } |
1549 return false; | 1562 return false; |
1550 } | 1563 } |
1551 | 1564 |
1552 %} | 1565 %} |
2307 %} | 2320 %} |
2308 | 2321 |
2309 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ | 2322 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ |
2310 emit_opcode( cbuf, 0x8B ); // Move | 2323 emit_opcode( cbuf, 0x8B ); // Move |
2311 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); | 2324 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); |
2312 emit_d8(cbuf,$primary); | 2325 if( $cnt$$constant > 32 ) { // Shift, if not by zero |
2313 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); | 2326 emit_d8(cbuf,$primary); |
2314 emit_d8(cbuf,$cnt$$constant-32); | 2327 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); |
2328 emit_d8(cbuf,$cnt$$constant-32); | |
2329 } | |
2315 emit_d8(cbuf,$primary); | 2330 emit_d8(cbuf,$primary); |
2316 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); | 2331 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); |
2317 emit_d8(cbuf,31); | 2332 emit_d8(cbuf,31); |
2318 %} | 2333 %} |
2319 | 2334 |
8837 "PUSH $src2.hi\n\t" | 8852 "PUSH $src2.hi\n\t" |
8838 "PUSH $src2.lo\n\t" | 8853 "PUSH $src2.lo\n\t" |
8839 "CALL SharedRuntime::lrem\n\t" | 8854 "CALL SharedRuntime::lrem\n\t" |
8840 "ADD ESP,16" %} | 8855 "ADD ESP,16" %} |
8841 ins_encode( long_mod(src1,src2) ); | 8856 ins_encode( long_mod(src1,src2) ); |
8857 ins_pipe( pipe_slow ); | |
8858 %} | |
8859 | |
8860 // Divide Register Long (no special case since divisor != -1) | |
8861 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{ | |
8862 match(Set dst (DivL dst imm)); | |
8863 effect( TEMP tmp, TEMP tmp2, KILL cr ); | |
8864 ins_cost(1000); | |
8865 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" | |
8866 "CMP $tmp,EDX\n\t" | |
8867 "JA,s fast\n\t" | |
8868 "MOV $tmp2,EAX\n\t" | |
8869 "MOV EAX,EDX\n\t" | |
8870 "SAR EDX,31\n\t" | |
8871 "IDIV $tmp\n\t" | |
8872 "XCHG EAX,$tmp2 \n\t" | |
8873 "IDIV $tmp\n\t" | |
8874 "CDQ\n\t" | |
8875 "ADD EDX,$tmp2\n\t" | |
8876 "JMP,s done\n" | |
8877 "fast:\n\t" | |
8878 "IDIV $tmp\n\t" | |
8879 "XOR EDX,EDX\n" | |
8880 "done:\n\t" | |
8881 "NEG EDX:EAX # if $imm < 0" %} | |
8882 ins_encode %{ | |
8883 int con = (int)$imm$$constant; | |
8884 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); | |
8885 int pcon = (con > 0) ? con : -con; | |
8886 Label Lfast, Ldone; | |
8887 | |
8888 __ movl($tmp$$Register, pcon); | |
8889 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); | |
8890 __ jccb(Assembler::above, Lfast); | |
8891 | |
8892 __ movl($tmp2$$Register, $dst$$Register); // save | |
8893 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); | |
8894 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // src sign | |
8895 __ idivl($tmp$$Register); | |
8896 __ xchgl($dst$$Register, $tmp2$$Register); | |
8897 __ idivl($tmp$$Register); | |
8898 __ cdql(); | |
8899 __ addl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); | |
8900 __ jmpb(Ldone); | |
8901 | |
8902 __ bind(Lfast); | |
8903 // fast path: src is positive and result fits into 32 bits | |
8904 __ idivl($tmp$$Register); | |
8905 __ xorl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); | |
8906 | |
8907 __ bind(Ldone); | |
8908 if (con < 0) { | |
8909 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); | |
8910 } | |
8911 %} | |
8912 ins_pipe( pipe_slow ); | |
8913 %} | |
8914 | |
8915 // Remainder Register Long (remainder fits into 32 bits) | |
8916 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{ | |
8917 match(Set dst (ModL dst imm)); | |
8918 effect( TEMP tmp, TEMP tmp2, KILL cr ); | |
8919 ins_cost(1000); | |
8920 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" | |
8921 "CMP $tmp,EDX\n\t" | |
8922 "JA,s fast\n\t" | |
8923 "MOV $tmp2,EAX\n\t" | |
8924 "MOV EAX,EDX\n\t" | |
8925 "SAR EDX,31\n\t" | |
8926 "IDIV $tmp\n\t" | |
8927 "MOV EAX,$tmp2\n" | |
8928 "fast:\n\t" | |
8929 "IDIV $tmp\n\t" | |
8930 "MOV EAX,EDX\n\t" | |
8931 "SAR EDX,31\n\t" %} | |
8932 ins_encode %{ | |
8933 int con = (int)$imm$$constant; | |
8934 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); | |
8935 int pcon = (con > 0) ? con : -con; | |
8936 Label Lfast; | |
8937 | |
8938 __ movl($tmp$$Register, pcon); | |
8939 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); | |
8940 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit | |
8941 | |
8942 __ movl($tmp2$$Register, $dst$$Register); // save | |
8943 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); | |
8944 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // src sign | |
8945 __ idivl($tmp$$Register); | |
8946 __ movl($dst$$Register, $tmp2$$Register); | |
8947 | |
8948 __ bind(Lfast); | |
8949 __ idivl($tmp$$Register); | |
8950 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); | |
8951 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign | |
8952 | |
8953 %} | |
8842 ins_pipe( pipe_slow ); | 8954 ins_pipe( pipe_slow ); |
8843 %} | 8955 %} |
8844 | 8956 |
8845 // Integer Shift Instructions | 8957 // Integer Shift Instructions |
8846 // Shift Left by one | 8958 // Shift Left by one |