comparison src/cpu/x86/vm/x86_32.ad @ 1914:ae065c367d93

6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14. Summary: Use hardware DIV instruction for long division by constant when it is faster than code with multiply. Reviewed-by: never
author kvn
date Tue, 02 Nov 2010 09:00:37 -0700
parents 3e8fbc61cee8
children 2fe998383789
comparison
equal deleted inserted replaced
1913:3b2dea75431e 1914:ae065c367d93
1506 1506
1507 bool Matcher::is_spillable_arg( int reg ) { 1507 bool Matcher::is_spillable_arg( int reg ) {
1508 return can_be_java_arg(reg); 1508 return can_be_java_arg(reg);
1509 } 1509 }
1510 1510
1511 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1512 // Use the hardware integer DIV instruction when
1513 // it is faster than code which uses multiply.
1514 // Only when the constant divisor fits into 32 bits
1515 // (min_jint is excluded so that negating a negative
1516 // divisor always gives a correct positive 32-bit value).
1517 return VM_Version::has_fast_idiv() &&
1518 (divisor == (int)divisor && divisor != min_jint);
1519 }
1520
1511 // Register for DIVI projection of divmodI 1521 // Register for DIVI projection of divmodI
1512 RegMask Matcher::divI_proj_mask() { 1522 RegMask Matcher::divI_proj_mask() {
1513 return EAX_REG_mask; 1523 return EAX_REG_mask;
1514 } 1524 }
1515 1525
1543 if (opc == Op_AndL) { 1553 if (opc == Op_AndL) {
1544 Node* o2 = n->in(2); 1554 Node* o2 = n->in(2);
1545 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1555 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1546 return true; 1556 return true;
1547 } 1557 }
1558 }
1559 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1560 return true;
1548 } 1561 }
1549 return false; 1562 return false;
1550 } 1563 }
1551 1564
1552 %} 1565 %}
2307 %} 2320 %}
2308 2321
2309 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2322 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2310 emit_opcode( cbuf, 0x8B ); // Move 2323 emit_opcode( cbuf, 0x8B ); // Move
2311 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2324 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2312 emit_d8(cbuf,$primary); 2325 if( $cnt$$constant > 32 ) { // Shift, if not by zero
2313 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2326 emit_d8(cbuf,$primary);
2314 emit_d8(cbuf,$cnt$$constant-32); 2327 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2328 emit_d8(cbuf,$cnt$$constant-32);
2329 }
2315 emit_d8(cbuf,$primary); 2330 emit_d8(cbuf,$primary);
2316 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2331 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2317 emit_d8(cbuf,31); 2332 emit_d8(cbuf,31);
2318 %} 2333 %}
2319 2334
8837 "PUSH $src2.hi\n\t" 8852 "PUSH $src2.hi\n\t"
8838 "PUSH $src2.lo\n\t" 8853 "PUSH $src2.lo\n\t"
8839 "CALL SharedRuntime::lrem\n\t" 8854 "CALL SharedRuntime::lrem\n\t"
8840 "ADD ESP,16" %} 8855 "ADD ESP,16" %}
8841 ins_encode( long_mod(src1,src2) ); 8856 ins_encode( long_mod(src1,src2) );
8857 ins_pipe( pipe_slow );
8858 %}
8859
8860 // Divide Register Long by a 32-bit constant using hardware IDIV (no special case since divisor != -1)
8861 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8862 match(Set dst (DivL dst imm));
8863 effect( TEMP tmp, TEMP tmp2, KILL cr );
8864 ins_cost(1000);
8865 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8866 "CMP $tmp,EDX\n\t"
8867 "JA,s fast\n\t"
8868 "MOV $tmp2,EAX\n\t"
8869 "MOV EAX,EDX\n\t"
8870 "SAR EDX,31\n\t"
8871 "IDIV $tmp\n\t"
8872 "XCHG EAX,$tmp2 \n\t"
8873 "IDIV $tmp\n\t"
8874 "CDQ\n\t"
8875 "ADD EDX,$tmp2\n\t"
8876 "JMP,s done\n"
8877 "fast:\n\t"
8878 "IDIV $tmp\n\t"
8879 "XOR EDX,EDX\n"
8880 "done:\n\t"
8881 "NEG EDX:EAX # if $imm < 0" %}
8882 ins_encode %{
8883 int con = (int)$imm$$constant; // divisor constant narrowed to 32 bits
8884 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8885 int pcon = (con > 0) ? con : -con; // abs(con); the divisor sign is applied after Ldone
8886 Label Lfast, Ldone;
8887
8888 __ movl($tmp$$Register, pcon);
8889 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); // abs(divisor) vs. high word of the dividend
8890 __ jccb(Assembler::above, Lfast); // single divide is enough when abs(divisor) is above the high word
8891
8892 __ movl($tmp2$$Register, $dst$$Register); // save low word of the dividend
8893 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); // low register = high word
8894 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // src sign
8895 __ idivl($tmp$$Register); // first step: divide the sign-extended high word
8896 __ xchgl($dst$$Register, $tmp2$$Register); // low register = saved low word, tmp2 = first quotient
8897 __ idivl($tmp$$Register); // second step: divide (first remainder : low word)
8898 __ cdql(); // sign-extend the low quotient into the high register
8899 __ addl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); // fold the first quotient into the high half
8900 __ jmpb(Ldone);
8901
8902 __ bind(Lfast);
8903 // fast path: src is positive and result fits into 32 bit
8904 __ idivl($tmp$$Register); // NOTE(review): IDIV is signed - a quotient in [2^31, 2^32) presumably raises #DE; confirm the JA guard is sufficient
8905 __ xorl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); // high half of the quotient is zero here
8906
8907 __ bind(Ldone);
8908 if (con < 0) {
8909 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // negate the 64-bit quotient for a negative divisor
8910 }
8911 %}
8912 ins_pipe( pipe_slow );
8913 %}
8914
8915 // Remainder Register Long by a 32-bit constant using hardware IDIV (the remainder fits into 32 bits)
8916 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8917 match(Set dst (ModL dst imm));
8918 effect( TEMP tmp, TEMP tmp2, KILL cr );
8919 ins_cost(1000);
8920 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8921 "CMP $tmp,EDX\n\t"
8922 "JA,s fast\n\t"
8923 "MOV $tmp2,EAX\n\t"
8924 "MOV EAX,EDX\n\t"
8925 "SAR EDX,31\n\t"
8926 "IDIV $tmp\n\t"
8927 "MOV EAX,$tmp2\n"
8928 "fast:\n\t"
8929 "IDIV $tmp\n\t"
8930 "MOV EAX,EDX\n\t"
8931 "SAR EDX,31\n\t" %}
8932 ins_encode %{
8933 int con = (int)$imm$$constant; // divisor constant narrowed to 32 bits
8934 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8935 int pcon = (con > 0) ? con : -con; // abs(con); only the magnitude is used for the divides
8936 Label Lfast;
8937
8938 __ movl($tmp$$Register, pcon);
8939 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); // abs(divisor) vs. high word of the dividend
8940 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8941
8942 __ movl($tmp2$$Register, $dst$$Register); // save low word of the dividend
8943 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); // low register = high word
8944 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // src sign
8945 __ idivl($tmp$$Register); // first step: its remainder stays in the high register for the next divide
8946 __ movl($dst$$Register, $tmp2$$Register); // restore the saved low word; first quotient is not needed
8947
8948 __ bind(Lfast);
8949 __ idivl($tmp$$Register); // NOTE(review): IDIV is signed - a quotient outside 32-bit signed range presumably raises #DE; confirm
8950 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); // low word of the result = remainder
8951 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8952
8953 %}
8842 ins_pipe( pipe_slow ); 8954 ins_pipe( pipe_slow );
8843 %} 8955 %}
8844 8956
8845 // Integer Shift Instructions 8957 // Integer Shift Instructions
8846 // Shift Left by one 8958 // Shift Left by one