# HG changeset patch # User rasbold # Date 1210172806 25200 # Node ID f3de1255b0358e372ab9c58439e661afb7c86290 # Parent 60b728ec77c195aebdc52cb94045e9bf2527d654 6603011: RFE: Optimize long division Summary: Transform long division by constant into multiply Reviewed-by: never, kvn diff -r 60b728ec77c1 -r f3de1255b035 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Tue Apr 29 19:45:22 2008 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Wed May 07 08:06:46 2008 -0700 @@ -8075,6 +8075,18 @@ ins_pipe(ialu_reg_mem_alu0); %} +instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr) +%{ + match(Set dst (MulHiL src rax)); + effect(USE_KILL rax, KILL cr); + + ins_cost(300); + format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %} + opcode(0xF7, 0x5); /* Opcode F7 /5 */ + ins_encode(REX_reg_wide(src), OpcP, reg_opc(src)); + ins_pipe(ialu_reg_reg_alu0); +%} + instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr) %{ diff -r 60b728ec77c1 -r f3de1255b035 src/share/vm/opto/classes.hpp --- a/src/share/vm/opto/classes.hpp Tue Apr 29 19:45:22 2008 -0700 +++ b/src/share/vm/opto/classes.hpp Wed May 07 08:06:46 2008 -0700 @@ -164,6 +164,7 @@ macro(MoveD2L) macro(MulD) macro(MulF) +macro(MulHiL) macro(MulI) macro(MulL) macro(Multi) diff -r 60b728ec77c1 -r f3de1255b035 src/share/vm/opto/divnode.cpp --- a/src/share/vm/opto/divnode.cpp Tue Apr 29 19:45:22 2008 -0700 +++ b/src/share/vm/opto/divnode.cpp Wed May 07 08:06:46 2008 -0700 @@ -30,70 +30,86 @@ #include "incls/_divnode.cpp.incl" #include -// Implement the integer constant divide -> long multiply transform found in -// "Division by Invariant Integers using Multiplication" -// by Granlund and Montgomery -static Node *transform_int_divide_to_long_multiply( PhaseGVN *phase, Node *dividend, int divisor ) { +//----------------------magic_int_divide_constants----------------------------- +// Compute magic multiplier and shift constant for converting a 32 bit divide +// by constant into a multiply/shift/add series. Return false if calculations +// fail. +// +// Borrowed almost verbatum from Hacker's Delight by Henry S. Warren, Jr. with +// minor type name and parameter changes. +static bool magic_int_divide_constants(jint d, jint &M, jint &s) { + int32_t p; + uint32_t ad, anc, delta, q1, r1, q2, r2, t; + const uint32_t two31 = 0x80000000L; // 2**31. + + ad = ABS(d); + if (d == 0 || d == 1) return false; + t = two31 + ((uint32_t)d >> 31); + anc = t - 1 - t%ad; // Absolute value of nc. + p = 31; // Init. p. + q1 = two31/anc; // Init. q1 = 2**p/|nc|. + r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|). + q2 = two31/ad; // Init. q2 = 2**p/|d|. + r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|). + do { + p = p + 1; + q1 = 2*q1; // Update q1 = 2**p/|nc|. + r1 = 2*r1; // Update r1 = rem(2**p, |nc|). + if (r1 >= anc) { // (Must be an unsigned + q1 = q1 + 1; // comparison here). + r1 = r1 - anc; + } + q2 = 2*q2; // Update q2 = 2**p/|d|. + r2 = 2*r2; // Update r2 = rem(2**p, |d|). + if (r2 >= ad) { // (Must be an unsigned + q2 = q2 + 1; // comparison here). + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + M = q2 + 1; + if (d < 0) M = -M; // Magic number and + s = p - 32; // shift amount to return. + + return true; +} + +//--------------------------transform_int_divide------------------------------- +// Convert a division by constant divisor into an alternate Ideal graph. +// Return NULL if no transformation occurs. +static Node *transform_int_divide( PhaseGVN *phase, Node *dividend, jint divisor ) { // Check for invalid divisors - assert( divisor != 0 && divisor != min_jint && divisor != 1, - "bad divisor for transforming to long multiply" ); - - // Compute l = ceiling(log2(d)) - // presumes d is more likely small - bool d_pos = divisor >= 0; - int d = d_pos ? divisor : -divisor; - unsigned ud = (unsigned)d; - const int N = 32; - int l = log2_intptr(d-1)+1; - int sh_post = l; - - const uint64_t U1 = (uint64_t)1; + assert( divisor != 0 && divisor != min_jint, + "bad divisor for transforming to long multiply" ); - // Cliff pointed out how to prevent overflow (from the paper) - uint64_t m_low = (((U1 << l) - ud) << N) / ud + (U1 << N); - uint64_t m_high = ((((U1 << l) - ud) << N) + (U1 << (l+1))) / ud + (U1 << N); - - // Reduce to lowest terms - for ( ; sh_post > 0; sh_post-- ) { - uint64_t m_low_1 = m_low >> 1; - uint64_t m_high_1 = m_high >> 1; - if ( m_low_1 >= m_high_1 ) - break; - m_low = m_low_1; - m_high = m_high_1; - } + bool d_pos = divisor >= 0; + jint d = d_pos ? divisor : -divisor; + const int N = 32; // Result - Node *q; + Node *q = NULL; - // division by +/- 1 if (d == 1) { - // Filtered out as identity above - if (d_pos) - return NULL; - - // Just negate the value - else { + // division by +/- 1 + if (!d_pos) { + // Just negate the value q = new (phase->C, 3) SubINode(phase->intcon(0), dividend); } - } - - // division by +/- a power of 2 - else if ( is_power_of_2(d) ) { + } else if ( is_power_of_2(d) ) { + // division by +/- a power of 2 // See if we can simply do a shift without rounding bool needs_rounding = true; const Type *dt = phase->type(dividend); const TypeInt *dti = dt->isa_int(); - - // we don't need to round a positive dividend - if (dti && dti->_lo >= 0) + if (dti && dti->_lo >= 0) { + // we don't need to round a positive dividend needs_rounding = false; - - // An AND mask of sufficient size clears the low bits and - // I can avoid rounding. - else if( dividend->Opcode() == Op_AndI ) { + } else if( dividend->Opcode() == Op_AndI ) { + // An AND mask of sufficient size clears the low bits and + // I can avoid rounding. const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int(); if( andconi && andconi->is_con(-d) ) { dividend = dividend->in(1); @@ -102,47 +118,271 @@ } // Add rounding to the shift to handle the sign bit - if( needs_rounding ) { - Node *t1 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(l - 1))); - Node *t2 = phase->transform(new (phase->C, 3) URShiftINode(t1, phase->intcon(N - l))); - dividend = phase->transform(new (phase->C, 3) AddINode(dividend, t2)); + int l = log2_intptr(d-1)+1; + if (needs_rounding) { + // Divide-by-power-of-2 can be made into a shift, but you have to do + // more math for the rounding. You need to add 0 for positive + // numbers, and "i-1" for negative numbers. Example: i=4, so the + // shift is by 2. You need to add 3 to negative dividends and 0 to + // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1, + // (-2+3)>>2 becomes 0, etc. + + // Compute 0 or -1, based on sign bit + Node *sign = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N - 1))); + // Mask sign bit to the low sign bits + Node *round = phase->transform(new (phase->C, 3) URShiftINode(sign, phase->intcon(N - l))); + // Round up before shifting + dividend = phase->transform(new (phase->C, 3) AddINode(dividend, round)); } + // Shift for division q = new (phase->C, 3) RShiftINode(dividend, phase->intcon(l)); - if (!d_pos) + if (!d_pos) { q = new (phase->C, 3) SubINode(phase->intcon(0), phase->transform(q)); + } + } else { + // Attempt the jint constant divide -> multiply transform found in + // "Division by Invariant Integers using Multiplication" + // by Granlund and Montgomery + // See also "Hacker's Delight", chapter 10 by Warren. + + jint magic_const; + jint shift_const; + if (magic_int_divide_constants(d, magic_const, shift_const)) { + Node *magic = phase->longcon(magic_const); + Node *dividend_long = phase->transform(new (phase->C, 2) ConvI2LNode(dividend)); + + // Compute the high half of the dividend x magic multiplication + Node *mul_hi = phase->transform(new (phase->C, 3) MulLNode(dividend_long, magic)); + + if (magic_const < 0) { + mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(N))); + mul_hi = phase->transform(new (phase->C, 2) ConvL2INode(mul_hi)); + + // The magic multiplier is too large for a 32 bit constant. We've adjusted + // it down by 2^32, but have to add 1 dividend back in after the multiplication. + // This handles the "overflow" case described by Granlund and Montgomery. + mul_hi = phase->transform(new (phase->C, 3) AddINode(dividend, mul_hi)); + + // Shift over the (adjusted) mulhi + if (shift_const != 0) { + mul_hi = phase->transform(new (phase->C, 3) RShiftINode(mul_hi, phase->intcon(shift_const))); + } + } else { + // No add is required, we can merge the shifts together. + mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(N + shift_const))); + mul_hi = phase->transform(new (phase->C, 2) ConvL2INode(mul_hi)); + } + + // Get a 0 or -1 from the sign of the dividend. + Node *addend0 = mul_hi; + Node *addend1 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1))); + + // If the divisor is negative, swap the order of the input addends; + // this has the effect of negating the quotient. + if (!d_pos) { + Node *temp = addend0; addend0 = addend1; addend1 = temp; + } + + // Adjust the final quotient by subtracting -1 (adding 1) + // from the mul_hi. + q = new (phase->C, 3) SubINode(addend0, addend1); + } + } + + return q; +} + +//---------------------magic_long_divide_constants----------------------------- +// Compute magic multiplier and shift constant for converting a 64 bit divide +// by constant into a multiply/shift/add series. Return false if calculations +// fail. +// +// Borrowed almost verbatum from Hacker's Delight by Henry S. Warren, Jr. with +// minor type name and parameter changes. Adjusted to 64 bit word width. +static bool magic_long_divide_constants(jlong d, jlong &M, jint &s) { + int64_t p; + uint64_t ad, anc, delta, q1, r1, q2, r2, t; + const uint64_t two63 = 0x8000000000000000LL; // 2**63. + + ad = ABS(d); + if (d == 0 || d == 1) return false; + t = two63 + ((uint64_t)d >> 63); + anc = t - 1 - t%ad; // Absolute value of nc. + p = 63; // Init. p. + q1 = two63/anc; // Init. q1 = 2**p/|nc|. + r1 = two63 - q1*anc; // Init. r1 = rem(2**p, |nc|). + q2 = two63/ad; // Init. q2 = 2**p/|d|. + r2 = two63 - q2*ad; // Init. r2 = rem(2**p, |d|). + do { + p = p + 1; + q1 = 2*q1; // Update q1 = 2**p/|nc|. + r1 = 2*r1; // Update r1 = rem(2**p, |nc|). + if (r1 >= anc) { // (Must be an unsigned + q1 = q1 + 1; // comparison here). + r1 = r1 - anc; + } + q2 = 2*q2; // Update q2 = 2**p/|d|. + r2 = 2*r2; // Update r2 = rem(2**p, |d|). + if (r2 >= ad) { // (Must be an unsigned + q2 = q2 + 1; // comparison here). + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + M = q2 + 1; + if (d < 0) M = -M; // Magic number and + s = p - 64; // shift amount to return. + + return true; +} + +//---------------------long_by_long_mulhi-------------------------------------- +// Generate ideal node graph for upper half of a 64 bit x 64 bit multiplication +static Node *long_by_long_mulhi( PhaseGVN *phase, Node *dividend, jlong magic_const) { + // If the architecture supports a 64x64 mulhi, there is + // no need to synthesize it in ideal nodes. + if (Matcher::has_match_rule(Op_MulHiL)) { + Node *v = phase->longcon(magic_const); + return new (phase->C, 3) MulHiLNode(dividend, v); } - // division by something else - else if (m_high < (U1 << (N-1))) { - Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend)); - Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high))); - Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(sh_post+N))); - Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3)); - Node *t5 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1))); + const int N = 64; + + Node *u_hi = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N / 2))); + Node *u_lo = phase->transform(new (phase->C, 3) AndLNode(dividend, phase->longcon(0xFFFFFFFF))); + + Node *v_hi = phase->longcon(magic_const >> N/2); + Node *v_lo = phase->longcon(magic_const & 0XFFFFFFFF); + + Node *hihi_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_hi)); + Node *hilo_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_lo)); + Node *lohi_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_hi)); + Node *lolo_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_lo)); + + Node *t1 = phase->transform(new (phase->C, 3) URShiftLNode(lolo_product, phase->intcon(N / 2))); + Node *t2 = phase->transform(new (phase->C, 3) AddLNode(hilo_product, t1)); + Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(N / 2))); + Node *t4 = phase->transform(new (phase->C, 3) AndLNode(t2, phase->longcon(0xFFFFFFFF))); + Node *t5 = phase->transform(new (phase->C, 3) AddLNode(t4, lohi_product)); + Node *t6 = phase->transform(new (phase->C, 3) RShiftLNode(t5, phase->intcon(N / 2))); + Node *t7 = phase->transform(new (phase->C, 3) AddLNode(t3, hihi_product)); + + return new (phase->C, 3) AddLNode(t7, t6); +} + + +//--------------------------transform_long_divide------------------------------ +// Convert a division by constant divisor into an alternate Ideal graph. +// Return NULL if no transformation occurs. +static Node *transform_long_divide( PhaseGVN *phase, Node *dividend, jlong divisor ) { + // Check for invalid divisors + assert( divisor != 0L && divisor != min_jlong, + "bad divisor for transforming to long multiply" ); + + bool d_pos = divisor >= 0; + jlong d = d_pos ? divisor : -divisor; + const int N = 64; + + // Result + Node *q = NULL; + + if (d == 1) { + // division by +/- 1 + if (!d_pos) { + // Just negate the value + q = new (phase->C, 3) SubLNode(phase->longcon(0), dividend); + } + } else if ( is_power_of_2_long(d) ) { + + // division by +/- a power of 2 + + // See if we can simply do a shift without rounding + bool needs_rounding = true; + const Type *dt = phase->type(dividend); + const TypeLong *dtl = dt->isa_long(); - q = new (phase->C, 3) SubINode(d_pos ? t4 : t5, d_pos ? t5 : t4); + if (dtl && dtl->_lo > 0) { + // we don't need to round a positive dividend + needs_rounding = false; + } else if( dividend->Opcode() == Op_AndL ) { + // An AND mask of sufficient size clears the low bits and + // I can avoid rounding. + const TypeLong *andconl = phase->type( dividend->in(2) )->isa_long(); + if( andconl && andconl->is_con(-d)) { + dividend = dividend->in(1); + needs_rounding = false; + } + } + + // Add rounding to the shift to handle the sign bit + int l = log2_long(d-1)+1; + if (needs_rounding) { + // Divide-by-power-of-2 can be made into a shift, but you have to do + // more math for the rounding. You need to add 0 for positive + // numbers, and "i-1" for negative numbers. Example: i=4, so the + // shift is by 2. You need to add 3 to negative dividends and 0 to + // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1, + // (-2+3)>>2 becomes 0, etc. + + // Compute 0 or -1, based on sign bit + Node *sign = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N - 1))); + // Mask sign bit to the low sign bits + Node *round = phase->transform(new (phase->C, 3) URShiftLNode(sign, phase->intcon(N - l))); + // Round up before shifting + dividend = phase->transform(new (phase->C, 3) AddLNode(dividend, round)); + } + + // Shift for division + q = new (phase->C, 3) RShiftLNode(dividend, phase->intcon(l)); + + if (!d_pos) { + q = new (phase->C, 3) SubLNode(phase->longcon(0), phase->transform(q)); + } + } else { + // Attempt the jlong constant divide -> multiply transform found in + // "Division by Invariant Integers using Multiplication" + // by Granlund and Montgomery + // See also "Hacker's Delight", chapter 10 by Warren. + + jlong magic_const; + jint shift_const; + if (magic_long_divide_constants(d, magic_const, shift_const)) { + // Compute the high half of the dividend x magic multiplication + Node *mul_hi = phase->transform(long_by_long_mulhi(phase, dividend, magic_const)); + + // The high half of the 128-bit multiply is computed. + if (magic_const < 0) { + // The magic multiplier is too large for a 64 bit constant. We've adjusted + // it down by 2^64, but have to add 1 dividend back in after the multiplication. + // This handles the "overflow" case described by Granlund and Montgomery. + mul_hi = phase->transform(new (phase->C, 3) AddLNode(dividend, mul_hi)); + } + + // Shift over the (adjusted) mulhi + if (shift_const != 0) { + mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(shift_const))); + } + + // Get a 0 or -1 from the sign of the dividend. + Node *addend0 = mul_hi; + Node *addend1 = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N-1))); + + // If the divisor is negative, swap the order of the input addends; + // this has the effect of negating the quotient. + if (!d_pos) { + Node *temp = addend0; addend0 = addend1; addend1 = temp; + } + + // Adjust the final quotient by subtracting -1 (adding 1) + // from the mul_hi. + q = new (phase->C, 3) SubLNode(addend0, addend1); + } } - // This handles that case where m_high is >= 2**(N-1). In that case, - // we subtract out 2**N from the multiply and add it in later as - // "dividend" in the equation (t5). This case computes the same result - // as the immediately preceeding case, save that rounding and overflow - // are accounted for. - else { - Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend)); - Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high - (U1 << N)))); - Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(N))); - Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3)); - Node *t5 = phase->transform(new (phase->C, 3) AddINode(dividend, t4)); - Node *t6 = phase->transform(new (phase->C, 3) RShiftINode(t5, phase->intcon(sh_post))); - Node *t7 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1))); - - q = new (phase->C, 3) SubINode(d_pos ? t6 : t7, d_pos ? t7 : t6); - } - - return (q); + return q; } //============================================================================= @@ -164,7 +404,7 @@ const TypeInt *ti = t->isa_int(); if( !ti ) return NULL; if( !ti->is_con() ) return NULL; - int i = ti->get_con(); // Get divisor + jint i = ti->get_con(); // Get divisor if (i == 0) return NULL; // Dividing by zero constant does not idealize @@ -173,7 +413,7 @@ // Dividing by MININT does not optimize as a power-of-2 shift. if( i == min_jint ) return NULL; - return transform_int_divide_to_long_multiply( phase, in(1), i ); + return transform_int_divide( phase, in(1), i ); } //------------------------------Value------------------------------------------ @@ -255,85 +495,22 @@ if (in(0) && remove_dead_region(phase, can_reshape)) return this; const Type *t = phase->type( in(2) ); - if( t == TypeLong::ONE ) // Identity? + if( t == TypeLong::ONE ) // Identity? return NULL; // Skip it - const TypeLong *ti = t->isa_long(); - if( !ti ) return NULL; - if( !ti->is_con() ) return NULL; - jlong i = ti->get_con(); // Get divisor - if( i ) set_req(0, NULL); // Dividing by a not-zero constant; no faulting + const TypeLong *tl = t->isa_long(); + if( !tl ) return NULL; + if( !tl->is_con() ) return NULL; + jlong l = tl->get_con(); // Get divisor + + if (l == 0) return NULL; // Dividing by zero constant does not idealize + + set_req(0,NULL); // Dividing by a not-zero constant; no faulting // Dividing by MININT does not optimize as a power-of-2 shift. - if( i == min_jlong ) return NULL; - - // Check for negative power of 2 divisor, if so, negate it and set a flag - // to indicate result needs to be negated. Note that negating the dividend - // here does not work when it has the value MININT - Node *dividend = in(1); - bool negate_res = false; - if (is_power_of_2_long(-i)) { - i = -i; // Flip divisor - negate_res = true; - } - - // Check for power of 2 - if (!is_power_of_2_long(i)) // Is divisor a power of 2? - return NULL; // Not a power of 2 - - // Compute number of bits to shift - int log_i = log2_long(i); - - // See if we can simply do a shift without rounding - bool needs_rounding = true; - const Type *dt = phase->type(dividend); - const TypeLong *dtl = dt->isa_long(); + if( l == min_jlong ) return NULL; - if (dtl && dtl->_lo > 0) { - // we don't need to round a positive dividend - needs_rounding = false; - } else if( dividend->Opcode() == Op_AndL ) { - // An AND mask of sufficient size clears the low bits and - // I can avoid rounding. - const TypeLong *andconi = phase->type( dividend->in(2) )->isa_long(); - if( andconi && - andconi->is_con() && - andconi->get_con() == -i ) { - dividend = dividend->in(1); - needs_rounding = false; - } - } - - if (!needs_rounding) { - Node *result = new (phase->C, 3) RShiftLNode(dividend, phase->intcon(log_i)); - if (negate_res) { - result = phase->transform(result); - result = new (phase->C, 3) SubLNode(phase->longcon(0), result); - } - return result; - } - - // Divide-by-power-of-2 can be made into a shift, but you have to do - // more math for the rounding. You need to add 0 for positive - // numbers, and "i-1" for negative numbers. Example: i=4, so the - // shift is by 2. You need to add 3 to negative dividends and 0 to - // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1, - // (-2+3)>>2 becomes 0, etc. - - // Compute 0 or -1, based on sign bit - Node *sign = phase->transform(new (phase->C, 3) RShiftLNode(dividend,phase->intcon(63))); - // Mask sign bit to the low sign bits - Node *round = phase->transform(new (phase->C, 3) AndLNode(sign,phase->longcon(i-1))); - // Round up before shifting - Node *sum = phase->transform(new (phase->C, 3) AddLNode(dividend,round)); - // Shift for division - Node *result = new (phase->C, 3) RShiftLNode(sum, phase->intcon(log_i)); - if (negate_res) { - result = phase->transform(result); - result = new (phase->C, 3) SubLNode(phase->longcon(0), result); - } - - return result; + return transform_long_divide( phase, in(1), l ); } //------------------------------Value------------------------------------------ @@ -615,10 +792,10 @@ hook->init_req(0, x); // Add a use to x to prevent him from dying // Generate code to reduce X rapidly to nearly 2^k-1. for( int i = 0; i < trip_count; i++ ) { - Node *xl = phase->transform( new (phase->C, 3) AndINode(x,divisor) ); - Node *xh = phase->transform( new (phase->C, 3) RShiftINode(x,phase->intcon(k)) ); // Must be signed - x = phase->transform( new (phase->C, 3) AddINode(xh,xl) ); - hook->set_req(0, x); + Node *xl = phase->transform( new (phase->C, 3) AndINode(x,divisor) ); + Node *xh = phase->transform( new (phase->C, 3) RShiftINode(x,phase->intcon(k)) ); // Must be signed + x = phase->transform( new (phase->C, 3) AddINode(xh,xl) ); + hook->set_req(0, x); } // Generate sign-fixup code. Was original value positive? @@ -675,18 +852,21 @@ hook->init_req(0, in(1)); // Divide using the transform from DivI to MulL - Node *divide = phase->transform( transform_int_divide_to_long_multiply( phase, in(1), pos_con ) ); + Node *result = transform_int_divide( phase, in(1), pos_con ); + if (result != NULL) { + Node *divide = phase->transform(result); - // Re-multiply, using a shift if this is a power of two - Node *mult = NULL; + // Re-multiply, using a shift if this is a power of two + Node *mult = NULL; - if( log2_con >= 0 ) - mult = phase->transform( new (phase->C, 3) LShiftINode( divide, phase->intcon( log2_con ) ) ); - else - mult = phase->transform( new (phase->C, 3) MulINode( divide, phase->intcon( pos_con ) ) ); + if( log2_con >= 0 ) + mult = phase->transform( new (phase->C, 3) LShiftINode( divide, phase->intcon( log2_con ) ) ); + else + mult = phase->transform( new (phase->C, 3) MulINode( divide, phase->intcon( pos_con ) ) ); - // Finally, subtract the multiplied divided value from the original - Node *result = new (phase->C, 3) SubINode( in(1), mult ); + // Finally, subtract the multiplied divided value from the original + result = new (phase->C, 3) SubINode( in(1), mult ); + } // Now remove the bogus extra edges used to keep things alive if (can_reshape) { @@ -748,73 +928,126 @@ // Get the modulus const Type *t = phase->type( in(2) ); if( t == Type::TOP ) return NULL; - const TypeLong *ti = t->is_long(); + const TypeLong *tl = t->is_long(); // Check for useless control input // Check for excluding mod-zero case - if( in(0) && (ti->_hi < 0 || ti->_lo > 0) ) { + if( in(0) && (tl->_hi < 0 || tl->_lo > 0) ) { set_req(0, NULL); // Yank control input return this; } // See if we are MOD'ing by 2^k or 2^k-1. - if( !ti->is_con() ) return NULL; - jlong con = ti->get_con(); - bool m1 = false; - if( !is_power_of_2_long(con) ) { // Not 2^k - if( !is_power_of_2_long(con+1) ) // Not 2^k-1? - return NULL; // No interesting mod hacks - m1 = true; // Found 2^k-1 - con++; // Convert to 2^k form - } - uint k = log2_long(con); // Extract k + if( !tl->is_con() ) return NULL; + jlong con = tl->get_con(); + + Node *hook = new (phase->C, 1) Node(1); // Expand mod - if( !m1 ) { // Case 2^k - } else { // Case 2^k-1 + if( con >= 0 && con < max_jlong && is_power_of_2_long(con+1) ) { + uint k = log2_long(con); // Extract k + // Basic algorithm by David Detlefs. See fastmod_long.java for gory details. // Used to help a popular random number generator which does a long-mod // of 2^31-1 and shows up in SpecJBB and SciMark. static int unroll_factor[] = { 999, 999, 61, 30, 20, 15, 12, 10, 8, 7, 6, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/}; int trip_count = 1; if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k]; - if( trip_count > 4 ) return NULL; // Too much unrolling - if (ConditionalMoveLimit == 0) return NULL; // cmov is required - Node *x = in(1); // Value being mod'd - Node *divisor = in(2); // Also is mask + // If the unroll factor is not too large, and if conditional moves are + // ok, then use this case + if( trip_count <= 5 && ConditionalMoveLimit != 0 ) { + Node *x = in(1); // Value being mod'd + Node *divisor = in(2); // Also is mask - Node *hook = new (phase->C, 1) Node(x); - // Generate code to reduce X rapidly to nearly 2^k-1. - for( int i = 0; i < trip_count; i++ ) { + hook->init_req(0, x); // Add a use to x to prevent him from dying + // Generate code to reduce X rapidly to nearly 2^k-1. + for( int i = 0; i < trip_count; i++ ) { Node *xl = phase->transform( new (phase->C, 3) AndLNode(x,divisor) ); Node *xh = phase->transform( new (phase->C, 3) RShiftLNode(x,phase->intcon(k)) ); // Must be signed x = phase->transform( new (phase->C, 3) AddLNode(xh,xl) ); hook->set_req(0, x); // Add a use to x to prevent him from dying + } + + // Generate sign-fixup code. Was original value positive? + // long hack_res = (i >= 0) ? divisor : CONST64(1); + Node *cmp1 = phase->transform( new (phase->C, 3) CmpLNode( in(1), phase->longcon(0) ) ); + Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) ); + Node *cmov1= phase->transform( new (phase->C, 4) CMoveLNode(bol1, phase->longcon(1), divisor, TypeLong::LONG) ); + // if( x >= hack_res ) x -= divisor; + Node *sub = phase->transform( new (phase->C, 3) SubLNode( x, divisor ) ); + Node *cmp2 = phase->transform( new (phase->C, 3) CmpLNode( x, cmov1 ) ); + Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) ); + // Convention is to not transform the return value of an Ideal + // since Ideal is expected to return a modified 'this' or a new node. + Node *cmov2= new (phase->C, 4) CMoveLNode(bol2, x, sub, TypeLong::LONG); + // cmov2 is now the mod + + // Now remove the bogus extra edges used to keep things alive + if (can_reshape) { + phase->is_IterGVN()->remove_dead_node(hook); + } else { + hook->set_req(0, NULL); // Just yank bogus edge during Parse phase + } + return cmov2; } - // Generate sign-fixup code. Was original value positive? - // long hack_res = (i >= 0) ? divisor : CONST64(1); - Node *cmp1 = phase->transform( new (phase->C, 3) CmpLNode( in(1), phase->longcon(0) ) ); - Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) ); - Node *cmov1= phase->transform( new (phase->C, 4) CMoveLNode(bol1, phase->longcon(1), divisor, TypeLong::LONG) ); - // if( x >= hack_res ) x -= divisor; - Node *sub = phase->transform( new (phase->C, 3) SubLNode( x, divisor ) ); - Node *cmp2 = phase->transform( new (phase->C, 3) CmpLNode( x, cmov1 ) ); - Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) ); - // Convention is to not transform the return value of an Ideal - // since Ideal is expected to return a modified 'this' or a new node. - Node *cmov2= new (phase->C, 4) CMoveLNode(bol2, x, sub, TypeLong::LONG); - // cmov2 is now the mod + } + + // Fell thru, the unroll case is not appropriate. Transform the modulo + // into a long multiply/int multiply/subtract case + + // Cannot handle mod 0, and min_jint isn't handled by the transform + if( con == 0 || con == min_jlong ) return NULL; + + // Get the absolute value of the constant; at this point, we can use this + jlong pos_con = (con >= 0) ? con : -con; + + // integer Mod 1 is always 0 + if( pos_con == 1 ) return new (phase->C, 1) ConLNode(TypeLong::ZERO); + + int log2_con = -1; + + // If this is a power of two, they maybe we can mask it + if( is_power_of_2_long(pos_con) ) { + log2_con = log2_long(pos_con); + + const Type *dt = phase->type(in(1)); + const TypeLong *dtl = dt->isa_long(); + + // See if this can be masked, if the dividend is non-negative + if( dtl && dtl->_lo >= 0 ) + return ( new (phase->C, 3) AndLNode( in(1), phase->longcon( pos_con-1 ) ) ); + } - // Now remove the bogus extra edges used to keep things alive - if (can_reshape) { - phase->is_IterGVN()->remove_dead_node(hook); - } else { - hook->set_req(0, NULL); // Just yank bogus edge during Parse phase - } - return cmov2; + // Save in(1) so that it cannot be changed or deleted + hook->init_req(0, in(1)); + + // Divide using the transform from DivI to MulL + Node *result = transform_long_divide( phase, in(1), pos_con ); + if (result != NULL) { + Node *divide = phase->transform(result); + + // Re-multiply, using a shift if this is a power of two + Node *mult = NULL; + + if( log2_con >= 0 ) + mult = phase->transform( new (phase->C, 3) LShiftLNode( divide, phase->intcon( log2_con ) ) ); + else + mult = phase->transform( new (phase->C, 3) MulLNode( divide, phase->longcon( pos_con ) ) ); + + // Finally, subtract the multiplied divided value from the original + result = new (phase->C, 3) SubLNode( in(1), mult ); } - return NULL; + + // Now remove the bogus extra edges used to keep things alive + if (can_reshape) { + phase->is_IterGVN()->remove_dead_node(hook); + } else { + hook->set_req(0, NULL); // Just yank bogus edge during Parse phase + } + + // return the value + return result; } //------------------------------Value------------------------------------------ diff -r 60b728ec77c1 -r f3de1255b035 src/share/vm/opto/mulnode.cpp --- a/src/share/vm/opto/mulnode.cpp Tue Apr 29 19:45:22 2008 -0700 +++ b/src/share/vm/opto/mulnode.cpp Wed May 07 08:06:46 2008 -0700 @@ -365,6 +365,25 @@ } //============================================================================= +//------------------------------Value------------------------------------------ +const Type *MulHiLNode::Value( PhaseTransform *phase ) const { + // Either input is TOP ==> the result is TOP + const Type *t1 = phase->type( in(1) ); + const Type *t2 = phase->type( in(2) ); + if( t1 == Type::TOP ) return Type::TOP; + if( t2 == Type::TOP ) return Type::TOP; + + // Either input is BOTTOM ==> the result is the local BOTTOM + const Type *bot = bottom_type(); + if( (t1 == bot) || (t2 == bot) || + (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) ) + return bot; + + // It is not worth trying to constant fold this stuff! + return TypeLong::LONG; +} + +//============================================================================= //------------------------------mul_ring--------------------------------------- // Supplied function returns the product of the inputs IN THE CURRENT RING. // For the logical operations the ring's MUL is really a logical AND function. diff -r 60b728ec77c1 -r f3de1255b035 src/share/vm/opto/mulnode.hpp --- a/src/share/vm/opto/mulnode.hpp Tue Apr 29 19:45:22 2008 -0700 +++ b/src/share/vm/opto/mulnode.hpp Wed May 07 08:06:46 2008 -0700 @@ -133,6 +133,16 @@ virtual uint ideal_reg() const { return Op_RegD; } }; +//-------------------------------MulHiLNode------------------------------------ +// Upper 64 bits of a 64 bit by 64 bit multiply +class MulHiLNode : public Node { +public: + MulHiLNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {} + virtual int Opcode() const; + virtual const Type *Value( PhaseTransform *phase ) const; + const Type *bottom_type() const { return TypeLong::LONG; } + virtual uint ideal_reg() const { return Op_RegL; } +}; //------------------------------AndINode--------------------------------------- // Logically AND 2 integers. Included with the MUL nodes because it inherits diff -r 60b728ec77c1 -r f3de1255b035 src/share/vm/opto/type.hpp --- a/src/share/vm/opto/type.hpp Tue Apr 29 19:45:22 2008 -0700 +++ b/src/share/vm/opto/type.hpp Wed May 07 08:06:46 2008 -0700 @@ -442,6 +442,7 @@ // Check for single integer int is_con() const { return _lo==_hi; } + bool is_con(int i) const { return is_con() && _lo == i; } jlong get_con() const { assert( is_con(), "" ); return _lo; } virtual bool is_finite() const; // Has a finite value diff -r 60b728ec77c1 -r f3de1255b035 src/share/vm/utilities/globalDefinitions.hpp --- a/src/share/vm/utilities/globalDefinitions.hpp Tue Apr 29 19:45:22 2008 -0700 +++ b/src/share/vm/utilities/globalDefinitions.hpp Wed May 07 08:06:46 2008 -0700 @@ -890,7 +890,7 @@ i++; p *= 2; } // p = 2^(i+1) && x < p (i.e., 2^i <= x < 2^(i+1)) - // (if p = 0 then overflow occured and i = 31) + // (if p = 0 then overflow occured and i = 63) return i; }