Mercurial > hg > truffle
comparison src/share/vm/opto/divnode.cpp @ 145:f3de1255b035
6603011: RFE: Optimize long division
Summary: Transform long division by constant into multiply
Reviewed-by: never, kvn
author | rasbold |
---|---|
date | Wed, 07 May 2008 08:06:46 -0700 |
parents | 6e825ad773c6 |
children | d1605aabd0a1 |
comparison
equal
deleted
inserted
replaced
132:60b728ec77c1 | 145:f3de1255b035 |
---|---|
28 | 28 |
29 #include "incls/_precompiled.incl" | 29 #include "incls/_precompiled.incl" |
30 #include "incls/_divnode.cpp.incl" | 30 #include "incls/_divnode.cpp.incl" |
31 #include <math.h> | 31 #include <math.h> |
32 | 32 |
33 // Implement the integer constant divide -> long multiply transform found in | 33 //----------------------magic_int_divide_constants----------------------------- |
34 // "Division by Invariant Integers using Multiplication" | 34 // Compute magic multiplier and shift constant for converting a 32 bit divide |
35 // by Granlund and Montgomery | 35 // by constant into a multiply/shift/add series. Return false if calculations |
36 static Node *transform_int_divide_to_long_multiply( PhaseGVN *phase, Node *dividend, int divisor ) { | 36 // fail. |
37 // | |
38 // Borrowed almost verbatim from Hacker's Delight by Henry S. Warren, Jr. with | |
39 // minor type name and parameter changes. | |
40 static bool magic_int_divide_constants(jint d, jint &M, jint &s) { | |
41 int32_t p; | |
42 uint32_t ad, anc, delta, q1, r1, q2, r2, t; | |
43 const uint32_t two31 = 0x80000000L; // 2**31. | |
44 | |
45 ad = ABS(d); | |
46 if (d == 0 || d == 1) return false; | |
47 t = two31 + ((uint32_t)d >> 31); | |
48 anc = t - 1 - t%ad; // Absolute value of nc. | |
49 p = 31; // Init. p. | |
50 q1 = two31/anc; // Init. q1 = 2**p/|nc|. | |
51 r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|). | |
52 q2 = two31/ad; // Init. q2 = 2**p/|d|. | |
53 r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|). | |
54 do { | |
55 p = p + 1; | |
56 q1 = 2*q1; // Update q1 = 2**p/|nc|. | |
57 r1 = 2*r1; // Update r1 = rem(2**p, |nc|). | |
58 if (r1 >= anc) { // (Must be an unsigned | |
59 q1 = q1 + 1; // comparison here). | |
60 r1 = r1 - anc; | |
61 } | |
62 q2 = 2*q2; // Update q2 = 2**p/|d|. | |
63 r2 = 2*r2; // Update r2 = rem(2**p, |d|). | |
64 if (r2 >= ad) { // (Must be an unsigned | |
65 q2 = q2 + 1; // comparison here). | |
66 r2 = r2 - ad; | |
67 } | |
68 delta = ad - r2; | |
69 } while (q1 < delta || (q1 == delta && r1 == 0)); | |
70 | |
71 M = q2 + 1; | |
72 if (d < 0) M = -M; // Magic number and | |
73 s = p - 32; // shift amount to return. | |
74 | |
75 return true; | |
76 } | |
77 | |
78 //--------------------------transform_int_divide------------------------------- | |
79 // Convert a division by constant divisor into an alternate Ideal graph. | |
80 // Return NULL if no transformation occurs. | |
81 static Node *transform_int_divide( PhaseGVN *phase, Node *dividend, jint divisor ) { | |
37 | 82 |
38 // Check for invalid divisors | 83 // Check for invalid divisors |
39 assert( divisor != 0 && divisor != min_jint && divisor != 1, | 84 assert( divisor != 0 && divisor != min_jint, |
40 "bad divisor for transforming to long multiply" ); | 85 "bad divisor for transforming to long multiply" ); |
41 | 86 |
42 // Compute l = ceiling(log2(d)) | |
43 // presumes d is more likely small | |
44 bool d_pos = divisor >= 0; | 87 bool d_pos = divisor >= 0; |
45 int d = d_pos ? divisor : -divisor; | 88 jint d = d_pos ? divisor : -divisor; |
46 unsigned ud = (unsigned)d; | |
47 const int N = 32; | 89 const int N = 32; |
48 int l = log2_intptr(d-1)+1; | |
49 int sh_post = l; | |
50 | |
51 const uint64_t U1 = (uint64_t)1; | |
52 | |
53 // Cliff pointed out how to prevent overflow (from the paper) | |
54 uint64_t m_low = (((U1 << l) - ud) << N) / ud + (U1 << N); | |
55 uint64_t m_high = ((((U1 << l) - ud) << N) + (U1 << (l+1))) / ud + (U1 << N); | |
56 | |
57 // Reduce to lowest terms | |
58 for ( ; sh_post > 0; sh_post-- ) { | |
59 uint64_t m_low_1 = m_low >> 1; | |
60 uint64_t m_high_1 = m_high >> 1; | |
61 if ( m_low_1 >= m_high_1 ) | |
62 break; | |
63 m_low = m_low_1; | |
64 m_high = m_high_1; | |
65 } | |
66 | 90 |
67 // Result | 91 // Result |
68 Node *q; | 92 Node *q = NULL; |
69 | 93 |
70 // division by +/- 1 | |
71 if (d == 1) { | 94 if (d == 1) { |
72 // Filtered out as identity above | 95 // division by +/- 1 |
73 if (d_pos) | 96 if (!d_pos) { |
74 return NULL; | 97 // Just negate the value |
75 | |
76 // Just negate the value | |
77 else { | |
78 q = new (phase->C, 3) SubINode(phase->intcon(0), dividend); | 98 q = new (phase->C, 3) SubINode(phase->intcon(0), dividend); |
79 } | 99 } |
80 } | 100 } else if ( is_power_of_2(d) ) { |
81 | 101 // division by +/- a power of 2 |
82 // division by +/- a power of 2 | |
83 else if ( is_power_of_2(d) ) { | |
84 | 102 |
85 // See if we can simply do a shift without rounding | 103 // See if we can simply do a shift without rounding |
86 bool needs_rounding = true; | 104 bool needs_rounding = true; |
87 const Type *dt = phase->type(dividend); | 105 const Type *dt = phase->type(dividend); |
88 const TypeInt *dti = dt->isa_int(); | 106 const TypeInt *dti = dt->isa_int(); |
89 | 107 if (dti && dti->_lo >= 0) { |
90 // we don't need to round a positive dividend | 108 // we don't need to round a positive dividend |
91 if (dti && dti->_lo >= 0) | |
92 needs_rounding = false; | 109 needs_rounding = false; |
93 | 110 } else if( dividend->Opcode() == Op_AndI ) { |
94 // An AND mask of sufficient size clears the low bits and | 111 // An AND mask of sufficient size clears the low bits and |
95 // I can avoid rounding. | 112 // I can avoid rounding. |
96 else if( dividend->Opcode() == Op_AndI ) { | |
97 const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int(); | 113 const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int(); |
98 if( andconi && andconi->is_con(-d) ) { | 114 if( andconi && andconi->is_con(-d) ) { |
99 dividend = dividend->in(1); | 115 dividend = dividend->in(1); |
100 needs_rounding = false; | 116 needs_rounding = false; |
101 } | 117 } |
102 } | 118 } |
103 | 119 |
104 // Add rounding to the shift to handle the sign bit | 120 // Add rounding to the shift to handle the sign bit |
105 if( needs_rounding ) { | 121 int l = log2_intptr(d-1)+1; |
106 Node *t1 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(l - 1))); | 122 if (needs_rounding) { |
107 Node *t2 = phase->transform(new (phase->C, 3) URShiftINode(t1, phase->intcon(N - l))); | 123 // Divide-by-power-of-2 can be made into a shift, but you have to do |
108 dividend = phase->transform(new (phase->C, 3) AddINode(dividend, t2)); | 124 // more math for the rounding. You need to add 0 for positive |
109 } | 125 // numbers, and "i-1" for negative numbers. Example: i=4, so the |
110 | 126 // shift is by 2. You need to add 3 to negative dividends and 0 to |
127 // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1, | |
128 // (-2+3)>>2 becomes 0, etc. | |
129 | |
130 // Compute 0 or -1, based on sign bit | |
131 Node *sign = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N - 1))); | |
132 // Mask sign bit to the low sign bits | |
133 Node *round = phase->transform(new (phase->C, 3) URShiftINode(sign, phase->intcon(N - l))); | |
134 // Round up before shifting | |
135 dividend = phase->transform(new (phase->C, 3) AddINode(dividend, round)); | |
136 } | |
137 | |
138 // Shift for division | |
111 q = new (phase->C, 3) RShiftINode(dividend, phase->intcon(l)); | 139 q = new (phase->C, 3) RShiftINode(dividend, phase->intcon(l)); |
112 | 140 |
113 if (!d_pos) | 141 if (!d_pos) { |
114 q = new (phase->C, 3) SubINode(phase->intcon(0), phase->transform(q)); | 142 q = new (phase->C, 3) SubINode(phase->intcon(0), phase->transform(q)); |
115 } | 143 } |
116 | 144 } else { |
117 // division by something else | 145 // Attempt the jint constant divide -> multiply transform found in |
118 else if (m_high < (U1 << (N-1))) { | 146 // "Division by Invariant Integers using Multiplication" |
119 Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend)); | 147 // by Granlund and Montgomery |
120 Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high))); | 148 // See also "Hacker's Delight", chapter 10 by Warren. |
121 Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(sh_post+N))); | 149 |
122 Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3)); | 150 jint magic_const; |
123 Node *t5 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1))); | 151 jint shift_const; |
124 | 152 if (magic_int_divide_constants(d, magic_const, shift_const)) { |
125 q = new (phase->C, 3) SubINode(d_pos ? t4 : t5, d_pos ? t5 : t4); | 153 Node *magic = phase->longcon(magic_const); |
126 } | 154 Node *dividend_long = phase->transform(new (phase->C, 2) ConvI2LNode(dividend)); |
127 | 155 |
128 // This handles that case where m_high is >= 2**(N-1). In that case, | 156 // Compute the high half of the dividend x magic multiplication |
129 // we subtract out 2**N from the multiply and add it in later as | 157 Node *mul_hi = phase->transform(new (phase->C, 3) MulLNode(dividend_long, magic)); |
130 // "dividend" in the equation (t5). This case computes the same result | 158 |
131 // as the immediately preceding case, save that rounding and overflow | 159 if (magic_const < 0) { |
132 // are accounted for. | 160 mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(N))); |
133 else { | 161 mul_hi = phase->transform(new (phase->C, 2) ConvL2INode(mul_hi)); |
134 Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend)); | 162 |
135 Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high - (U1 << N)))); | 163 // The magic multiplier is too large for a 32 bit constant. We've adjusted |
136 Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(N))); | 164 // it down by 2^32, but have to add 1 dividend back in after the multiplication. |
137 Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3)); | 165 // This handles the "overflow" case described by Granlund and Montgomery. |
138 Node *t5 = phase->transform(new (phase->C, 3) AddINode(dividend, t4)); | 166 mul_hi = phase->transform(new (phase->C, 3) AddINode(dividend, mul_hi)); |
139 Node *t6 = phase->transform(new (phase->C, 3) RShiftINode(t5, phase->intcon(sh_post))); | 167 |
140 Node *t7 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1))); | 168 // Shift over the (adjusted) mulhi |
141 | 169 if (shift_const != 0) { |
142 q = new (phase->C, 3) SubINode(d_pos ? t6 : t7, d_pos ? t7 : t6); | 170 mul_hi = phase->transform(new (phase->C, 3) RShiftINode(mul_hi, phase->intcon(shift_const))); |
143 } | 171 } |
144 | 172 } else { |
145 return (q); | 173 // No add is required, we can merge the shifts together. |
174 mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(N + shift_const))); | |
175 mul_hi = phase->transform(new (phase->C, 2) ConvL2INode(mul_hi)); | |
176 } | |
177 | |
178 // Get a 0 or -1 from the sign of the dividend. | |
179 Node *addend0 = mul_hi; | |
180 Node *addend1 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1))); | |
181 | |
182 // If the divisor is negative, swap the order of the input addends; | |
183 // this has the effect of negating the quotient. | |
184 if (!d_pos) { | |
185 Node *temp = addend0; addend0 = addend1; addend1 = temp; | |
186 } | |
187 | |
188 // Adjust the final quotient by subtracting -1 (adding 1) | |
189 // from the mul_hi. | |
190 q = new (phase->C, 3) SubINode(addend0, addend1); | |
191 } | |
192 } | |
193 | |
194 return q; | |
195 } | |
196 | |
197 //---------------------magic_long_divide_constants----------------------------- | |
198 // Compute magic multiplier and shift constant for converting a 64 bit divide | |
199 // by constant into a multiply/shift/add series. Return false if calculations | |
200 // fail. | |
201 // | |
202 // Borrowed almost verbatim from Hacker's Delight by Henry S. Warren, Jr. with | |
203 // minor type name and parameter changes. Adjusted to 64 bit word width. | |
204 static bool magic_long_divide_constants(jlong d, jlong &M, jint &s) { | |
205 int64_t p; | |
206 uint64_t ad, anc, delta, q1, r1, q2, r2, t; | |
207 const uint64_t two63 = 0x8000000000000000LL; // 2**63. | |
208 | |
209 ad = ABS(d); | |
210 if (d == 0 || d == 1) return false; | |
211 t = two63 + ((uint64_t)d >> 63); | |
212 anc = t - 1 - t%ad; // Absolute value of nc. | |
213 p = 63; // Init. p. | |
214 q1 = two63/anc; // Init. q1 = 2**p/|nc|. | |
215 r1 = two63 - q1*anc; // Init. r1 = rem(2**p, |nc|). | |
216 q2 = two63/ad; // Init. q2 = 2**p/|d|. | |
217 r2 = two63 - q2*ad; // Init. r2 = rem(2**p, |d|). | |
218 do { | |
219 p = p + 1; | |
220 q1 = 2*q1; // Update q1 = 2**p/|nc|. | |
221 r1 = 2*r1; // Update r1 = rem(2**p, |nc|). | |
222 if (r1 >= anc) { // (Must be an unsigned | |
223 q1 = q1 + 1; // comparison here). | |
224 r1 = r1 - anc; | |
225 } | |
226 q2 = 2*q2; // Update q2 = 2**p/|d|. | |
227 r2 = 2*r2; // Update r2 = rem(2**p, |d|). | |
228 if (r2 >= ad) { // (Must be an unsigned | |
229 q2 = q2 + 1; // comparison here). | |
230 r2 = r2 - ad; | |
231 } | |
232 delta = ad - r2; | |
233 } while (q1 < delta || (q1 == delta && r1 == 0)); | |
234 | |
235 M = q2 + 1; | |
236 if (d < 0) M = -M; // Magic number and | |
237 s = p - 64; // shift amount to return. | |
238 | |
239 return true; | |
240 } | |
241 | |
242 //---------------------long_by_long_mulhi-------------------------------------- | |
243 // Generate ideal node graph for upper half of a 64 bit x 64 bit multiplication | |
244 static Node *long_by_long_mulhi( PhaseGVN *phase, Node *dividend, jlong magic_const) { | |
245 // If the architecture supports a 64x64 mulhi, there is | |
246 // no need to synthesize it in ideal nodes. | |
247 if (Matcher::has_match_rule(Op_MulHiL)) { | |
248 Node *v = phase->longcon(magic_const); | |
249 return new (phase->C, 3) MulHiLNode(dividend, v); | |
250 } | |
251 | |
252 const int N = 64; | |
253 | |
254 Node *u_hi = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N / 2))); | |
255 Node *u_lo = phase->transform(new (phase->C, 3) AndLNode(dividend, phase->longcon(0xFFFFFFFF))); | |
256 | |
257 Node *v_hi = phase->longcon(magic_const >> N/2); | |
258 Node *v_lo = phase->longcon(magic_const & 0XFFFFFFFF); | |
259 | |
260 Node *hihi_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_hi)); | |
261 Node *hilo_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_lo)); | |
262 Node *lohi_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_hi)); | |
263 Node *lolo_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_lo)); | |
264 | |
265 Node *t1 = phase->transform(new (phase->C, 3) URShiftLNode(lolo_product, phase->intcon(N / 2))); | |
266 Node *t2 = phase->transform(new (phase->C, 3) AddLNode(hilo_product, t1)); | |
267 Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(N / 2))); | |
268 Node *t4 = phase->transform(new (phase->C, 3) AndLNode(t2, phase->longcon(0xFFFFFFFF))); | |
269 Node *t5 = phase->transform(new (phase->C, 3) AddLNode(t4, lohi_product)); | |
270 Node *t6 = phase->transform(new (phase->C, 3) RShiftLNode(t5, phase->intcon(N / 2))); | |
271 Node *t7 = phase->transform(new (phase->C, 3) AddLNode(t3, hihi_product)); | |
272 | |
273 return new (phase->C, 3) AddLNode(t7, t6); | |
274 } | |
275 | |
276 | |
277 //--------------------------transform_long_divide------------------------------ | |
278 // Convert a division by constant divisor into an alternate Ideal graph. | |
279 // Return NULL if no transformation occurs. | |
280 static Node *transform_long_divide( PhaseGVN *phase, Node *dividend, jlong divisor ) { | |
281 // Check for invalid divisors | |
282 assert( divisor != 0L && divisor != min_jlong, | |
283 "bad divisor for transforming to long multiply" ); | |
284 | |
285 bool d_pos = divisor >= 0; | |
286 jlong d = d_pos ? divisor : -divisor; | |
287 const int N = 64; | |
288 | |
289 // Result | |
290 Node *q = NULL; | |
291 | |
292 if (d == 1) { | |
293 // division by +/- 1 | |
294 if (!d_pos) { | |
295 // Just negate the value | |
296 q = new (phase->C, 3) SubLNode(phase->longcon(0), dividend); | |
297 } | |
298 } else if ( is_power_of_2_long(d) ) { | |
299 | |
300 // division by +/- a power of 2 | |
301 | |
302 // See if we can simply do a shift without rounding | |
303 bool needs_rounding = true; | |
304 const Type *dt = phase->type(dividend); | |
305 const TypeLong *dtl = dt->isa_long(); | |
306 | |
307 if (dtl && dtl->_lo > 0) { | |
308 // we don't need to round a positive dividend | |
309 needs_rounding = false; | |
310 } else if( dividend->Opcode() == Op_AndL ) { | |
311 // An AND mask of sufficient size clears the low bits and | |
312 // I can avoid rounding. | |
313 const TypeLong *andconl = phase->type( dividend->in(2) )->isa_long(); | |
314 if( andconl && andconl->is_con(-d)) { | |
315 dividend = dividend->in(1); | |
316 needs_rounding = false; | |
317 } | |
318 } | |
319 | |
320 // Add rounding to the shift to handle the sign bit | |
321 int l = log2_long(d-1)+1; | |
322 if (needs_rounding) { | |
323 // Divide-by-power-of-2 can be made into a shift, but you have to do | |
324 // more math for the rounding. You need to add 0 for positive | |
325 // numbers, and "i-1" for negative numbers. Example: i=4, so the | |
326 // shift is by 2. You need to add 3 to negative dividends and 0 to | |
327 // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1, | |
328 // (-2+3)>>2 becomes 0, etc. | |
329 | |
330 // Compute 0 or -1, based on sign bit | |
331 Node *sign = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N - 1))); | |
332 // Mask sign bit to the low sign bits | |
333 Node *round = phase->transform(new (phase->C, 3) URShiftLNode(sign, phase->intcon(N - l))); | |
334 // Round up before shifting | |
335 dividend = phase->transform(new (phase->C, 3) AddLNode(dividend, round)); | |
336 } | |
337 | |
338 // Shift for division | |
339 q = new (phase->C, 3) RShiftLNode(dividend, phase->intcon(l)); | |
340 | |
341 if (!d_pos) { | |
342 q = new (phase->C, 3) SubLNode(phase->longcon(0), phase->transform(q)); | |
343 } | |
344 } else { | |
345 // Attempt the jlong constant divide -> multiply transform found in | |
346 // "Division by Invariant Integers using Multiplication" | |
347 // by Granlund and Montgomery | |
348 // See also "Hacker's Delight", chapter 10 by Warren. | |
349 | |
350 jlong magic_const; | |
351 jint shift_const; | |
352 if (magic_long_divide_constants(d, magic_const, shift_const)) { | |
353 // Compute the high half of the dividend x magic multiplication | |
354 Node *mul_hi = phase->transform(long_by_long_mulhi(phase, dividend, magic_const)); | |
355 | |
356 // The high half of the 128-bit multiply is computed. | |
357 if (magic_const < 0) { | |
358 // The magic multiplier is too large for a 64 bit constant. We've adjusted | |
359 // it down by 2^64, but have to add 1 dividend back in after the multiplication. | |
360 // This handles the "overflow" case described by Granlund and Montgomery. | |
361 mul_hi = phase->transform(new (phase->C, 3) AddLNode(dividend, mul_hi)); | |
362 } | |
363 | |
364 // Shift over the (adjusted) mulhi | |
365 if (shift_const != 0) { | |
366 mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(shift_const))); | |
367 } | |
368 | |
369 // Get a 0 or -1 from the sign of the dividend. | |
370 Node *addend0 = mul_hi; | |
371 Node *addend1 = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N-1))); | |
372 | |
373 // If the divisor is negative, swap the order of the input addends; | |
374 // this has the effect of negating the quotient. | |
375 if (!d_pos) { | |
376 Node *temp = addend0; addend0 = addend1; addend1 = temp; | |
377 } | |
378 | |
379 // Adjust the final quotient by subtracting -1 (adding 1) | |
380 // from the mul_hi. | |
381 q = new (phase->C, 3) SubLNode(addend0, addend1); | |
382 } | |
383 } | |
384 | |
385 return q; | |
146 } | 386 } |
147 | 387 |
148 //============================================================================= | 388 //============================================================================= |
149 //------------------------------Identity--------------------------------------- | 389 //------------------------------Identity--------------------------------------- |
150 // If the divisor is 1, we are an identity on the dividend. | 390 // If the divisor is 1, we are an identity on the dividend. |
162 return NULL; // Skip it | 402 return NULL; // Skip it |
163 | 403 |
164 const TypeInt *ti = t->isa_int(); | 404 const TypeInt *ti = t->isa_int(); |
165 if( !ti ) return NULL; | 405 if( !ti ) return NULL; |
166 if( !ti->is_con() ) return NULL; | 406 if( !ti->is_con() ) return NULL; |
167 int i = ti->get_con(); // Get divisor | 407 jint i = ti->get_con(); // Get divisor |
168 | 408 |
169 if (i == 0) return NULL; // Dividing by zero constant does not idealize | 409 if (i == 0) return NULL; // Dividing by zero constant does not idealize |
170 | 410 |
171 set_req(0,NULL); // Dividing by a not-zero constant; no faulting | 411 set_req(0,NULL); // Dividing by a not-zero constant; no faulting |
172 | 412 |
173 // Dividing by MININT does not optimize as a power-of-2 shift. | 413 // Dividing by MININT does not optimize as a power-of-2 shift. |
174 if( i == min_jint ) return NULL; | 414 if( i == min_jint ) return NULL; |
175 | 415 |
176 return transform_int_divide_to_long_multiply( phase, in(1), i ); | 416 return transform_int_divide( phase, in(1), i ); |
177 } | 417 } |
178 | 418 |
179 //------------------------------Value------------------------------------------ | 419 //------------------------------Value------------------------------------------ |
180 // A DivINode divides its inputs. The third input is a Control input, used to | 420 // A DivINode divides its inputs. The third input is a Control input, used to |
181 // prevent hoisting the divide above an unsafe test. | 421 // prevent hoisting the divide above an unsafe test. |
253 // Dividing by a power of 2 is a shift. | 493 // Dividing by a power of 2 is a shift. |
254 Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) { | 494 Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) { |
255 if (in(0) && remove_dead_region(phase, can_reshape)) return this; | 495 if (in(0) && remove_dead_region(phase, can_reshape)) return this; |
256 | 496 |
257 const Type *t = phase->type( in(2) ); | 497 const Type *t = phase->type( in(2) ); |
258 if( t == TypeLong::ONE ) // Identity? | 498 if( t == TypeLong::ONE ) // Identity? |
259 return NULL; // Skip it | 499 return NULL; // Skip it |
260 | 500 |
261 const TypeLong *ti = t->isa_long(); | 501 const TypeLong *tl = t->isa_long(); |
262 if( !ti ) return NULL; | 502 if( !tl ) return NULL; |
263 if( !ti->is_con() ) return NULL; | 503 if( !tl->is_con() ) return NULL; |
264 jlong i = ti->get_con(); // Get divisor | 504 jlong l = tl->get_con(); // Get divisor |
265 if( i ) set_req(0, NULL); // Dividing by a not-zero constant; no faulting | 505 |
506 if (l == 0) return NULL; // Dividing by zero constant does not idealize | |
507 | |
508 set_req(0,NULL); // Dividing by a not-zero constant; no faulting | |
266 | 509 |
267 // Dividing by MININT does not optimize as a power-of-2 shift. | 510 // Dividing by MININT does not optimize as a power-of-2 shift. |
268 if( i == min_jlong ) return NULL; | 511 if( l == min_jlong ) return NULL; |
269 | 512 |
270 // Check for negative power of 2 divisor, if so, negate it and set a flag | 513 return transform_long_divide( phase, in(1), l ); |
271 // to indicate result needs to be negated. Note that negating the dividend | |
272 // here does not work when it has the value MININT | |
273 Node *dividend = in(1); | |
274 bool negate_res = false; | |
275 if (is_power_of_2_long(-i)) { | |
276 i = -i; // Flip divisor | |
277 negate_res = true; | |
278 } | |
279 | |
280 // Check for power of 2 | |
281 if (!is_power_of_2_long(i)) // Is divisor a power of 2? | |
282 return NULL; // Not a power of 2 | |
283 | |
284 // Compute number of bits to shift | |
285 int log_i = log2_long(i); | |
286 | |
287 // See if we can simply do a shift without rounding | |
288 bool needs_rounding = true; | |
289 const Type *dt = phase->type(dividend); | |
290 const TypeLong *dtl = dt->isa_long(); | |
291 | |
292 if (dtl && dtl->_lo > 0) { | |
293 // we don't need to round a positive dividend | |
294 needs_rounding = false; | |
295 } else if( dividend->Opcode() == Op_AndL ) { | |
296 // An AND mask of sufficient size clears the low bits and | |
297 // I can avoid rounding. | |
298 const TypeLong *andconi = phase->type( dividend->in(2) )->isa_long(); | |
299 if( andconi && | |
300 andconi->is_con() && | |
301 andconi->get_con() == -i ) { | |
302 dividend = dividend->in(1); | |
303 needs_rounding = false; | |
304 } | |
305 } | |
306 | |
307 if (!needs_rounding) { | |
308 Node *result = new (phase->C, 3) RShiftLNode(dividend, phase->intcon(log_i)); | |
309 if (negate_res) { | |
310 result = phase->transform(result); | |
311 result = new (phase->C, 3) SubLNode(phase->longcon(0), result); | |
312 } | |
313 return result; | |
314 } | |
315 | |
316 // Divide-by-power-of-2 can be made into a shift, but you have to do | |
317 // more math for the rounding. You need to add 0 for positive | |
318 // numbers, and "i-1" for negative numbers. Example: i=4, so the | |
319 // shift is by 2. You need to add 3 to negative dividends and 0 to | |
320 // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1, | |
321 // (-2+3)>>2 becomes 0, etc. | |
322 | |
323 // Compute 0 or -1, based on sign bit | |
324 Node *sign = phase->transform(new (phase->C, 3) RShiftLNode(dividend,phase->intcon(63))); | |
325 // Mask sign bit to the low sign bits | |
326 Node *round = phase->transform(new (phase->C, 3) AndLNode(sign,phase->longcon(i-1))); | |
327 // Round up before shifting | |
328 Node *sum = phase->transform(new (phase->C, 3) AddLNode(dividend,round)); | |
329 // Shift for division | |
330 Node *result = new (phase->C, 3) RShiftLNode(sum, phase->intcon(log_i)); | |
331 if (negate_res) { | |
332 result = phase->transform(result); | |
333 result = new (phase->C, 3) SubLNode(phase->longcon(0), result); | |
334 } | |
335 | |
336 return result; | |
337 } | 514 } |
338 | 515 |
339 //------------------------------Value------------------------------------------ | 516 //------------------------------Value------------------------------------------ |
340 // A DivLNode divides its inputs. The third input is a Control input, used to | 517 // A DivLNode divides its inputs. The third input is a Control input, used to |
341 // prevent hoisting the divide above an unsafe test. | 518 // prevent hoisting the divide above an unsafe test. |
613 Node *divisor = in(2); // Also is mask | 790 Node *divisor = in(2); // Also is mask |
614 | 791 |
615 hook->init_req(0, x); // Add a use to x to prevent him from dying | 792 hook->init_req(0, x); // Add a use to x to prevent him from dying |
616 // Generate code to reduce X rapidly to nearly 2^k-1. | 793 // Generate code to reduce X rapidly to nearly 2^k-1. |
617 for( int i = 0; i < trip_count; i++ ) { | 794 for( int i = 0; i < trip_count; i++ ) { |
618 Node *xl = phase->transform( new (phase->C, 3) AndINode(x,divisor) ); | 795 Node *xl = phase->transform( new (phase->C, 3) AndINode(x,divisor) ); |
619 Node *xh = phase->transform( new (phase->C, 3) RShiftINode(x,phase->intcon(k)) ); // Must be signed | 796 Node *xh = phase->transform( new (phase->C, 3) RShiftINode(x,phase->intcon(k)) ); // Must be signed |
620 x = phase->transform( new (phase->C, 3) AddINode(xh,xl) ); | 797 x = phase->transform( new (phase->C, 3) AddINode(xh,xl) ); |
621 hook->set_req(0, x); | 798 hook->set_req(0, x); |
622 } | 799 } |
623 | 800 |
624 // Generate sign-fixup code. Was original value positive? | 801 // Generate sign-fixup code. Was original value positive? |
625 // int hack_res = (i >= 0) ? divisor : 1; | 802 // int hack_res = (i >= 0) ? divisor : 1; |
626 Node *cmp1 = phase->transform( new (phase->C, 3) CmpINode( in(1), phase->intcon(0) ) ); | 803 Node *cmp1 = phase->transform( new (phase->C, 3) CmpINode( in(1), phase->intcon(0) ) ); |
673 | 850 |
674 // Save in(1) so that it cannot be changed or deleted | 851 // Save in(1) so that it cannot be changed or deleted |
675 hook->init_req(0, in(1)); | 852 hook->init_req(0, in(1)); |
676 | 853 |
677 // Divide using the transform from DivI to MulL | 854 // Divide using the transform from DivI to MulL |
678 Node *divide = phase->transform( transform_int_divide_to_long_multiply( phase, in(1), pos_con ) ); | 855 Node *result = transform_int_divide( phase, in(1), pos_con ); |
679 | 856 if (result != NULL) { |
680 // Re-multiply, using a shift if this is a power of two | 857 Node *divide = phase->transform(result); |
681 Node *mult = NULL; | 858 |
682 | 859 // Re-multiply, using a shift if this is a power of two |
683 if( log2_con >= 0 ) | 860 Node *mult = NULL; |
684 mult = phase->transform( new (phase->C, 3) LShiftINode( divide, phase->intcon( log2_con ) ) ); | 861 |
685 else | 862 if( log2_con >= 0 ) |
686 mult = phase->transform( new (phase->C, 3) MulINode( divide, phase->intcon( pos_con ) ) ); | 863 mult = phase->transform( new (phase->C, 3) LShiftINode( divide, phase->intcon( log2_con ) ) ); |
687 | 864 else |
688 // Finally, subtract the multiplied divided value from the original | 865 mult = phase->transform( new (phase->C, 3) MulINode( divide, phase->intcon( pos_con ) ) ); |
689 Node *result = new (phase->C, 3) SubINode( in(1), mult ); | 866 |
867 // Finally, subtract the multiplied divided value from the original | |
868 result = new (phase->C, 3) SubINode( in(1), mult ); | |
869 } | |
690 | 870 |
691 // Now remove the bogus extra edges used to keep things alive | 871 // Now remove the bogus extra edges used to keep things alive |
692 if (can_reshape) { | 872 if (can_reshape) { |
693 phase->is_IterGVN()->remove_dead_node(hook); | 873 phase->is_IterGVN()->remove_dead_node(hook); |
694 } else { | 874 } else { |
746 if( remove_dead_region(phase, can_reshape) ) return this; | 926 if( remove_dead_region(phase, can_reshape) ) return this; |
747 | 927 |
748 // Get the modulus | 928 // Get the modulus |
749 const Type *t = phase->type( in(2) ); | 929 const Type *t = phase->type( in(2) ); |
750 if( t == Type::TOP ) return NULL; | 930 if( t == Type::TOP ) return NULL; |
751 const TypeLong *ti = t->is_long(); | 931 const TypeLong *tl = t->is_long(); |
752 | 932 |
753 // Check for useless control input | 933 // Check for useless control input |
754 // Check for excluding mod-zero case | 934 // Check for excluding mod-zero case |
755 if( in(0) && (ti->_hi < 0 || ti->_lo > 0) ) { | 935 if( in(0) && (tl->_hi < 0 || tl->_lo > 0) ) { |
756 set_req(0, NULL); // Yank control input | 936 set_req(0, NULL); // Yank control input |
757 return this; | 937 return this; |
758 } | 938 } |
759 | 939 |
760 // See if we are MOD'ing by 2^k or 2^k-1. | 940 // See if we are MOD'ing by 2^k or 2^k-1. |
761 if( !ti->is_con() ) return NULL; | 941 if( !tl->is_con() ) return NULL; |
762 jlong con = ti->get_con(); | 942 jlong con = tl->get_con(); |
763 bool m1 = false; | 943 |
764 if( !is_power_of_2_long(con) ) { // Not 2^k | 944 Node *hook = new (phase->C, 1) Node(1); |
765 if( !is_power_of_2_long(con+1) ) // Not 2^k-1? | |
766 return NULL; // No interesting mod hacks | |
767 m1 = true; // Found 2^k-1 | |
768 con++; // Convert to 2^k form | |
769 } | |
770 uint k = log2_long(con); // Extract k | |
771 | 945 |
772 // Expand mod | 946 // Expand mod |
773 if( !m1 ) { // Case 2^k | 947 if( con >= 0 && con < max_jlong && is_power_of_2_long(con+1) ) { |
774 } else { // Case 2^k-1 | 948 uint k = log2_long(con); // Extract k |
949 | |
775 // Basic algorithm by David Detlefs. See fastmod_long.java for gory details. | 950 // Basic algorithm by David Detlefs. See fastmod_long.java for gory details. |
776 // Used to help a popular random number generator which does a long-mod | 951 // Used to help a popular random number generator which does a long-mod |
777 // of 2^31-1 and shows up in SpecJBB and SciMark. | 952 // of 2^31-1 and shows up in SpecJBB and SciMark. |
778 static int unroll_factor[] = { 999, 999, 61, 30, 20, 15, 12, 10, 8, 7, 6, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/}; | 953 static int unroll_factor[] = { 999, 999, 61, 30, 20, 15, 12, 10, 8, 7, 6, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/}; |
779 int trip_count = 1; | 954 int trip_count = 1; |
780 if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k]; | 955 if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k]; |
781 if( trip_count > 4 ) return NULL; // Too much unrolling | 956 |
782 if (ConditionalMoveLimit == 0) return NULL; // cmov is required | 957 // If the unroll factor is not too large, and if conditional moves are |
783 | 958 // ok, then use this case |
784 Node *x = in(1); // Value being mod'd | 959 if( trip_count <= 5 && ConditionalMoveLimit != 0 ) { |
785 Node *divisor = in(2); // Also is mask | 960 Node *x = in(1); // Value being mod'd |
786 | 961 Node *divisor = in(2); // Also is mask |
787 Node *hook = new (phase->C, 1) Node(x); | 962 |
788 // Generate code to reduce X rapidly to nearly 2^k-1. | 963 hook->init_req(0, x); // Add a use to x to prevent him from dying |
789 for( int i = 0; i < trip_count; i++ ) { | 964 // Generate code to reduce X rapidly to nearly 2^k-1. |
965 for( int i = 0; i < trip_count; i++ ) { | |
790 Node *xl = phase->transform( new (phase->C, 3) AndLNode(x,divisor) ); | 966 Node *xl = phase->transform( new (phase->C, 3) AndLNode(x,divisor) ); |
791 Node *xh = phase->transform( new (phase->C, 3) RShiftLNode(x,phase->intcon(k)) ); // Must be signed | 967 Node *xh = phase->transform( new (phase->C, 3) RShiftLNode(x,phase->intcon(k)) ); // Must be signed |
792 x = phase->transform( new (phase->C, 3) AddLNode(xh,xl) ); | 968 x = phase->transform( new (phase->C, 3) AddLNode(xh,xl) ); |
793 hook->set_req(0, x); // Add a use to x to prevent him from dying | 969 hook->set_req(0, x); // Add a use to x to prevent him from dying |
794 } | 970 } |
795 // Generate sign-fixup code. Was original value positive? | 971 |
796 // long hack_res = (i >= 0) ? divisor : CONST64(1); | 972 // Generate sign-fixup code. Was original value positive? |
797 Node *cmp1 = phase->transform( new (phase->C, 3) CmpLNode( in(1), phase->longcon(0) ) ); | 973 // long hack_res = (i >= 0) ? divisor : CONST64(1); |
798 Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) ); | 974 Node *cmp1 = phase->transform( new (phase->C, 3) CmpLNode( in(1), phase->longcon(0) ) ); |
799 Node *cmov1= phase->transform( new (phase->C, 4) CMoveLNode(bol1, phase->longcon(1), divisor, TypeLong::LONG) ); | 975 Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) ); |
800 // if( x >= hack_res ) x -= divisor; | 976 Node *cmov1= phase->transform( new (phase->C, 4) CMoveLNode(bol1, phase->longcon(1), divisor, TypeLong::LONG) ); |
801 Node *sub = phase->transform( new (phase->C, 3) SubLNode( x, divisor ) ); | 977 // if( x >= hack_res ) x -= divisor; |
802 Node *cmp2 = phase->transform( new (phase->C, 3) CmpLNode( x, cmov1 ) ); | 978 Node *sub = phase->transform( new (phase->C, 3) SubLNode( x, divisor ) ); |
803 Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) ); | 979 Node *cmp2 = phase->transform( new (phase->C, 3) CmpLNode( x, cmov1 ) ); |
804 // Convention is to not transform the return value of an Ideal | 980 Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) ); |
805 // since Ideal is expected to return a modified 'this' or a new node. | 981 // Convention is to not transform the return value of an Ideal |
806 Node *cmov2= new (phase->C, 4) CMoveLNode(bol2, x, sub, TypeLong::LONG); | 982 // since Ideal is expected to return a modified 'this' or a new node. |
807 // cmov2 is now the mod | 983 Node *cmov2= new (phase->C, 4) CMoveLNode(bol2, x, sub, TypeLong::LONG); |
808 | 984 // cmov2 is now the mod |
809 // Now remove the bogus extra edges used to keep things alive | 985 |
810 if (can_reshape) { | 986 // Now remove the bogus extra edges used to keep things alive |
811 phase->is_IterGVN()->remove_dead_node(hook); | 987 if (can_reshape) { |
812 } else { | 988 phase->is_IterGVN()->remove_dead_node(hook); |
813 hook->set_req(0, NULL); // Just yank bogus edge during Parse phase | 989 } else { |
814 } | 990 hook->set_req(0, NULL); // Just yank bogus edge during Parse phase |
815 return cmov2; | 991 } |
816 } | 992 return cmov2; |
817 return NULL; | 993 } |
994 } | |
995 | |
996 // Fell thru, the unroll case is not appropriate. Transform the modulo | |
997 // into a long multiply/int multiply/subtract case | |
998 | |
999 // Cannot handle mod 0, and min_jlong isn't handled by the transform | |
1000 if( con == 0 || con == min_jlong ) return NULL; | |
1001 | |
1002 // Get the absolute value of the constant; at this point, we can use this | |
1003 jlong pos_con = (con >= 0) ? con : -con; | |
1004 | |
1005 // integer Mod 1 is always 0 | |
1006 if( pos_con == 1 ) return new (phase->C, 1) ConLNode(TypeLong::ZERO); | |
1007 | |
1008 int log2_con = -1; | |
1009 | |
1010 // If this is a power of two, then maybe we can mask it | |
1011 if( is_power_of_2_long(pos_con) ) { | |
1012 log2_con = log2_long(pos_con); | |
1013 | |
1014 const Type *dt = phase->type(in(1)); | |
1015 const TypeLong *dtl = dt->isa_long(); | |
1016 | |
1017 // See if this can be masked, if the dividend is non-negative | |
1018 if( dtl && dtl->_lo >= 0 ) | |
1019 return ( new (phase->C, 3) AndLNode( in(1), phase->longcon( pos_con-1 ) ) ); | |
1020 } | |
1021 | |
1022 // Save in(1) so that it cannot be changed or deleted | |
1023 hook->init_req(0, in(1)); | |
1024 | |
1025 // Divide using the transform from DivL to MulL | |
1026 Node *result = transform_long_divide( phase, in(1), pos_con ); | |
1027 if (result != NULL) { | |
1028 Node *divide = phase->transform(result); | |
1029 | |
1030 // Re-multiply, using a shift if this is a power of two | |
1031 Node *mult = NULL; | |
1032 | |
1033 if( log2_con >= 0 ) | |
1034 mult = phase->transform( new (phase->C, 3) LShiftLNode( divide, phase->intcon( log2_con ) ) ); | |
1035 else | |
1036 mult = phase->transform( new (phase->C, 3) MulLNode( divide, phase->longcon( pos_con ) ) ); | |
1037 | |
1038 // Finally, subtract the multiplied divided value from the original | |
1039 result = new (phase->C, 3) SubLNode( in(1), mult ); | |
1040 } | |
1041 | |
1042 // Now remove the bogus extra edges used to keep things alive | |
1043 if (can_reshape) { | |
1044 phase->is_IterGVN()->remove_dead_node(hook); | |
1045 } else { | |
1046 hook->set_req(0, NULL); // Just yank bogus edge during Parse phase | |
1047 } | |
1048 | |
1049 // return the value | |
1050 return result; | |
818 } | 1051 } |
819 | 1052 |
820 //------------------------------Value------------------------------------------ | 1053 //------------------------------Value------------------------------------------ |
821 const Type *ModLNode::Value( PhaseTransform *phase ) const { | 1054 const Type *ModLNode::Value( PhaseTransform *phase ) const { |
822 // Either input is TOP ==> the result is TOP | 1055 // Either input is TOP ==> the result is TOP |