comparison src/share/vm/opto/divnode.cpp @ 145:f3de1255b035

6603011: RFE: Optimize long division Summary: Transform long division by constant into multiply Reviewed-by: never, kvn
author rasbold
date Wed, 07 May 2008 08:06:46 -0700
parents 6e825ad773c6
children d1605aabd0a1
comparison
equal deleted inserted replaced
132:60b728ec77c1 145:f3de1255b035
28 28
29 #include "incls/_precompiled.incl" 29 #include "incls/_precompiled.incl"
30 #include "incls/_divnode.cpp.incl" 30 #include "incls/_divnode.cpp.incl"
31 #include <math.h> 31 #include <math.h>
32 32
33 // Implement the integer constant divide -> long multiply transform found in 33 //----------------------magic_int_divide_constants-----------------------------
34 // "Division by Invariant Integers using Multiplication" 34 // Compute magic multiplier and shift constant for converting a 32 bit divide
35 // by Granlund and Montgomery 35 // by constant into a multiply/shift/add series. Return false if calculations
36 static Node *transform_int_divide_to_long_multiply( PhaseGVN *phase, Node *dividend, int divisor ) { 36 // fail.
37 //
38 // Borrowed almost verbatim from Hacker's Delight by Henry S. Warren, Jr. with
39 // minor type name and parameter changes.
// Parameters:
//   d - the constant divisor.
//   M - out: magic multiplier, q2 + 1, negated when d < 0.
//   s - out: shift amount, p - 32.
// Returns false (M and s are left unwritten) when d is 0 or 1, true otherwise.
// NOTE(review): d == -1 also has |d| == 1 but is not rejected, and ABS(d) is
// evaluated before any range check (so min_jint reaches it unfiltered).
// The caller visible in this file passes only a positive, non-power-of-2
// value; confirm no other caller relies on negative inputs.
40 static bool magic_int_divide_constants(jint d, jint &M, jint &s) {
41 int32_t p;
42 uint32_t ad, anc, delta, q1, r1, q2, r2, t;
43 const uint32_t two31 = 0x80000000L; // 2**31.
44
45 ad = ABS(d);
// Reject before the "t%ad" below so that d == 0 never divides by zero.
46 if (d == 0 || d == 1) return false;
// (uint32_t)d >> 31 is the sign bit of d: t is 2**31 for d >= 0 and
// 2**31 + 1 for d < 0.
47 t = two31 + ((uint32_t)d >> 31);
48 anc = t - 1 - t%ad; // Absolute value of nc.
49 p = 31; // Init. p.
50 q1 = two31/anc; // Init. q1 = 2**p/|nc|.
51 r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|).
52 q2 = two31/ad; // Init. q2 = 2**p/|d|.
53 r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|).
// Each round increments p and doubles both quotient/remainder pairs, folding
// a remainder that reached its divisor back into the quotient. The loop
// stops once q1 > delta, or q1 == delta with a non-zero r1.
54 do {
55 p = p + 1;
56 q1 = 2*q1; // Update q1 = 2**p/|nc|.
57 r1 = 2*r1; // Update r1 = rem(2**p, |nc|).
58 if (r1 >= anc) { // (Must be an unsigned
59 q1 = q1 + 1; // comparison here).
60 r1 = r1 - anc;
61 }
62 q2 = 2*q2; // Update q2 = 2**p/|d|.
63 r2 = 2*r2; // Update r2 = rem(2**p, |d|).
64 if (r2 >= ad) { // (Must be an unsigned
65 q2 = q2 + 1; // comparison here).
66 r2 = r2 - ad;
67 }
68 delta = ad - r2;
69 } while (q1 < delta || (q1 == delta && r1 == 0));
70
// q2 + 1 is computed in uint32_t and then stored into the signed jint M, so
// a multiplier of 2**31 or more shows up as a negative M; the caller in this
// file tests "magic_const < 0" for exactly that case.
71 M = q2 + 1;
72 if (d < 0) M = -M; // Magic number and
73 s = p - 32; // shift amount to return.
74
75 return true;
76 }
77
78 //--------------------------transform_int_divide-------------------------------
79 // Convert a division by constant divisor into an alternate Ideal graph.
80 // Return NULL if no transformation occurs.
//
// Parameters:
//   phase    - GVN phase used to create constants and transform new nodes.
//   dividend - node producing the int value being divided.
//   divisor  - the constant divisor; asserted non-zero and not min_jint.
// The returned node (when non-NULL) is the last node built and has NOT been
// passed through phase->transform(). NULL is returned for a divisor of +1 and
// when magic_int_divide_constants() reports failure.
// Three paths (new revision): |divisor| == 1 (negate or nothing), |divisor| a
// power of 2 (rounded shift), anything else (multiply by magic constant).
81 static Node *transform_int_divide( PhaseGVN *phase, Node *dividend, jint divisor ) {
37 82
38 // Check for invalid divisors 83 // Check for invalid divisors
39 assert( divisor != 0 && divisor != min_jint && divisor != 1, 84 assert( divisor != 0 && divisor != min_jint,
40 "bad divisor for transforming to long multiply" ); 85 "bad divisor for transforming to long multiply" );
41 86
42 // Compute l = ceiling(log2(d))
43 // presumes d is more likely small
44 bool d_pos = divisor >= 0; 87 bool d_pos = divisor >= 0;
45 int d = d_pos ? divisor : -divisor; 88 jint d = d_pos ? divisor : -divisor;
46 unsigned ud = (unsigned)d;
47 const int N = 32; 89 const int N = 32;
48 int l = log2_intptr(d-1)+1;
49 int sh_post = l;
50
51 const uint64_t U1 = (uint64_t)1;
52
53 // Cliff pointed out how to prevent overflow (from the paper)
54 uint64_t m_low = (((U1 << l) - ud) << N) / ud + (U1 << N);
55 uint64_t m_high = ((((U1 << l) - ud) << N) + (U1 << (l+1))) / ud + (U1 << N);
56
57 // Reduce to lowest terms
58 for ( ; sh_post > 0; sh_post-- ) {
59 uint64_t m_low_1 = m_low >> 1;
60 uint64_t m_high_1 = m_high >> 1;
61 if ( m_low_1 >= m_high_1 )
62 break;
63 m_low = m_low_1;
64 m_high = m_high_1;
65 }
66 90
67 // Result 91 // Result
68 Node *q; 92 Node *q = NULL;
69 93
70 // division by +/- 1
71 if (d == 1) { 94 if (d == 1) {
72 // Filtered out as identity above 95 // division by +/- 1
73 if (d_pos) 96 if (!d_pos) {
74 return NULL; 97 // Just negate the value
75
76 // Just negate the value
77 else {
78 q = new (phase->C, 3) SubINode(phase->intcon(0), dividend); 98 q = new (phase->C, 3) SubINode(phase->intcon(0), dividend);
79 } 99 }
80 } 100 } else if ( is_power_of_2(d) ) {
81 101 // division by +/- a power of 2
82 // division by +/- a power of 2
83 else if ( is_power_of_2(d) ) {
84 102
85 // See if we can simply do a shift without rounding 103 // See if we can simply do a shift without rounding
86 bool needs_rounding = true; 104 bool needs_rounding = true;
87 const Type *dt = phase->type(dividend); 105 const Type *dt = phase->type(dividend);
88 const TypeInt *dti = dt->isa_int(); 106 const TypeInt *dti = dt->isa_int();
89 107 if (dti && dti->_lo >= 0) {
90 // we don't need to round a positive dividend 108 // we don't need to round a positive dividend
91 if (dti && dti->_lo >= 0)
92 needs_rounding = false; 109 needs_rounding = false;
93 110 } else if( dividend->Opcode() == Op_AndI ) {
94 // An AND mask of sufficient size clears the low bits and 111 // An AND mask of sufficient size clears the low bits and
95 // I can avoid rounding. 112 // I can avoid rounding.
96 else if( dividend->Opcode() == Op_AndI ) {
97 const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int(); 113 const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int();
98 if( andconi && andconi->is_con(-d) ) { 114 if( andconi && andconi->is_con(-d) ) {
99 dividend = dividend->in(1); 115 dividend = dividend->in(1);
100 needs_rounding = false; 116 needs_rounding = false;
101 } 117 }
102 } 118 }
103 119
104 // Add rounding to the shift to handle the sign bit 120 // Add rounding to the shift to handle the sign bit
105 if( needs_rounding ) { 121 int l = log2_intptr(d-1)+1;
106 Node *t1 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(l - 1))); 122 if (needs_rounding) {
107 Node *t2 = phase->transform(new (phase->C, 3) URShiftINode(t1, phase->intcon(N - l))); 123 // Divide-by-power-of-2 can be made into a shift, but you have to do
108 dividend = phase->transform(new (phase->C, 3) AddINode(dividend, t2)); 124 // more math for the rounding. You need to add 0 for positive
109 } 125 // numbers, and "i-1" for negative numbers. Example: i=4, so the
110 126 // shift is by 2. You need to add 3 to negative dividends and 0 to
127 // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1,
128 // (-2+3)>>2 becomes 0, etc.
129
130 // Compute 0 or -1, based on sign bit
131 Node *sign = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N - 1)));
132 // Mask sign bit to the low sign bits
133 Node *round = phase->transform(new (phase->C, 3) URShiftINode(sign, phase->intcon(N - l)));
134 // Round up before shifting
135 dividend = phase->transform(new (phase->C, 3) AddINode(dividend, round));
136 }
137
138 // Shift for division
111 q = new (phase->C, 3) RShiftINode(dividend, phase->intcon(l)); 139 q = new (phase->C, 3) RShiftINode(dividend, phase->intcon(l));
112 140
113 if (!d_pos) 141 if (!d_pos) {
114 q = new (phase->C, 3) SubINode(phase->intcon(0), phase->transform(q)); 142 q = new (phase->C, 3) SubINode(phase->intcon(0), phase->transform(q));
115 } 143 }
116 144 } else {
117 // division by something else 145 // Attempt the jint constant divide -> multiply transform found in
118 else if (m_high < (U1 << (N-1))) { 146 // "Division by Invariant Integers using Multiplication"
119 Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend)); 147 // by Granlund and Montgomery
120 Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high))); 148 // See also "Hacker's Delight", chapter 10 by Warren.
121 Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(sh_post+N))); 149
122 Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3)); 150 jint magic_const;
123 Node *t5 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1))); 151 jint shift_const;
// New revision: the magic constants are computed for d, the absolute value
// of the divisor; a negative divisor is handled further down by swapping the
// operands of the final subtraction.
124 152 if (magic_int_divide_constants(d, magic_const, shift_const)) {
125 q = new (phase->C, 3) SubINode(d_pos ? t4 : t5, d_pos ? t5 : t4); 153 Node *magic = phase->longcon(magic_const);
126 } 154 Node *dividend_long = phase->transform(new (phase->C, 2) ConvI2LNode(dividend));
127 155
128 // This handles that case where m_high is >= 2**(N-1). In that case, 156 // Compute the high half of the dividend x magic multiplication
129 // we subtract out 2**N from the multiply and add it in later as 157 Node *mul_hi = phase->transform(new (phase->C, 3) MulLNode(dividend_long, magic));
130 // "dividend" in the equation (t5). This case computes the same result 158
131 // as the immediately preceding case, save that rounding and overflow 159 if (magic_const < 0) {
132 // are accounted for. 160 mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(N)));
133 else { 161 mul_hi = phase->transform(new (phase->C, 2) ConvL2INode(mul_hi));
134 Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend)); 162
135 Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high - (U1 << N)))); 163 // The magic multiplier is too large for a 32 bit constant. We've adjusted
136 Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(N))); 164 // it down by 2^32, but have to add 1 dividend back in after the multiplication.
137 Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3)); 165 // This handles the "overflow" case described by Granlund and Montgomery.
138 Node *t5 = phase->transform(new (phase->C, 3) AddINode(dividend, t4)); 166 mul_hi = phase->transform(new (phase->C, 3) AddINode(dividend, mul_hi));
139 Node *t6 = phase->transform(new (phase->C, 3) RShiftINode(t5, phase->intcon(sh_post))); 167
140 Node *t7 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1))); 168 // Shift over the (adjusted) mulhi
141 169 if (shift_const != 0) {
142 q = new (phase->C, 3) SubINode(d_pos ? t6 : t7, d_pos ? t7 : t6); 170 mul_hi = phase->transform(new (phase->C, 3) RShiftINode(mul_hi, phase->intcon(shift_const)));
143 } 171 }
144 172 } else {
145 return (q); 173 // No add is required, we can merge the shifts together.
174 mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(N + shift_const)));
175 mul_hi = phase->transform(new (phase->C, 2) ConvL2INode(mul_hi));
176 }
177
178 // Get a 0 or -1 from the sign of the dividend.
179 Node *addend0 = mul_hi;
180 Node *addend1 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1)));
181
182 // If the divisor is negative, swap the order of the input addends;
183 // this has the effect of negating the quotient.
184 if (!d_pos) {
185 Node *temp = addend0; addend0 = addend1; addend1 = temp;
186 }
187
188 // Adjust the final quotient by subtracting -1 (adding 1)
189 // from the mul_hi.
190 q = new (phase->C, 3) SubINode(addend0, addend1);
191 }
192 }
193
// q is still NULL here when the divisor was +1 or the magic computation failed.
194 return q;
195 }
196
197 //---------------------magic_long_divide_constants-----------------------------
198 // Compute magic multiplier and shift constant for converting a 64 bit divide
199 // by constant into a multiply/shift/add series. Return false if calculations
200 // fail.
201 //
202 // Borrowed almost verbatim from Hacker's Delight by Henry S. Warren, Jr. with
203 // minor type name and parameter changes. Adjusted to 64 bit word width.
//
// Parameters:
//   d - the constant divisor.
//   M - out: magic multiplier, q2 + 1, negated when d < 0.
//   s - out: shift amount, p - 64.
// Returns false (M and s are left unwritten) when d is 0 or 1, true otherwise.
// NOTE(review): d == -1 also has |d| == 1 but is not rejected, and ABS(d) is
// evaluated before any range check. The caller visible in this file passes
// only a positive, non-power-of-2 value; confirm no other caller relies on
// negative inputs.
204 static bool magic_long_divide_constants(jlong d, jlong &M, jint &s) {
205 int64_t p;
206 uint64_t ad, anc, delta, q1, r1, q2, r2, t;
207 const uint64_t two63 = 0x8000000000000000LL; // 2**63.
208
209 ad = ABS(d);
// Reject before the "t%ad" below so that d == 0 never divides by zero.
210 if (d == 0 || d == 1) return false;
// (uint64_t)d >> 63 is the sign bit of d: t is 2**63 for d >= 0 and
// 2**63 + 1 for d < 0.
211 t = two63 + ((uint64_t)d >> 63);
212 anc = t - 1 - t%ad; // Absolute value of nc.
213 p = 63; // Init. p.
214 q1 = two63/anc; // Init. q1 = 2**p/|nc|.
215 r1 = two63 - q1*anc; // Init. r1 = rem(2**p, |nc|).
216 q2 = two63/ad; // Init. q2 = 2**p/|d|.
217 r2 = two63 - q2*ad; // Init. r2 = rem(2**p, |d|).
// Each round increments p and doubles both quotient/remainder pairs, folding
// a remainder that reached its divisor back into the quotient. The loop
// stops once q1 > delta, or q1 == delta with a non-zero r1.
218 do {
219 p = p + 1;
220 q1 = 2*q1; // Update q1 = 2**p/|nc|.
221 r1 = 2*r1; // Update r1 = rem(2**p, |nc|).
222 if (r1 >= anc) { // (Must be an unsigned
223 q1 = q1 + 1; // comparison here).
224 r1 = r1 - anc;
225 }
226 q2 = 2*q2; // Update q2 = 2**p/|d|.
227 r2 = 2*r2; // Update r2 = rem(2**p, |d|).
228 if (r2 >= ad) { // (Must be an unsigned
229 q2 = q2 + 1; // comparison here).
230 r2 = r2 - ad;
231 }
232 delta = ad - r2;
233 } while (q1 < delta || (q1 == delta && r1 == 0));
234
// q2 + 1 is computed in uint64_t and then stored into the signed jlong M, so
// a multiplier of 2**63 or more shows up as a negative M; the caller in this
// file tests "magic_const < 0" for exactly that case.
235 M = q2 + 1;
236 if (d < 0) M = -M; // Magic number and
// NOTE(review): p is int64_t while s is jint, so this assignment narrows;
// presumably p - 64 is always a small shift count - confirm.
237 s = p - 64; // shift amount to return.
238
239 return true;
240 }
241
242 //---------------------long_by_long_mulhi--------------------------------------
243 // Generate ideal node graph for upper half of a 64 bit x 64 bit multiplication
//
// Parameters:
//   phase       - GVN phase used to create constants and transform new nodes.
//   dividend    - node producing the 64-bit value to multiply (called u below).
//   magic_const - the 64-bit constant multiplier (called v below).
// Returns the root node of the generated graph. The returned node itself is
// NOT passed through phase->transform(); the caller in this file does that.
244 static Node *long_by_long_mulhi( PhaseGVN *phase, Node *dividend, jlong magic_const) {
245 // If the architecture supports a 64x64 mulhi, there is
246 // no need to synthesize it in ideal nodes.
247 if (Matcher::has_match_rule(Op_MulHiL)) {
248 Node *v = phase->longcon(magic_const);
249 return new (phase->C, 3) MulHiLNode(dividend, v);
250 }
251
252 const int N = 64;
253
// Split both operands into 32-bit halves. The high half of u comes from an
// RShiftL by 32 and the low half from masking with 0xFFFFFFFF; the halves of
// v are folded into constants right here.
254 Node *u_hi = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N / 2)));
255 Node *u_lo = phase->transform(new (phase->C, 3) AndLNode(dividend, phase->longcon(0xFFFFFFFF)));
256
257 Node *v_hi = phase->longcon(magic_const >> N/2);
258 Node *v_lo = phase->longcon(magic_const & 0XFFFFFFFF);
259
// The four partial products of the half-width operands.
260 Node *hihi_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_hi));
261 Node *hilo_product = phase->transform(new (phase->C, 3) MulLNode(u_hi, v_lo));
262 Node *lohi_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_hi));
263 Node *lolo_product = phase->transform(new (phase->C, 3) MulLNode(u_lo, v_lo));
264
// Combine the partial products:
//   t1 = lolo >>> 32            (URShiftL: upper half of the low product)
//   t2 = hilo + t1
//   t3 = t2 >> 32,  t4 = t2 & 0xFFFFFFFF
//   t5 = t4 + lohi, t6 = t5 >> 32
//   t7 = t3 + hihi
//   result = t7 + t6
265 Node *t1 = phase->transform(new (phase->C, 3) URShiftLNode(lolo_product, phase->intcon(N / 2)));
266 Node *t2 = phase->transform(new (phase->C, 3) AddLNode(hilo_product, t1));
267 Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(N / 2)));
268 Node *t4 = phase->transform(new (phase->C, 3) AndLNode(t2, phase->longcon(0xFFFFFFFF)));
269 Node *t5 = phase->transform(new (phase->C, 3) AddLNode(t4, lohi_product));
270 Node *t6 = phase->transform(new (phase->C, 3) RShiftLNode(t5, phase->intcon(N / 2)));
271 Node *t7 = phase->transform(new (phase->C, 3) AddLNode(t3, hihi_product));
272
273 return new (phase->C, 3) AddLNode(t7, t6);
274 }
275
276
277 //--------------------------transform_long_divide------------------------------
278 // Convert a division by constant divisor into an alternate Ideal graph.
279 // Return NULL if no transformation occurs.
//
// Parameters:
//   phase    - GVN phase used to create constants and transform new nodes.
//   dividend - node producing the long value being divided.
//   divisor  - the constant divisor; asserted non-zero and not min_jlong.
// The returned node (when non-NULL) is the last node built and has NOT been
// passed through phase->transform(). NULL is returned for a divisor of +1 and
// when magic_long_divide_constants() reports failure.
280 static Node *transform_long_divide( PhaseGVN *phase, Node *dividend, jlong divisor ) {
281 // Check for invalid divisors
282 assert( divisor != 0L && divisor != min_jlong,
283 "bad divisor for transforming to long multiply" );
284
// Work with the absolute value d; the sign is re-applied at the end of each
// path (negation, or operand swap in the multiply path).
285 bool d_pos = divisor >= 0;
286 jlong d = d_pos ? divisor : -divisor;
287 const int N = 64;
288
289 // Result
290 Node *q = NULL;
291
292 if (d == 1) {
293 // division by +/- 1
294 if (!d_pos) {
295 // Just negate the value
296 q = new (phase->C, 3) SubLNode(phase->longcon(0), dividend);
297 }
298 } else if ( is_power_of_2_long(d) ) {
299
300 // division by +/- a power of 2
301
302 // See if we can simply do a shift without rounding
303 bool needs_rounding = true;
304 const Type *dt = phase->type(dividend);
305 const TypeLong *dtl = dt->isa_long();
306
// NOTE(review): transform_int_divide in this file tests "_lo >= 0" at the
// same spot. With "> 0" a dividend whose range starts at exactly 0 still gets
// the rounding nodes; they would add 0 for it, so this looks like a missed
// simplification rather than a wrong result - confirm.
307 if (dtl && dtl->_lo > 0) {
308 // we don't need to round a positive dividend
309 needs_rounding = false;
310 } else if( dividend->Opcode() == Op_AndL ) {
311 // An AND mask of sufficient size clears the low bits and
312 // I can avoid rounding.
313 const TypeLong *andconl = phase->type( dividend->in(2) )->isa_long();
314 if( andconl && andconl->is_con(-d)) {
315 dividend = dividend->in(1);
316 needs_rounding = false;
317 }
318 }
319
320 // Add rounding to the shift to handle the sign bit
// l is the shift count used for both the rounding mask and the final shift.
// Presumably log2_long(d-1)+1 equals log2(d) for a power-of-2 d - confirm
// against log2_long's definition.
321 int l = log2_long(d-1)+1;
322 if (needs_rounding) {
323 // Divide-by-power-of-2 can be made into a shift, but you have to do
324 // more math for the rounding. You need to add 0 for positive
325 // numbers, and "i-1" for negative numbers. Example: i=4, so the
326 // shift is by 2. You need to add 3 to negative dividends and 0 to
327 // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1,
328 // (-2+3)>>2 becomes 0, etc.
329
330 // Compute 0 or -1, based on sign bit
331 Node *sign = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N - 1)));
332 // Mask sign bit to the low sign bits
333 Node *round = phase->transform(new (phase->C, 3) URShiftLNode(sign, phase->intcon(N - l)));
334 // Round up before shifting
335 dividend = phase->transform(new (phase->C, 3) AddLNode(dividend, round));
336 }
337
338 // Shift for division
339 q = new (phase->C, 3) RShiftLNode(dividend, phase->intcon(l));
340
341 if (!d_pos) {
342 q = new (phase->C, 3) SubLNode(phase->longcon(0), phase->transform(q));
343 }
344 } else {
345 // Attempt the jlong constant divide -> multiply transform found in
346 // "Division by Invariant Integers using Multiplication"
347 // by Granlund and Montgomery
348 // See also "Hacker's Delight", chapter 10 by Warren.
349
350 jlong magic_const;
351 jint shift_const;
// The constants are computed for d, the absolute value of the divisor.
352 if (magic_long_divide_constants(d, magic_const, shift_const)) {
353 // Compute the high half of the dividend x magic multiplication
354 Node *mul_hi = phase->transform(long_by_long_mulhi(phase, dividend, magic_const));
355
356 // The high half of the 128-bit multiply is computed.
357 if (magic_const < 0) {
358 // The magic multiplier is too large for a 64 bit constant. We've adjusted
359 // it down by 2^64, but have to add 1 dividend back in after the multiplication.
360 // This handles the "overflow" case described by Granlund and Montgomery.
361 mul_hi = phase->transform(new (phase->C, 3) AddLNode(dividend, mul_hi));
362 }
363
364 // Shift over the (adjusted) mulhi
365 if (shift_const != 0) {
366 mul_hi = phase->transform(new (phase->C, 3) RShiftLNode(mul_hi, phase->intcon(shift_const)));
367 }
368
369 // Get a 0 or -1 from the sign of the dividend.
370 Node *addend0 = mul_hi;
371 Node *addend1 = phase->transform(new (phase->C, 3) RShiftLNode(dividend, phase->intcon(N-1)));
372
373 // If the divisor is negative, swap the order of the input addends;
374 // this has the effect of negating the quotient.
375 if (!d_pos) {
376 Node *temp = addend0; addend0 = addend1; addend1 = temp;
377 }
378
379 // Adjust the final quotient by subtracting -1 (adding 1)
380 // from the mul_hi.
381 q = new (phase->C, 3) SubLNode(addend0, addend1);
382 }
383 }
384
// q is still NULL here when the divisor was +1 or the magic computation failed.
385 return q;
146 } 386 }
147 387
148 //============================================================================= 388 //=============================================================================
149 //------------------------------Identity--------------------------------------- 389 //------------------------------Identity---------------------------------------
150 // If the divisor is 1, we are an identity on the dividend. 390 // If the divisor is 1, we are an identity on the dividend.
162 return NULL; // Skip it 402 return NULL; // Skip it
163 403
164 const TypeInt *ti = t->isa_int(); 404 const TypeInt *ti = t->isa_int();
165 if( !ti ) return NULL; 405 if( !ti ) return NULL;
166 if( !ti->is_con() ) return NULL; 406 if( !ti->is_con() ) return NULL;
167 int i = ti->get_con(); // Get divisor 407 jint i = ti->get_con(); // Get divisor
168 408
169 if (i == 0) return NULL; // Dividing by zero constant does not idealize 409 if (i == 0) return NULL; // Dividing by zero constant does not idealize
170 410
171 set_req(0,NULL); // Dividing by a not-zero constant; no faulting 411 set_req(0,NULL); // Dividing by a not-zero constant; no faulting
172 412
173 // Dividing by MININT does not optimize as a power-of-2 shift. 413 // Dividing by MININT does not optimize as a power-of-2 shift.
174 if( i == min_jint ) return NULL; 414 if( i == min_jint ) return NULL;
175 415
176 return transform_int_divide_to_long_multiply( phase, in(1), i ); 416 return transform_int_divide( phase, in(1), i );
177 } 417 }
178 418
179 //------------------------------Value------------------------------------------ 419 //------------------------------Value------------------------------------------
180 // A DivINode divides its inputs. The third input is a Control input, used to 420 // A DivINode divides its inputs. The third input is a Control input, used to
181 // prevent hoisting the divide above an unsafe test. 421 // prevent hoisting the divide above an unsafe test.
253 // Dividing by a power of 2 is a shift. 493 // Dividing by a power of 2 is a shift.
// New revision (right-hand column): returns `this` when
// remove_dead_region() succeeds, and NULL when the divisor type is
// TypeLong::ONE, is not a long, is not a constant, or is the constant 0 or
// min_jlong. Every other constant divisor has its control input cleared and
// is handed to transform_long_divide(), whose result (possibly NULL) is
// returned as is.
254 Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) { 494 Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) {
255 if (in(0) && remove_dead_region(phase, can_reshape)) return this; 495 if (in(0) && remove_dead_region(phase, can_reshape)) return this;
256 496
257 const Type *t = phase->type( in(2) ); 497 const Type *t = phase->type( in(2) );
258 if( t == TypeLong::ONE ) // Identity? 498 if( t == TypeLong::ONE ) // Identity?
259 return NULL; // Skip it 499 return NULL; // Skip it
260 500
261 const TypeLong *ti = t->isa_long(); 501 const TypeLong *tl = t->isa_long();
262 if( !ti ) return NULL; 502 if( !tl ) return NULL;
263 if( !ti->is_con() ) return NULL; 503 if( !tl->is_con() ) return NULL;
264 jlong i = ti->get_con(); // Get divisor 504 jlong l = tl->get_con(); // Get divisor
265 if( i ) set_req(0, NULL); // Dividing by a not-zero constant; no faulting 505
506 if (l == 0) return NULL; // Dividing by zero constant does not idealize
507
508 set_req(0,NULL); // Dividing by a not-zero constant; no faulting
266 509
267 // Dividing by MININT does not optimize as a power-of-2 shift. 510 // Dividing by MININT does not optimize as a power-of-2 shift.
268 if( i == min_jlong ) return NULL; 511 if( l == min_jlong ) return NULL;
269 512
270 // Check for negative power of 2 divisor, if so, negate it and set a flag 513 return transform_long_divide( phase, in(1), l );
// The rows from here to the closing brace carry a single line number
// (271-336) and have no right-hand counterpart: presumably the previous
// revision's inline power-of-2 handling, shown as removed in this view.
271 // to indicate result needs to be negated. Note that negating the dividend
272 // here does not work when it has the value MININT
273 Node *dividend = in(1);
274 bool negate_res = false;
275 if (is_power_of_2_long(-i)) {
276 i = -i; // Flip divisor
277 negate_res = true;
278 }
279
280 // Check for power of 2
281 if (!is_power_of_2_long(i)) // Is divisor a power of 2?
282 return NULL; // Not a power of 2
283
284 // Compute number of bits to shift
285 int log_i = log2_long(i);
286
287 // See if we can simply do a shift without rounding
288 bool needs_rounding = true;
289 const Type *dt = phase->type(dividend);
290 const TypeLong *dtl = dt->isa_long();
291
292 if (dtl && dtl->_lo > 0) {
293 // we don't need to round a positive dividend
294 needs_rounding = false;
295 } else if( dividend->Opcode() == Op_AndL ) {
296 // An AND mask of sufficient size clears the low bits and
297 // I can avoid rounding.
298 const TypeLong *andconi = phase->type( dividend->in(2) )->isa_long();
299 if( andconi &&
300 andconi->is_con() &&
301 andconi->get_con() == -i ) {
302 dividend = dividend->in(1);
303 needs_rounding = false;
304 }
305 }
306
307 if (!needs_rounding) {
308 Node *result = new (phase->C, 3) RShiftLNode(dividend, phase->intcon(log_i));
309 if (negate_res) {
310 result = phase->transform(result);
311 result = new (phase->C, 3) SubLNode(phase->longcon(0), result);
312 }
313 return result;
314 }
315
316 // Divide-by-power-of-2 can be made into a shift, but you have to do
317 // more math for the rounding. You need to add 0 for positive
318 // numbers, and "i-1" for negative numbers. Example: i=4, so the
319 // shift is by 2. You need to add 3 to negative dividends and 0 to
320 // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1,
321 // (-2+3)>>2 becomes 0, etc.
322
323 // Compute 0 or -1, based on sign bit
324 Node *sign = phase->transform(new (phase->C, 3) RShiftLNode(dividend,phase->intcon(63)));
325 // Mask sign bit to the low sign bits
326 Node *round = phase->transform(new (phase->C, 3) AndLNode(sign,phase->longcon(i-1)));
327 // Round up before shifting
328 Node *sum = phase->transform(new (phase->C, 3) AddLNode(dividend,round));
329 // Shift for division
330 Node *result = new (phase->C, 3) RShiftLNode(sum, phase->intcon(log_i));
331 if (negate_res) {
332 result = phase->transform(result);
333 result = new (phase->C, 3) SubLNode(phase->longcon(0), result);
334 }
335
336 return result;
337 } 514 }
338 515
339 //------------------------------Value------------------------------------------ 516 //------------------------------Value------------------------------------------
340 // A DivLNode divides its inputs. The third input is a Control input, used to 517 // A DivLNode divides its inputs. The third input is a Control input, used to
341 // prevent hoisting the divide above an unsafe test. 518 // prevent hoisting the divide above an unsafe test.
613 Node *divisor = in(2); // Also is mask 790 Node *divisor = in(2); // Also is mask
614 791
615 hook->init_req(0, x); // Add a use to x to prevent him from dying 792 hook->init_req(0, x); // Add a use to x to prevent him from dying
616 // Generate code to reduce X rapidly to nearly 2^k-1. 793 // Generate code to reduce X rapidly to nearly 2^k-1.
617 for( int i = 0; i < trip_count; i++ ) { 794 for( int i = 0; i < trip_count; i++ ) {
618 Node *xl = phase->transform( new (phase->C, 3) AndINode(x,divisor) ); 795 Node *xl = phase->transform( new (phase->C, 3) AndINode(x,divisor) );
619 Node *xh = phase->transform( new (phase->C, 3) RShiftINode(x,phase->intcon(k)) ); // Must be signed 796 Node *xh = phase->transform( new (phase->C, 3) RShiftINode(x,phase->intcon(k)) ); // Must be signed
620 x = phase->transform( new (phase->C, 3) AddINode(xh,xl) ); 797 x = phase->transform( new (phase->C, 3) AddINode(xh,xl) );
621 hook->set_req(0, x); 798 hook->set_req(0, x);
622 } 799 }
623 800
624 // Generate sign-fixup code. Was original value positive? 801 // Generate sign-fixup code. Was original value positive?
625 // int hack_res = (i >= 0) ? divisor : 1; 802 // int hack_res = (i >= 0) ? divisor : 1;
626 Node *cmp1 = phase->transform( new (phase->C, 3) CmpINode( in(1), phase->intcon(0) ) ); 803 Node *cmp1 = phase->transform( new (phase->C, 3) CmpINode( in(1), phase->intcon(0) ) );
673 850
674 // Save in(1) so that it cannot be changed or deleted 851 // Save in(1) so that it cannot be changed or deleted
675 hook->init_req(0, in(1)); 852 hook->init_req(0, in(1));
676 853
677 // Divide using the transform from DivI to MulL 854 // Divide using the transform from DivI to MulL
678 Node *divide = phase->transform( transform_int_divide_to_long_multiply( phase, in(1), pos_con ) ); 855 Node *result = transform_int_divide( phase, in(1), pos_con );
679 856 if (result != NULL) {
680 // Re-multiply, using a shift if this is a power of two 857 Node *divide = phase->transform(result);
681 Node *mult = NULL; 858
682 859 // Re-multiply, using a shift if this is a power of two
683 if( log2_con >= 0 ) 860 Node *mult = NULL;
684 mult = phase->transform( new (phase->C, 3) LShiftINode( divide, phase->intcon( log2_con ) ) ); 861
685 else 862 if( log2_con >= 0 )
686 mult = phase->transform( new (phase->C, 3) MulINode( divide, phase->intcon( pos_con ) ) ); 863 mult = phase->transform( new (phase->C, 3) LShiftINode( divide, phase->intcon( log2_con ) ) );
687 864 else
688 // Finally, subtract the multiplied divided value from the original 865 mult = phase->transform( new (phase->C, 3) MulINode( divide, phase->intcon( pos_con ) ) );
689 Node *result = new (phase->C, 3) SubINode( in(1), mult ); 866
867 // Finally, subtract the multiplied divided value from the original
868 result = new (phase->C, 3) SubINode( in(1), mult );
869 }
690 870
691 // Now remove the bogus extra edges used to keep things alive 871 // Now remove the bogus extra edges used to keep things alive
692 if (can_reshape) { 872 if (can_reshape) {
693 phase->is_IterGVN()->remove_dead_node(hook); 873 phase->is_IterGVN()->remove_dead_node(hook);
694 } else { 874 } else {
746 if( remove_dead_region(phase, can_reshape) ) return this; 926 if( remove_dead_region(phase, can_reshape) ) return this;
747 927
748 // Get the modulus 928 // Get the modulus
749 const Type *t = phase->type( in(2) ); 929 const Type *t = phase->type( in(2) );
750 if( t == Type::TOP ) return NULL; 930 if( t == Type::TOP ) return NULL;
751 const TypeLong *ti = t->is_long(); 931 const TypeLong *tl = t->is_long();
752 932
753 // Check for useless control input 933 // Check for useless control input
754 // Check for excluding mod-zero case 934 // Check for excluding mod-zero case
755 if( in(0) && (ti->_hi < 0 || ti->_lo > 0) ) { 935 if( in(0) && (tl->_hi < 0 || tl->_lo > 0) ) {
756 set_req(0, NULL); // Yank control input 936 set_req(0, NULL); // Yank control input
757 return this; 937 return this;
758 } 938 }
759 939
760 // See if we are MOD'ing by 2^k or 2^k-1. 940 // See if we are MOD'ing by 2^k or 2^k-1.
761 if( !ti->is_con() ) return NULL; 941 if( !tl->is_con() ) return NULL;
762 jlong con = ti->get_con(); 942 jlong con = tl->get_con();
763 bool m1 = false; 943
764 if( !is_power_of_2_long(con) ) { // Not 2^k 944 Node *hook = new (phase->C, 1) Node(1);
765 if( !is_power_of_2_long(con+1) ) // Not 2^k-1?
766 return NULL; // No interesting mod hacks
767 m1 = true; // Found 2^k-1
768 con++; // Convert to 2^k form
769 }
770 uint k = log2_long(con); // Extract k
771 945
772 // Expand mod 946 // Expand mod
773 if( !m1 ) { // Case 2^k 947 if( con >= 0 && con < max_jlong && is_power_of_2_long(con+1) ) {
774 } else { // Case 2^k-1 948 uint k = log2_long(con); // Extract k
949
775 // Basic algorithm by David Detlefs. See fastmod_long.java for gory details. 950 // Basic algorithm by David Detlefs. See fastmod_long.java for gory details.
776 // Used to help a popular random number generator which does a long-mod 951 // Used to help a popular random number generator which does a long-mod
777 // of 2^31-1 and shows up in SpecJBB and SciMark. 952 // of 2^31-1 and shows up in SpecJBB and SciMark.
778 static int unroll_factor[] = { 999, 999, 61, 30, 20, 15, 12, 10, 8, 7, 6, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/}; 953 static int unroll_factor[] = { 999, 999, 61, 30, 20, 15, 12, 10, 8, 7, 6, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/};
779 int trip_count = 1; 954 int trip_count = 1;
780 if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k]; 955 if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k];
781 if( trip_count > 4 ) return NULL; // Too much unrolling 956
782 if (ConditionalMoveLimit == 0) return NULL; // cmov is required 957 // If the unroll factor is not too large, and if conditional moves are
783 958 // ok, then use this case
784 Node *x = in(1); // Value being mod'd 959 if( trip_count <= 5 && ConditionalMoveLimit != 0 ) {
785 Node *divisor = in(2); // Also is mask 960 Node *x = in(1); // Value being mod'd
786 961 Node *divisor = in(2); // Also is mask
787 Node *hook = new (phase->C, 1) Node(x); 962
788 // Generate code to reduce X rapidly to nearly 2^k-1. 963 hook->init_req(0, x); // Add a use to x to prevent him from dying
789 for( int i = 0; i < trip_count; i++ ) { 964 // Generate code to reduce X rapidly to nearly 2^k-1.
965 for( int i = 0; i < trip_count; i++ ) {
790 Node *xl = phase->transform( new (phase->C, 3) AndLNode(x,divisor) ); 966 Node *xl = phase->transform( new (phase->C, 3) AndLNode(x,divisor) );
791 Node *xh = phase->transform( new (phase->C, 3) RShiftLNode(x,phase->intcon(k)) ); // Must be signed 967 Node *xh = phase->transform( new (phase->C, 3) RShiftLNode(x,phase->intcon(k)) ); // Must be signed
792 x = phase->transform( new (phase->C, 3) AddLNode(xh,xl) ); 968 x = phase->transform( new (phase->C, 3) AddLNode(xh,xl) );
793 hook->set_req(0, x); // Add a use to x to prevent him from dying 969 hook->set_req(0, x); // Add a use to x to prevent him from dying
794 } 970 }
795 // Generate sign-fixup code. Was original value positive? 971
796 // long hack_res = (i >= 0) ? divisor : CONST64(1); 972 // Generate sign-fixup code. Was original value positive?
797 Node *cmp1 = phase->transform( new (phase->C, 3) CmpLNode( in(1), phase->longcon(0) ) ); 973 // long hack_res = (i >= 0) ? divisor : CONST64(1);
798 Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) ); 974 Node *cmp1 = phase->transform( new (phase->C, 3) CmpLNode( in(1), phase->longcon(0) ) );
799 Node *cmov1= phase->transform( new (phase->C, 4) CMoveLNode(bol1, phase->longcon(1), divisor, TypeLong::LONG) ); 975 Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) );
800 // if( x >= hack_res ) x -= divisor; 976 Node *cmov1= phase->transform( new (phase->C, 4) CMoveLNode(bol1, phase->longcon(1), divisor, TypeLong::LONG) );
801 Node *sub = phase->transform( new (phase->C, 3) SubLNode( x, divisor ) ); 977 // if( x >= hack_res ) x -= divisor;
802 Node *cmp2 = phase->transform( new (phase->C, 3) CmpLNode( x, cmov1 ) ); 978 Node *sub = phase->transform( new (phase->C, 3) SubLNode( x, divisor ) );
803 Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) ); 979 Node *cmp2 = phase->transform( new (phase->C, 3) CmpLNode( x, cmov1 ) );
804 // Convention is to not transform the return value of an Ideal 980 Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) );
805 // since Ideal is expected to return a modified 'this' or a new node. 981 // Convention is to not transform the return value of an Ideal
806 Node *cmov2= new (phase->C, 4) CMoveLNode(bol2, x, sub, TypeLong::LONG); 982 // since Ideal is expected to return a modified 'this' or a new node.
807 // cmov2 is now the mod 983 Node *cmov2= new (phase->C, 4) CMoveLNode(bol2, x, sub, TypeLong::LONG);
808 984 // cmov2 is now the mod
809 // Now remove the bogus extra edges used to keep things alive 985
810 if (can_reshape) { 986 // Now remove the bogus extra edges used to keep things alive
811 phase->is_IterGVN()->remove_dead_node(hook); 987 if (can_reshape) {
812 } else { 988 phase->is_IterGVN()->remove_dead_node(hook);
813 hook->set_req(0, NULL); // Just yank bogus edge during Parse phase 989 } else {
814 } 990 hook->set_req(0, NULL); // Just yank bogus edge during Parse phase
815 return cmov2; 991 }
816 } 992 return cmov2;
817 return NULL; 993 }
994 }
995
996 // Fell thru, the unroll case is not appropriate. Transform the modulo
997 // into a long multiply/int multiply/subtract case
998
999 // Cannot handle mod 0, and min_jlong isn't handled by the transform
1000 if( con == 0 || con == min_jlong ) return NULL;
1001
1002 // Get the absolute value of the constant; at this point, we can use this
1003 jlong pos_con = (con >= 0) ? con : -con;
1004
1005 // integer Mod 1 is always 0
1006 if( pos_con == 1 ) return new (phase->C, 1) ConLNode(TypeLong::ZERO);
1007
1008 int log2_con = -1;
1009
1010 // If this is a power of two, then maybe we can mask it
1011 if( is_power_of_2_long(pos_con) ) {
1012 log2_con = log2_long(pos_con);
1013
1014 const Type *dt = phase->type(in(1));
1015 const TypeLong *dtl = dt->isa_long();
1016
1017 // See if this can be masked, if the dividend is non-negative
1018 if( dtl && dtl->_lo >= 0 )
1019 return ( new (phase->C, 3) AndLNode( in(1), phase->longcon( pos_con-1 ) ) );
1020 }
1021
1022 // Save in(1) so that it cannot be changed or deleted
1023 hook->init_req(0, in(1));
1024
1025 // Divide using the long divide-by-constant transform (DivL to multiply/shift)
1026 Node *result = transform_long_divide( phase, in(1), pos_con );
1027 if (result != NULL) {
1028 Node *divide = phase->transform(result);
1029
1030 // Re-multiply, using a shift if this is a power of two
1031 Node *mult = NULL;
1032
1033 if( log2_con >= 0 )
1034 mult = phase->transform( new (phase->C, 3) LShiftLNode( divide, phase->intcon( log2_con ) ) );
1035 else
1036 mult = phase->transform( new (phase->C, 3) MulLNode( divide, phase->longcon( pos_con ) ) );
1037
1038 // Finally, subtract the multiplied divided value from the original
1039 result = new (phase->C, 3) SubLNode( in(1), mult );
1040 }
1041
1042 // Now remove the bogus extra edges used to keep things alive
1043 if (can_reshape) {
1044 phase->is_IterGVN()->remove_dead_node(hook);
1045 } else {
1046 hook->set_req(0, NULL); // Just yank bogus edge during Parse phase
1047 }
1048
1049 // return the value
1050 return result;
818 } 1051 }
819 1052
820 //------------------------------Value------------------------------------------ 1053 //------------------------------Value------------------------------------------
821 const Type *ModLNode::Value( PhaseTransform *phase ) const { 1054 const Type *ModLNode::Value( PhaseTransform *phase ) const {
822 // Either input is TOP ==> the result is TOP 1055 // Either input is TOP ==> the result is TOP