Mercurial > hg > truffle
annotate src/share/vm/opto/loopTransform.cpp @ 1721:413ad0331a0c
6977924: Changes for 6975078 produce build error with certain gcc versions
Summary: The changes introduced for 6975078 assign badHeapOopVal to the _allocation field in the ResourceObj class. In 32 bit linux builds with certain versions of gcc this assignment will be flagged as an error while compiling allocation.cpp. In 32 bit builds the constant value badHeapOopVal (which is cast to an intptr_t) is negative. The _allocation field is typed as an unsigned intptr_t and gcc catches this as an error.
Reviewed-by: jcoomes, ysr, phh
author | johnc |
---|---|
date | Wed, 18 Aug 2010 10:59:06 -0700 |
parents | 6027dddc26c6 |
children | d6f45b55c972 |
rev | line source |
---|---|
0 | 1 /* |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
2 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_loopTransform.cpp.incl" | |
27 | |
28 //------------------------------is_loop_exit----------------------------------- | |
29 // Given an IfNode, return the loop-exiting projection or NULL if both | |
30 // arms remain in the loop. | |
31 Node *IdealLoopTree::is_loop_exit(Node *iff) const { | |
32 if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests | |
33 PhaseIdealLoop *phase = _phase; | |
34 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
35 // we need loop unswitching instead of peeling. | |
36 if( !is_member(phase->get_loop( iff->raw_out(0) )) ) | |
37 return iff->raw_out(0); | |
38 if( !is_member(phase->get_loop( iff->raw_out(1) )) ) | |
39 return iff->raw_out(1); | |
40 return NULL; | |
41 } | |
42 | |
43 | |
44 //============================================================================= | |
45 | |
46 | |
47 //------------------------------record_for_igvn---------------------------- | |
48 // Put loop body on igvn work list | |
49 void IdealLoopTree::record_for_igvn() { | |
50 for( uint i = 0; i < _body.size(); i++ ) { | |
51 Node *n = _body.at(i); | |
52 _phase->_igvn._worklist.push(n); | |
53 } | |
54 } | |
55 | |
56 //------------------------------compute_profile_trip_cnt---------------------------- | |
57 // Compute loop trip count from profile data as | |
58 // (backedge_count + loop_exit_count) / loop_exit_count | |
59 void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) { | |
60 if (!_head->is_CountedLoop()) { | |
61 return; | |
62 } | |
63 CountedLoopNode* head = _head->as_CountedLoop(); | |
64 if (head->profile_trip_cnt() != COUNT_UNKNOWN) { | |
65 return; // Already computed | |
66 } | |
67 float trip_cnt = (float)max_jint; // default is big | |
68 | |
69 Node* back = head->in(LoopNode::LoopBackControl); | |
70 while (back != head) { | |
71 if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) && | |
72 back->in(0) && | |
73 back->in(0)->is_If() && | |
74 back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN && | |
75 back->in(0)->as_If()->_prob != PROB_UNKNOWN) { | |
76 break; | |
77 } | |
78 back = phase->idom(back); | |
79 } | |
80 if (back != head) { | |
81 assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) && | |
82 back->in(0), "if-projection exists"); | |
83 IfNode* back_if = back->in(0)->as_If(); | |
84 float loop_back_cnt = back_if->_fcnt * back_if->_prob; | |
85 | |
86 // Now compute a loop exit count | |
87 float loop_exit_cnt = 0.0f; | |
88 for( uint i = 0; i < _body.size(); i++ ) { | |
89 Node *n = _body[i]; | |
90 if( n->is_If() ) { | |
91 IfNode *iff = n->as_If(); | |
92 if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) { | |
93 Node *exit = is_loop_exit(iff); | |
94 if( exit ) { | |
95 float exit_prob = iff->_prob; | |
96 if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob; | |
97 if (exit_prob > PROB_MIN) { | |
98 float exit_cnt = iff->_fcnt * exit_prob; | |
99 loop_exit_cnt += exit_cnt; | |
100 } | |
101 } | |
102 } | |
103 } | |
104 } | |
105 if (loop_exit_cnt > 0.0f) { | |
106 trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt; | |
107 } else { | |
108 // No exit count so use | |
109 trip_cnt = loop_back_cnt; | |
110 } | |
111 } | |
112 #ifndef PRODUCT | |
113 if (TraceProfileTripCount) { | |
114 tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt); | |
115 } | |
116 #endif | |
117 head->set_profile_trip_cnt(trip_cnt); | |
118 } | |
119 | |
120 //---------------------is_invariant_addition----------------------------- | |
121 // Return nonzero index of invariant operand for an Add or Sub | |
605 | 122 // of (nonconstant) invariant and variant values. Helper for reassociate_invariants. |
0 | 123 int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) { |
124 int op = n->Opcode(); | |
125 if (op == Op_AddI || op == Op_SubI) { | |
126 bool in1_invar = this->is_invariant(n->in(1)); | |
127 bool in2_invar = this->is_invariant(n->in(2)); | |
128 if (in1_invar && !in2_invar) return 1; | |
129 if (!in1_invar && in2_invar) return 2; | |
130 } | |
131 return 0; | |
132 } | |
133 | |
134 //---------------------reassociate_add_sub----------------------------- | |
135 // Reassociate invariant add and subtract expressions: | |
136 // | |
137 // inv1 + (x + inv2) => ( inv1 + inv2) + x | |
138 // (x + inv2) + inv1 => ( inv1 + inv2) + x | |
139 // inv1 + (x - inv2) => ( inv1 - inv2) + x | |
140 // inv1 - (inv2 - x) => ( inv1 - inv2) + x | |
141 // (x + inv2) - inv1 => (-inv1 + inv2) + x | |
142 // (x - inv2) + inv1 => ( inv1 - inv2) + x | |
143 // (x - inv2) - inv1 => (-inv1 - inv2) + x | |
144 // inv1 + (inv2 - x) => ( inv1 + inv2) - x | |
145 // inv1 - (x - inv2) => ( inv1 + inv2) - x | |
146 // (inv2 - x) + inv1 => ( inv1 + inv2) - x | |
147 // (inv2 - x) - inv1 => (-inv1 + inv2) - x | |
148 // inv1 - (x + inv2) => ( inv1 - inv2) - x | |
149 // | |
150 Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) { | |
151 if (!n1->is_Add() && !n1->is_Sub() || n1->outcnt() == 0) return NULL; | |
152 if (is_invariant(n1)) return NULL; | |
153 int inv1_idx = is_invariant_addition(n1, phase); | |
154 if (!inv1_idx) return NULL; | |
155 // Don't mess with add of constant (igvn moves them to expression tree root.) | |
156 if (n1->is_Add() && n1->in(2)->is_Con()) return NULL; | |
157 Node* inv1 = n1->in(inv1_idx); | |
158 Node* n2 = n1->in(3 - inv1_idx); | |
159 int inv2_idx = is_invariant_addition(n2, phase); | |
160 if (!inv2_idx) return NULL; | |
161 Node* x = n2->in(3 - inv2_idx); | |
162 Node* inv2 = n2->in(inv2_idx); | |
163 | |
164 bool neg_x = n2->is_Sub() && inv2_idx == 1; | |
165 bool neg_inv2 = n2->is_Sub() && inv2_idx == 2; | |
166 bool neg_inv1 = n1->is_Sub() && inv1_idx == 2; | |
167 if (n1->is_Sub() && inv1_idx == 1) { | |
168 neg_x = !neg_x; | |
169 neg_inv2 = !neg_inv2; | |
170 } | |
171 Node* inv1_c = phase->get_ctrl(inv1); | |
172 Node* inv2_c = phase->get_ctrl(inv2); | |
173 Node* n_inv1; | |
174 if (neg_inv1) { | |
175 Node *zero = phase->_igvn.intcon(0); | |
176 phase->set_ctrl(zero, phase->C->root()); | |
177 n_inv1 = new (phase->C, 3) SubINode(zero, inv1); | |
178 phase->register_new_node(n_inv1, inv1_c); | |
179 } else { | |
180 n_inv1 = inv1; | |
181 } | |
182 Node* inv; | |
183 if (neg_inv2) { | |
184 inv = new (phase->C, 3) SubINode(n_inv1, inv2); | |
185 } else { | |
186 inv = new (phase->C, 3) AddINode(n_inv1, inv2); | |
187 } | |
188 phase->register_new_node(inv, phase->get_early_ctrl(inv)); | |
189 | |
190 Node* addx; | |
191 if (neg_x) { | |
192 addx = new (phase->C, 3) SubINode(inv, x); | |
193 } else { | |
194 addx = new (phase->C, 3) AddINode(x, inv); | |
195 } | |
196 phase->register_new_node(addx, phase->get_ctrl(x)); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
197 phase->_igvn.replace_node(n1, addx); |
0 | 198 return addx; |
199 } | |
200 | |
201 //---------------------reassociate_invariants----------------------------- | |
202 // Reassociate invariant expressions: | |
203 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) { | |
204 for (int i = _body.size() - 1; i >= 0; i--) { | |
205 Node *n = _body.at(i); | |
206 for (int j = 0; j < 5; j++) { | |
207 Node* nn = reassociate_add_sub(n, phase); | |
208 if (nn == NULL) break; | |
209 n = nn; // again | |
210 }; | |
211 } | |
212 } | |
213 | |
214 //------------------------------policy_peeling--------------------------------- | |
215 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can | |
216 // make some loop-invariant test (usually a null-check) happen before the loop. | |
217 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const { | |
218 Node *test = ((IdealLoopTree*)this)->tail(); | |
219 int body_size = ((IdealLoopTree*)this)->_body.size(); | |
220 int uniq = phase->C->unique(); | |
221 // Peeling does loop cloning which can result in O(N^2) node construction | |
222 if( body_size > 255 /* Prevent overflow for large body_size */ | |
223 || (body_size * body_size + uniq > MaxNodeLimit) ) { | |
224 return false; // too large to safely clone | |
225 } | |
226 while( test != _head ) { // Scan till run off top of loop | |
227 if( test->is_If() ) { // Test? | |
228 Node *ctrl = phase->get_ctrl(test->in(1)); | |
229 if (ctrl->is_top()) | |
230 return false; // Found dead test on live IF? No peeling! | |
231 // Standard IF only has one input value to check for loop invariance | |
232 assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added"); | |
233 // Condition is not a member of this loop? | |
234 if( !is_member(phase->get_loop(ctrl)) && | |
235 is_loop_exit(test) ) | |
236 return true; // Found reason to peel! | |
237 } | |
238 // Walk up dominators to loop _head looking for test which is | |
239 // executed on every path thru loop. | |
240 test = phase->idom(test); | |
241 } | |
242 return false; | |
243 } | |
244 | |
245 //------------------------------peeled_dom_test_elim--------------------------- | |
246 // If we got the effect of peeling, either by actually peeling or by making | |
247 // a pre-loop which must execute at least once, we can remove all | |
248 // loop-invariant dominated tests in the main body. | |
249 void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) { | |
250 bool progress = true; | |
251 while( progress ) { | |
252 progress = false; // Reset for next iteration | |
253 Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail(); | |
254 Node *test = prev->in(0); | |
255 while( test != loop->_head ) { // Scan till run off top of loop | |
256 | |
257 int p_op = prev->Opcode(); | |
258 if( (p_op == Op_IfFalse || p_op == Op_IfTrue) && | |
259 test->is_If() && // Test? | |
260 !test->in(1)->is_Con() && // And not already obvious? | |
261 // Condition is not a member of this loop? | |
262 !loop->is_member(get_loop(get_ctrl(test->in(1))))){ | |
263 // Walk loop body looking for instances of this test | |
264 for( uint i = 0; i < loop->_body.size(); i++ ) { | |
265 Node *n = loop->_body.at(i); | |
266 if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) { | |
267 // IfNode was dominated by version in peeled loop body | |
268 progress = true; | |
269 dominated_by( old_new[prev->_idx], n ); | |
270 } | |
271 } | |
272 } | |
273 prev = test; | |
274 test = idom(test); | |
275 } // End of scan tests in loop | |
276 | |
277 } // End of while( progress ) | |
278 } | |
279 | |
280 //------------------------------do_peeling------------------------------------- | |
281 // Peel the first iteration of the given loop. | |
282 // Step 1: Clone the loop body. The clone becomes the peeled iteration. | |
283 // The pre-loop illegally has 2 control users (old & new loops). | |
284 // Step 2: Make the old-loop fall-in edges point to the peeled iteration. | |
285 // Do this by making the old-loop fall-in edges act as if they came | |
286 // around the loopback from the prior iteration (follow the old-loop | |
287 // backedges) and then map to the new peeled iteration. This leaves | |
288 // the pre-loop with only 1 user (the new peeled iteration), but the | |
289 // peeled-loop backedge has 2 users. | |
290 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the | |
291 // extra backedge user. | |
292 void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) { | |
293 | |
294 C->set_major_progress(); | |
295 // Peeling a 'main' loop in a pre/main/post situation obfuscates the | |
296 // 'pre' loop from the main and the 'pre' can no longer have it's | |
297 // iterations adjusted. Therefore, we need to declare this loop as | |
298 // no longer a 'main' loop; it will need new pre and post loops before | |
299 // we can do further RCE. | |
300 Node *h = loop->_head; | |
301 if( h->is_CountedLoop() ) { | |
302 CountedLoopNode *cl = h->as_CountedLoop(); | |
303 assert(cl->trip_count() > 0, "peeling a fully unrolled loop"); | |
304 cl->set_trip_count(cl->trip_count() - 1); | |
305 if( cl->is_main_loop() ) { | |
306 cl->set_normal_loop(); | |
307 #ifndef PRODUCT | |
308 if( PrintOpto && VerifyLoopOptimizations ) { | |
309 tty->print("Peeling a 'main' loop; resetting to 'normal' "); | |
310 loop->dump_head(); | |
311 } | |
312 #endif | |
313 } | |
314 } | |
315 | |
316 // Step 1: Clone the loop body. The clone becomes the peeled iteration. | |
317 // The pre-loop illegally has 2 control users (old & new loops). | |
318 clone_loop( loop, old_new, dom_depth(loop->_head) ); | |
319 | |
320 | |
321 // Step 2: Make the old-loop fall-in edges point to the peeled iteration. | |
322 // Do this by making the old-loop fall-in edges act as if they came | |
323 // around the loopback from the prior iteration (follow the old-loop | |
324 // backedges) and then map to the new peeled iteration. This leaves | |
325 // the pre-loop with only 1 user (the new peeled iteration), but the | |
326 // peeled-loop backedge has 2 users. | |
327 for (DUIterator_Fast jmax, j = loop->_head->fast_outs(jmax); j < jmax; j++) { | |
328 Node* old = loop->_head->fast_out(j); | |
329 if( old->in(0) == loop->_head && old->req() == 3 && | |
330 (old->is_Loop() || old->is_Phi()) ) { | |
331 Node *new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx]; | |
332 if( !new_exit_value ) // Backedge value is ALSO loop invariant? | |
333 // Then loop body backedge value remains the same. | |
334 new_exit_value = old->in(LoopNode::LoopBackControl); | |
335 _igvn.hash_delete(old); | |
336 old->set_req(LoopNode::EntryControl, new_exit_value); | |
337 } | |
338 } | |
339 | |
340 | |
341 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the | |
342 // extra backedge user. | |
343 Node *nnn = old_new[loop->_head->_idx]; | |
344 _igvn.hash_delete(nnn); | |
345 nnn->set_req(LoopNode::LoopBackControl, C->top()); | |
346 for (DUIterator_Fast j2max, j2 = nnn->fast_outs(j2max); j2 < j2max; j2++) { | |
347 Node* use = nnn->fast_out(j2); | |
348 if( use->in(0) == nnn && use->req() == 3 && use->is_Phi() ) { | |
349 _igvn.hash_delete(use); | |
350 use->set_req(LoopNode::LoopBackControl, C->top()); | |
351 } | |
352 } | |
353 | |
354 | |
355 // Step 4: Correct dom-depth info. Set to loop-head depth. | |
356 int dd = dom_depth(loop->_head); | |
357 set_idom(loop->_head, loop->_head->in(1), dd); | |
358 for (uint j3 = 0; j3 < loop->_body.size(); j3++) { | |
359 Node *old = loop->_body.at(j3); | |
360 Node *nnn = old_new[old->_idx]; | |
361 if (!has_ctrl(nnn)) | |
362 set_idom(nnn, idom(nnn), dd-1); | |
363 // While we're at it, remove any SafePoints from the peeled code | |
364 if( old->Opcode() == Op_SafePoint ) { | |
365 Node *nnn = old_new[old->_idx]; | |
366 lazy_replace(nnn,nnn->in(TypeFunc::Control)); | |
367 } | |
368 } | |
369 | |
370 // Now force out all loop-invariant dominating tests. The optimizer | |
371 // finds some, but we _know_ they are all useless. | |
372 peeled_dom_test_elim(loop,old_new); | |
373 | |
374 loop->record_for_igvn(); | |
375 } | |
376 | |
377 //------------------------------policy_maximally_unroll------------------------ | |
378 // Return exact loop trip count, or 0 if not maximally unrolling | |
379 bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const { | |
380 CountedLoopNode *cl = _head->as_CountedLoop(); | |
381 assert( cl->is_normal_loop(), "" ); | |
382 | |
383 Node *init_n = cl->init_trip(); | |
384 Node *limit_n = cl->limit(); | |
385 | |
386 // Non-constant bounds | |
387 if( init_n == NULL || !init_n->is_Con() || | |
388 limit_n == NULL || !limit_n->is_Con() || | |
389 // protect against stride not being a constant | |
390 !cl->stride_is_con() ) { | |
391 return false; | |
392 } | |
393 int init = init_n->get_int(); | |
394 int limit = limit_n->get_int(); | |
395 int span = limit - init; | |
396 int stride = cl->stride_con(); | |
397 | |
398 if (init >= limit || stride > span) { | |
399 // return a false (no maximally unroll) and the regular unroll/peel | |
400 // route will make a small mess which CCP will fold away. | |
401 return false; | |
402 } | |
403 uint trip_count = span/stride; // trip_count can be greater than 2 Gig. | |
404 assert( (int)trip_count*stride == span, "must divide evenly" ); | |
405 | |
406 // Real policy: if we maximally unroll, does it get too big? | |
407 // Allow the unrolled mess to get larger than standard loop | |
408 // size. After all, it will no longer be a loop. | |
409 uint body_size = _body.size(); | |
410 uint unroll_limit = (uint)LoopUnrollLimit * 4; | |
411 assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); | |
412 cl->set_trip_count(trip_count); | |
413 if( trip_count <= unroll_limit && body_size <= unroll_limit ) { | |
414 uint new_body_size = body_size * trip_count; | |
415 if (new_body_size <= unroll_limit && | |
416 body_size == new_body_size / trip_count && | |
417 // Unrolling can result in a large amount of node construction | |
418 new_body_size < MaxNodeLimit - phase->C->unique()) { | |
419 return true; // maximally unroll | |
420 } | |
421 } | |
422 | |
423 return false; // Do not maximally unroll | |
424 } | |
425 | |
426 | |
427 //------------------------------policy_unroll---------------------------------- | |
428 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if | |
429 // the loop is a CountedLoop and the body is small enough. | |
430 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const { | |
431 | |
432 CountedLoopNode *cl = _head->as_CountedLoop(); | |
433 assert( cl->is_normal_loop() || cl->is_main_loop(), "" ); | |
434 | |
435 // protect against stride not being a constant | |
436 if( !cl->stride_is_con() ) return false; | |
437 | |
438 // protect against over-unrolling | |
439 if( cl->trip_count() <= 1 ) return false; | |
440 | |
441 int future_unroll_ct = cl->unrolled_count() * 2; | |
442 | |
443 // Don't unroll if the next round of unrolling would push us | |
444 // over the expected trip count of the loop. One is subtracted | |
445 // from the expected trip count because the pre-loop normally | |
446 // executes 1 iteration. | |
447 if (UnrollLimitForProfileCheck > 0 && | |
448 cl->profile_trip_cnt() != COUNT_UNKNOWN && | |
449 future_unroll_ct > UnrollLimitForProfileCheck && | |
450 (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) { | |
451 return false; | |
452 } | |
453 | |
454 // When unroll count is greater than LoopUnrollMin, don't unroll if: | |
455 // the residual iterations are more than 10% of the trip count | |
456 // and rounds of "unroll,optimize" are not making significant progress | |
457 // Progress defined as current size less than 20% larger than previous size. | |
458 if (UseSuperWord && cl->node_count_before_unroll() > 0 && | |
459 future_unroll_ct > LoopUnrollMin && | |
460 (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() && | |
461 1.2 * cl->node_count_before_unroll() < (double)_body.size()) { | |
462 return false; | |
463 } | |
464 | |
465 Node *init_n = cl->init_trip(); | |
466 Node *limit_n = cl->limit(); | |
467 // Non-constant bounds. | |
468 // Protect against over-unrolling when init or/and limit are not constant | |
469 // (so that trip_count's init value is maxint) but iv range is known. | |
470 if( init_n == NULL || !init_n->is_Con() || | |
471 limit_n == NULL || !limit_n->is_Con() ) { | |
472 Node* phi = cl->phi(); | |
473 if( phi != NULL ) { | |
474 assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi."); | |
475 const TypeInt* iv_type = phase->_igvn.type(phi)->is_int(); | |
476 int next_stride = cl->stride_con() * 2; // stride after this unroll | |
477 if( next_stride > 0 ) { | |
478 if( iv_type->_lo + next_stride <= iv_type->_lo || // overflow | |
479 iv_type->_lo + next_stride > iv_type->_hi ) { | |
480 return false; // over-unrolling | |
481 } | |
482 } else if( next_stride < 0 ) { | |
483 if( iv_type->_hi + next_stride >= iv_type->_hi || // overflow | |
484 iv_type->_hi + next_stride < iv_type->_lo ) { | |
485 return false; // over-unrolling | |
486 } | |
487 } | |
488 } | |
489 } | |
490 | |
491 // Adjust body_size to determine if we unroll or not | |
492 uint body_size = _body.size(); | |
493 // Key test to unroll CaffeineMark's Logic test | |
494 int xors_in_loop = 0; | |
495 // Also count ModL, DivL and MulL which expand mightly | |
496 for( uint k = 0; k < _body.size(); k++ ) { | |
497 switch( _body.at(k)->Opcode() ) { | |
498 case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test | |
499 case Op_ModL: body_size += 30; break; | |
500 case Op_DivL: body_size += 30; break; | |
501 case Op_MulL: body_size += 10; break; | |
502 } | |
503 } | |
504 | |
505 // Check for being too big | |
506 if( body_size > (uint)LoopUnrollLimit ) { | |
507 if( xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; | |
508 // Normal case: loop too big | |
509 return false; | |
510 } | |
511 | |
512 // Check for stride being a small enough constant | |
513 if( abs(cl->stride_con()) > (1<<3) ) return false; | |
514 | |
515 // Unroll once! (Each trip will soon do double iterations) | |
516 return true; | |
517 } | |
518 | |
519 //------------------------------policy_align----------------------------------- | |
520 // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the | |
521 // expression that does the alignment. Note that only one array base can be | |
605 | 522 // aligned in a loop (unless the VM guarantees mutual alignment). Note that |
0 | 523 // if we vectorize short memory ops into longer memory ops, we may want to |
524 // increase alignment. | |
525 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const { | |
526 return false; | |
527 } | |
528 | |
529 //------------------------------policy_range_check----------------------------- | |
530 // Return TRUE or FALSE if the loop should be range-check-eliminated. | |
531 // Actually we do iteration-splitting, a more powerful form of RCE. | |
532 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const { | |
533 if( !RangeCheckElimination ) return false; | |
534 | |
535 CountedLoopNode *cl = _head->as_CountedLoop(); | |
536 // If we unrolled with no intention of doing RCE and we later | |
537 // changed our minds, we got no pre-loop. Either we need to | |
538 // make a new pre-loop, or we gotta disallow RCE. | |
539 if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now. | |
540 Node *trip_counter = cl->phi(); | |
541 | |
542 // Check loop body for tests of trip-counter plus loop-invariant vs | |
543 // loop-invariant. | |
544 for( uint i = 0; i < _body.size(); i++ ) { | |
545 Node *iff = _body[i]; | |
546 if( iff->Opcode() == Op_If ) { // Test? | |
547 | |
548 // Comparing trip+off vs limit | |
549 Node *bol = iff->in(1); | |
550 if( bol->req() != 2 ) continue; // dead constant test | |
1172 | 551 if (!bol->is_Bool()) { |
552 assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); | |
553 continue; | |
554 } | |
0 | 555 Node *cmp = bol->in(1); |
556 | |
557 Node *rc_exp = cmp->in(1); | |
558 Node *limit = cmp->in(2); | |
559 | |
560 Node *limit_c = phase->get_ctrl(limit); | |
561 if( limit_c == phase->C->top() ) | |
562 return false; // Found dead test on live IF? No RCE! | |
563 if( is_member(phase->get_loop(limit_c) ) ) { | |
564 // Compare might have operands swapped; commute them | |
565 rc_exp = cmp->in(2); | |
566 limit = cmp->in(1); | |
567 limit_c = phase->get_ctrl(limit); | |
568 if( is_member(phase->get_loop(limit_c) ) ) | |
569 continue; // Both inputs are loop varying; cannot RCE | |
570 } | |
571 | |
572 if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) { | |
573 continue; | |
574 } | |
575 // Yeah! Found a test like 'trip+off vs limit' | |
576 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
577 // we need loop unswitching instead of iteration splitting. | |
578 if( is_loop_exit(iff) ) | |
579 return true; // Found reason to split iterations | |
580 } // End of is IF | |
581 } | |
582 | |
583 return false; | |
584 } | |
585 | |
586 //------------------------------policy_peel_only------------------------------- | |
587 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful | |
588 // for unrolling loops with NO array accesses. | |
589 bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const { | |
590 | |
591 for( uint i = 0; i < _body.size(); i++ ) | |
592 if( _body[i]->is_Mem() ) | |
593 return false; | |
594 | |
595 // No memory accesses at all! | |
596 return true; | |
597 } | |
598 | |
599 //------------------------------clone_up_backedge_goo-------------------------- | |
600 // If Node n lives in the back_ctrl block and cannot float, we clone a private | |
601 // version of n in preheader_ctrl block and return that, otherwise return n. | |
602 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n ) { | |
603 if( get_ctrl(n) != back_ctrl ) return n; | |
604 | |
605 Node *x = NULL; // If required, a clone of 'n' | |
606 // Check for 'n' being pinned in the backedge. | |
607 if( n->in(0) && n->in(0) == back_ctrl ) { | |
608 x = n->clone(); // Clone a copy of 'n' to preheader | |
609 x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader | |
610 } | |
611 | |
612 // Recursive fixup any other input edges into x. | |
613 // If there are no changes we can just return 'n', otherwise | |
614 // we need to clone a private copy and change it. | |
615 for( uint i = 1; i < n->req(); i++ ) { | |
616 Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i) ); | |
617 if( g != n->in(i) ) { | |
618 if( !x ) | |
619 x = n->clone(); | |
620 x->set_req(i, g); | |
621 } | |
622 } | |
623 if( x ) { // x can legally float to pre-header location | |
624 register_new_node( x, preheader_ctrl ); | |
625 return x; | |
626 } else { // raise n to cover LCA of uses | |
627 set_ctrl( n, find_non_split_ctrl(back_ctrl->in(0)) ); | |
628 } | |
629 return n; | |
630 } | |
631 | |
632 //------------------------------insert_pre_post_loops-------------------------- | |
633 // Insert pre and post loops. If peel_only is set, the pre-loop can not have | |
634 // more iterations added. It acts as a 'peel' only, no lower-bound RCE, no | |
635 // alignment. Useful to unroll loops that do no array accesses. | |
636 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) { | |
637 | |
638 C->set_major_progress(); | |
639 | |
640 // Find common pieces of the loop being guarded with pre & post loops | |
641 CountedLoopNode *main_head = loop->_head->as_CountedLoop(); | |
642 assert( main_head->is_normal_loop(), "" ); | |
643 CountedLoopEndNode *main_end = main_head->loopexit(); | |
644 assert( main_end->outcnt() == 2, "1 true, 1 false path only" ); | |
645 uint dd_main_head = dom_depth(main_head); | |
646 uint max = main_head->outcnt(); | |
647 | |
648 Node *pre_header= main_head->in(LoopNode::EntryControl); | |
649 Node *init = main_head->init_trip(); | |
650 Node *incr = main_end ->incr(); | |
651 Node *limit = main_end ->limit(); | |
652 Node *stride = main_end ->stride(); | |
653 Node *cmp = main_end ->cmp_node(); | |
654 BoolTest::mask b_test = main_end->test_trip(); | |
655 | |
656 // Need only 1 user of 'bol' because I will be hacking the loop bounds. | |
657 Node *bol = main_end->in(CountedLoopEndNode::TestValue); | |
658 if( bol->outcnt() != 1 ) { | |
659 bol = bol->clone(); | |
660 register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl)); | |
661 _igvn.hash_delete(main_end); | |
662 main_end->set_req(CountedLoopEndNode::TestValue, bol); | |
663 } | |
664 // Need only 1 user of 'cmp' because I will be hacking the loop bounds. | |
665 if( cmp->outcnt() != 1 ) { | |
666 cmp = cmp->clone(); | |
667 register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl)); | |
668 _igvn.hash_delete(bol); | |
669 bol->set_req(1, cmp); | |
670 } | |
671 | |
672 //------------------------------ | |
673 // Step A: Create Post-Loop. | |
674 Node* main_exit = main_end->proj_out(false); | |
675 assert( main_exit->Opcode() == Op_IfFalse, "" ); | |
676 int dd_main_exit = dom_depth(main_exit); | |
677 | |
678 // Step A1: Clone the loop body. The clone becomes the post-loop. The main | |
679 // loop pre-header illegally has 2 control users (old & new loops). | |
680 clone_loop( loop, old_new, dd_main_exit ); | |
681 assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" ); | |
682 CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop(); | |
683 post_head->set_post_loop(main_head); | |
684 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
685 // Reduce the post-loop trip count. |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
686 CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
687 post_end->_prob = PROB_FAIR; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
688 |
0 | 689 // Build the main-loop normal exit. |
690 IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end); | |
691 _igvn.register_new_node_with_optimizer( new_main_exit ); | |
692 set_idom(new_main_exit, main_end, dd_main_exit ); | |
693 set_loop(new_main_exit, loop->_parent); | |
694 | |
695 // Step A2: Build a zero-trip guard for the post-loop. After leaving the | |
696 // main-loop, the post-loop may not execute at all. We 'opaque' the incr | |
697 // (the main-loop trip-counter exit value) because we will be changing | |
698 // the exit value (via unrolling) so we cannot constant-fold away the zero | |
699 // trip guard until all unrolling is done. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
700 Node *zer_opaq = new (C, 2) Opaque1Node(C, incr); |
0 | 701 Node *zer_cmp = new (C, 3) CmpINode( zer_opaq, limit ); |
702 Node *zer_bol = new (C, 2) BoolNode( zer_cmp, b_test ); | |
703 register_new_node( zer_opaq, new_main_exit ); | |
704 register_new_node( zer_cmp , new_main_exit ); | |
705 register_new_node( zer_bol , new_main_exit ); | |
706 | |
707 // Build the IfNode | |
708 IfNode *zer_iff = new (C, 2) IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN ); | |
709 _igvn.register_new_node_with_optimizer( zer_iff ); | |
710 set_idom(zer_iff, new_main_exit, dd_main_exit); | |
711 set_loop(zer_iff, loop->_parent); | |
712 | |
713 // Plug in the false-path, taken if we need to skip post-loop | |
714 _igvn.hash_delete( main_exit ); | |
715 main_exit->set_req(0, zer_iff); | |
716 _igvn._worklist.push(main_exit); | |
717 set_idom(main_exit, zer_iff, dd_main_exit); | |
718 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit); | |
719 // Make the true-path, must enter the post loop | |
720 Node *zer_taken = new (C, 1) IfTrueNode( zer_iff ); | |
721 _igvn.register_new_node_with_optimizer( zer_taken ); | |
722 set_idom(zer_taken, zer_iff, dd_main_exit); | |
723 set_loop(zer_taken, loop->_parent); | |
724 // Plug in the true path | |
725 _igvn.hash_delete( post_head ); | |
726 post_head->set_req(LoopNode::EntryControl, zer_taken); | |
727 set_idom(post_head, zer_taken, dd_main_exit); | |
728 | |
729 // Step A3: Make the fall-in values to the post-loop come from the | |
730 // fall-out values of the main-loop. | |
731 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { | |
732 Node* main_phi = main_head->fast_out(i); | |
733 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) { | |
734 Node *post_phi = old_new[main_phi->_idx]; | |
735 Node *fallmain = clone_up_backedge_goo(main_head->back_control(), | |
736 post_head->init_control(), | |
737 main_phi->in(LoopNode::LoopBackControl)); | |
738 _igvn.hash_delete(post_phi); | |
739 post_phi->set_req( LoopNode::EntryControl, fallmain ); | |
740 } | |
741 } | |
742 | |
743 // Update local caches for next stanza | |
744 main_exit = new_main_exit; | |
745 | |
746 | |
747 //------------------------------ | |
748 // Step B: Create Pre-Loop. | |
749 | |
750 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main | |
751 // loop pre-header illegally has 2 control users (old & new loops). | |
752 clone_loop( loop, old_new, dd_main_head ); | |
753 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop(); | |
754 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); | |
755 pre_head->set_pre_loop(main_head); | |
756 Node *pre_incr = old_new[incr->_idx]; | |
757 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
758 // Reduce the pre-loop trip count. |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
759 pre_end->_prob = PROB_FAIR; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
760 |
0 | 761 // Find the pre-loop normal exit. |
762 Node* pre_exit = pre_end->proj_out(false); | |
763 assert( pre_exit->Opcode() == Op_IfFalse, "" ); | |
764 IfFalseNode *new_pre_exit = new (C, 1) IfFalseNode(pre_end); | |
765 _igvn.register_new_node_with_optimizer( new_pre_exit ); | |
766 set_idom(new_pre_exit, pre_end, dd_main_head); | |
767 set_loop(new_pre_exit, loop->_parent); | |
768 | |
769 // Step B2: Build a zero-trip guard for the main-loop. After leaving the | |
770 // pre-loop, the main-loop may not execute at all. Later in life this | |
771 // zero-trip guard will become the minimum-trip guard when we unroll | |
772 // the main-loop. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
773 Node *min_opaq = new (C, 2) Opaque1Node(C, limit); |
0 | 774 Node *min_cmp = new (C, 3) CmpINode( pre_incr, min_opaq ); |
775 Node *min_bol = new (C, 2) BoolNode( min_cmp, b_test ); | |
776 register_new_node( min_opaq, new_pre_exit ); | |
777 register_new_node( min_cmp , new_pre_exit ); | |
778 register_new_node( min_bol , new_pre_exit ); | |
779 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
780 // Build the IfNode (assume the main-loop is executed always). |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
781 IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN ); |
0 | 782 _igvn.register_new_node_with_optimizer( min_iff ); |
783 set_idom(min_iff, new_pre_exit, dd_main_head); | |
784 set_loop(min_iff, loop->_parent); | |
785 | |
786 // Plug in the false-path, taken if we need to skip main-loop | |
787 _igvn.hash_delete( pre_exit ); | |
788 pre_exit->set_req(0, min_iff); | |
789 set_idom(pre_exit, min_iff, dd_main_head); | |
790 set_idom(pre_exit->unique_out(), min_iff, dd_main_head); | |
791 // Make the true-path, must enter the main loop | |
792 Node *min_taken = new (C, 1) IfTrueNode( min_iff ); | |
793 _igvn.register_new_node_with_optimizer( min_taken ); | |
794 set_idom(min_taken, min_iff, dd_main_head); | |
795 set_loop(min_taken, loop->_parent); | |
796 // Plug in the true path | |
797 _igvn.hash_delete( main_head ); | |
798 main_head->set_req(LoopNode::EntryControl, min_taken); | |
799 set_idom(main_head, min_taken, dd_main_head); | |
800 | |
801 // Step B3: Make the fall-in values to the main-loop come from the | |
802 // fall-out values of the pre-loop. | |
803 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) { | |
804 Node* main_phi = main_head->fast_out(i2); | |
805 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) { | |
806 Node *pre_phi = old_new[main_phi->_idx]; | |
807 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(), | |
808 main_head->init_control(), | |
809 pre_phi->in(LoopNode::LoopBackControl)); | |
810 _igvn.hash_delete(main_phi); | |
811 main_phi->set_req( LoopNode::EntryControl, fallpre ); | |
812 } | |
813 } | |
814 | |
815 // Step B4: Shorten the pre-loop to run only 1 iteration (for now). | |
816 // RCE and alignment may change this later. | |
817 Node *cmp_end = pre_end->cmp_node(); | |
818 assert( cmp_end->in(2) == limit, "" ); | |
819 Node *pre_limit = new (C, 3) AddINode( init, stride ); | |
820 | |
821 // Save the original loop limit in this Opaque1 node for | |
822 // use by range check elimination. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
823 Node *pre_opaq = new (C, 3) Opaque1Node(C, pre_limit, limit); |
0 | 824 |
825 register_new_node( pre_limit, pre_head->in(0) ); | |
826 register_new_node( pre_opaq , pre_head->in(0) ); | |
827 | |
828 // Since no other users of pre-loop compare, I can hack limit directly | |
829 assert( cmp_end->outcnt() == 1, "no other users" ); | |
830 _igvn.hash_delete(cmp_end); | |
831 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq); | |
832 | |
833 // Special case for not-equal loop bounds: | |
834 // Change pre loop test, main loop test, and the | |
835 // main loop guard test to use lt or gt depending on stride | |
836 // direction: | |
837 // positive stride use < | |
838 // negative stride use > | |
839 | |
840 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) { | |
841 | |
842 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt; | |
843 // Modify pre loop end condition | |
844 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool(); | |
845 BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test); | |
846 register_new_node( new_bol0, pre_head->in(0) ); | |
847 _igvn.hash_delete(pre_end); | |
848 pre_end->set_req(CountedLoopEndNode::TestValue, new_bol0); | |
849 // Modify main loop guard condition | |
850 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay"); | |
851 BoolNode* new_bol1 = new (C, 2) BoolNode(min_bol->in(1), new_test); | |
852 register_new_node( new_bol1, new_pre_exit ); | |
853 _igvn.hash_delete(min_iff); | |
854 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1); | |
855 // Modify main loop end condition | |
856 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool(); | |
857 BoolNode* new_bol2 = new (C, 2) BoolNode(main_bol->in(1), new_test); | |
858 register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) ); | |
859 _igvn.hash_delete(main_end); | |
860 main_end->set_req(CountedLoopEndNode::TestValue, new_bol2); | |
861 } | |
862 | |
863 // Flag main loop | |
864 main_head->set_main_loop(); | |
865 if( peel_only ) main_head->set_main_no_pre_loop(); | |
866 | |
867 // It's difficult to be precise about the trip-counts | |
868 // for the pre/post loops. They are usually very short, | |
869 // so guess that 4 trips is a reasonable value. | |
870 post_head->set_profile_trip_cnt(4.0); | |
871 pre_head->set_profile_trip_cnt(4.0); | |
872 | |
873 // Now force out all loop-invariant dominating tests. The optimizer | |
874 // finds some, but we _know_ they are all useless. | |
875 peeled_dom_test_elim(loop,old_new); | |
876 } | |
877 | |
878 //------------------------------is_invariant----------------------------- | |
879 // Return true if n is invariant | |
880 bool IdealLoopTree::is_invariant(Node* n) const { | |
1172 | 881 Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; |
0 | 882 if (n_c->is_top()) return false; |
883 return !is_member(_phase->get_loop(n_c)); | |
884 } | |
885 | |
886 | |
887 //------------------------------do_unroll-------------------------------------- | |
888 // Unroll the loop body one step - make each trip do 2 iterations. | |
889 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) { | |
890 assert( LoopUnrollLimit, "" ); | |
891 #ifndef PRODUCT | |
892 if( PrintOpto && VerifyLoopOptimizations ) { | |
893 tty->print("Unrolling "); | |
894 loop->dump_head(); | |
895 } | |
896 #endif | |
897 CountedLoopNode *loop_head = loop->_head->as_CountedLoop(); | |
898 CountedLoopEndNode *loop_end = loop_head->loopexit(); | |
899 assert( loop_end, "" ); | |
900 | |
901 // Remember loop node count before unrolling to detect | |
902 // if rounds of unroll,optimize are making progress | |
903 loop_head->set_node_count_before_unroll(loop->_body.size()); | |
904 | |
905 Node *ctrl = loop_head->in(LoopNode::EntryControl); | |
906 Node *limit = loop_head->limit(); | |
907 Node *init = loop_head->init_trip(); | |
908 Node *strid = loop_head->stride(); | |
909 | |
910 Node *opaq = NULL; | |
911 if( adjust_min_trip ) { // If not maximally unrolling, need adjustment | |
912 assert( loop_head->is_main_loop(), "" ); | |
913 assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); | |
914 Node *iff = ctrl->in(0); | |
915 assert( iff->Opcode() == Op_If, "" ); | |
916 Node *bol = iff->in(1); | |
917 assert( bol->Opcode() == Op_Bool, "" ); | |
918 Node *cmp = bol->in(1); | |
919 assert( cmp->Opcode() == Op_CmpI, "" ); | |
920 opaq = cmp->in(2); | |
921 // Occasionally it's possible for a pre-loop Opaque1 node to be | |
922 // optimized away and then another round of loop opts attempted. | |
923 // We can not optimize this particular loop in that case. | |
924 if( opaq->Opcode() != Op_Opaque1 ) | |
925 return; // Cannot find pre-loop! Bail out! | |
926 } | |
927 | |
928 C->set_major_progress(); | |
929 | |
930 // Adjust max trip count. The trip count is intentionally rounded | |
931 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, | |
932 // the main, unrolled, part of the loop will never execute as it is protected | |
933 // by the min-trip test. See bug 4834191 for a case where we over-unrolled | |
934 // and later determined that part of the unrolled loop was dead. | |
935 loop_head->set_trip_count(loop_head->trip_count() / 2); | |
936 | |
937 // Double the count of original iterations in the unrolled loop body. | |
938 loop_head->double_unrolled_count(); | |
939 | |
940 // ----------- | |
941 // Step 2: Cut back the trip counter for an unroll amount of 2. | |
942 // Loop will normally trip (limit - init)/stride_con. Since it's a | |
943 // CountedLoop this is exact (stride divides limit-init exactly). | |
944 // We are going to double the loop body, so we want to knock off any | |
945 // odd iteration: (trip_cnt & ~1). Then back compute a new limit. | |
946 Node *span = new (C, 3) SubINode( limit, init ); | |
947 register_new_node( span, ctrl ); | |
948 Node *trip = new (C, 3) DivINode( 0, span, strid ); | |
949 register_new_node( trip, ctrl ); | |
950 Node *mtwo = _igvn.intcon(-2); | |
951 set_ctrl(mtwo, C->root()); | |
952 Node *rond = new (C, 3) AndINode( trip, mtwo ); | |
953 register_new_node( rond, ctrl ); | |
954 Node *spn2 = new (C, 3) MulINode( rond, strid ); | |
955 register_new_node( spn2, ctrl ); | |
956 Node *lim2 = new (C, 3) AddINode( spn2, init ); | |
957 register_new_node( lim2, ctrl ); | |
958 | |
959 // Hammer in the new limit | |
960 Node *ctrl2 = loop_end->in(0); | |
961 Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 ); | |
962 register_new_node( cmp2, ctrl2 ); | |
963 Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); | |
964 register_new_node( bol2, ctrl2 ); | |
965 _igvn.hash_delete(loop_end); | |
966 loop_end->set_req(CountedLoopEndNode::TestValue, bol2); | |
967 | |
968 // Step 3: Find the min-trip test guaranteed before a 'main' loop. | |
969 // Make it a 1-trip test (means at least 2 trips). | |
970 if( adjust_min_trip ) { | |
971 // Guard test uses an 'opaque' node which is not shared. Hence I | |
972 // can edit it's inputs directly. Hammer in the new limit for the | |
973 // minimum-trip guard. | |
974 assert( opaq->outcnt() == 1, "" ); | |
975 _igvn.hash_delete(opaq); | |
976 opaq->set_req(1, lim2); | |
977 } | |
978 | |
979 // --------- | |
980 // Step 4: Clone the loop body. Move it inside the loop. This loop body | |
981 // represents the odd iterations; since the loop trips an even number of | |
982 // times its backedge is never taken. Kill the backedge. | |
983 uint dd = dom_depth(loop_head); | |
984 clone_loop( loop, old_new, dd ); | |
985 | |
986 // Make backedges of the clone equal to backedges of the original. | |
987 // Make the fall-in from the original come from the fall-out of the clone. | |
988 for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) { | |
989 Node* phi = loop_head->fast_out(j); | |
990 if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) { | |
991 Node *newphi = old_new[phi->_idx]; | |
992 _igvn.hash_delete( phi ); | |
993 _igvn.hash_delete( newphi ); | |
994 | |
995 phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl)); | |
996 newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl)); | |
997 phi ->set_req(LoopNode::LoopBackControl, C->top()); | |
998 } | |
999 } | |
1000 Node *clone_head = old_new[loop_head->_idx]; | |
1001 _igvn.hash_delete( clone_head ); | |
1002 loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl)); | |
1003 clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl)); | |
1004 loop_head ->set_req(LoopNode::LoopBackControl, C->top()); | |
1005 loop->_head = clone_head; // New loop header | |
1006 | |
1007 set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd); | |
1008 set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd); | |
1009 | |
1010 // Kill the clone's backedge | |
1011 Node *newcle = old_new[loop_end->_idx]; | |
1012 _igvn.hash_delete( newcle ); | |
1013 Node *one = _igvn.intcon(1); | |
1014 set_ctrl(one, C->root()); | |
1015 newcle->set_req(1, one); | |
1016 // Force clone into same loop body | |
1017 uint max = loop->_body.size(); | |
1018 for( uint k = 0; k < max; k++ ) { | |
1019 Node *old = loop->_body.at(k); | |
1020 Node *nnn = old_new[old->_idx]; | |
1021 loop->_body.push(nnn); | |
1022 if (!has_ctrl(old)) | |
1023 set_loop(nnn, loop); | |
1024 } | |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
235
diff
changeset
|
1025 |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
235
diff
changeset
|
1026 loop->record_for_igvn(); |
0 | 1027 } |
1028 | |
1029 //------------------------------do_maximally_unroll---------------------------- | |
1030 | |
1031 void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) { | |
1032 CountedLoopNode *cl = loop->_head->as_CountedLoop(); | |
1033 assert( cl->trip_count() > 0, ""); | |
1034 | |
1035 // If loop is tripping an odd number of times, peel odd iteration | |
1036 if( (cl->trip_count() & 1) == 1 ) { | |
1037 do_peeling( loop, old_new ); | |
1038 } | |
1039 | |
1040 // Now its tripping an even number of times remaining. Double loop body. | |
1041 // Do not adjust pre-guards; they are not needed and do not exist. | |
1042 if( cl->trip_count() > 0 ) { | |
1043 do_unroll( loop, old_new, false ); | |
1044 } | |
1045 } | |
1046 | |
1047 //------------------------------dominates_backedge--------------------------------- | |
1048 // Returns true if ctrl is executed on every complete iteration | |
1049 bool IdealLoopTree::dominates_backedge(Node* ctrl) { | |
1050 assert(ctrl->is_CFG(), "must be control"); | |
1051 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); | |
1052 return _phase->dom_lca_internal(ctrl, backedge) == ctrl; | |
1053 } | |
1054 | |
1055 //------------------------------add_constraint--------------------------------- | |
1056 // Constrain the main loop iterations so the condition: | |
1057 // scale_con * I + offset < limit | |
1058 // always holds true. That is, either increase the number of iterations in | |
1059 // the pre-loop or the post-loop until the condition holds true in the main | |
1060 // loop. Stride, scale, offset and limit are all loop invariant. Further, | |
1061 // stride and scale are constants (offset and limit often are). | |
1062 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { | |
1063 | |
1064 // Compute "I :: (limit-offset)/scale_con" | |
1065 Node *con = new (C, 3) SubINode( limit, offset ); | |
1066 register_new_node( con, pre_ctrl ); | |
1067 Node *scale = _igvn.intcon(scale_con); | |
1068 set_ctrl(scale, C->root()); | |
1069 Node *X = new (C, 3) DivINode( 0, con, scale ); | |
1070 register_new_node( X, pre_ctrl ); | |
1071 | |
1072 // For positive stride, the pre-loop limit always uses a MAX function | |
1073 // and the main loop a MIN function. For negative stride these are | |
1074 // reversed. | |
1075 | |
1076 // Also for positive stride*scale the affine function is increasing, so the | |
1077 // pre-loop must check for underflow and the post-loop for overflow. | |
1078 // Negative stride*scale reverses this; pre-loop checks for overflow and | |
1079 // post-loop for underflow. | |
1080 if( stride_con*scale_con > 0 ) { | |
1081 // Compute I < (limit-offset)/scale_con | |
1082 // Adjust main-loop last iteration to be MIN/MAX(main_loop,X) | |
1083 *main_limit = (stride_con > 0) | |
1084 ? (Node*)(new (C, 3) MinINode( *main_limit, X )) | |
1085 : (Node*)(new (C, 3) MaxINode( *main_limit, X )); | |
1086 register_new_node( *main_limit, pre_ctrl ); | |
1087 | |
1088 } else { | |
1089 // Compute (limit-offset)/scale_con + SGN(-scale_con) <= I | |
1090 // Add the negation of the main-loop constraint to the pre-loop. | |
1091 // See footnote [++] below for a derivation of the limit expression. | |
1092 Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1); | |
1093 set_ctrl(incr, C->root()); | |
1094 Node *adj = new (C, 3) AddINode( X, incr ); | |
1095 register_new_node( adj, pre_ctrl ); | |
1096 *pre_limit = (scale_con > 0) | |
1097 ? (Node*)new (C, 3) MinINode( *pre_limit, adj ) | |
1098 : (Node*)new (C, 3) MaxINode( *pre_limit, adj ); | |
1099 register_new_node( *pre_limit, pre_ctrl ); | |
1100 | |
1101 // [++] Here's the algebra that justifies the pre-loop limit expression: | |
1102 // | |
1103 // NOT( scale_con * I + offset < limit ) | |
1104 // == | |
1105 // scale_con * I + offset >= limit | |
1106 // == | |
1107 // SGN(scale_con) * I >= (limit-offset)/|scale_con| | |
1108 // == | |
1109 // (limit-offset)/|scale_con| <= I * SGN(scale_con) | |
1110 // == | |
1111 // (limit-offset)/|scale_con|-1 < I * SGN(scale_con) | |
1112 // == | |
1113 // ( if (scale_con > 0) /*common case*/ | |
1114 // (limit-offset)/scale_con - 1 < I | |
1115 // else | |
1116 // (limit-offset)/scale_con + 1 > I | |
1117 // ) | |
1118 // ( if (scale_con > 0) /*common case*/ | |
1119 // (limit-offset)/scale_con + SGN(-scale_con) < I | |
1120 // else | |
1121 // (limit-offset)/scale_con + SGN(-scale_con) > I | |
1122 } | |
1123 } | |
1124 | |
1125 | |
1126 //------------------------------is_scaled_iv--------------------------------- | |
1127 // Return true if exp is a constant times an induction var | |
1128 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) { | |
1129 if (exp == iv) { | |
1130 if (p_scale != NULL) { | |
1131 *p_scale = 1; | |
1132 } | |
1133 return true; | |
1134 } | |
1135 int opc = exp->Opcode(); | |
1136 if (opc == Op_MulI) { | |
1137 if (exp->in(1) == iv && exp->in(2)->is_Con()) { | |
1138 if (p_scale != NULL) { | |
1139 *p_scale = exp->in(2)->get_int(); | |
1140 } | |
1141 return true; | |
1142 } | |
1143 if (exp->in(2) == iv && exp->in(1)->is_Con()) { | |
1144 if (p_scale != NULL) { | |
1145 *p_scale = exp->in(1)->get_int(); | |
1146 } | |
1147 return true; | |
1148 } | |
1149 } else if (opc == Op_LShiftI) { | |
1150 if (exp->in(1) == iv && exp->in(2)->is_Con()) { | |
1151 if (p_scale != NULL) { | |
1152 *p_scale = 1 << exp->in(2)->get_int(); | |
1153 } | |
1154 return true; | |
1155 } | |
1156 } | |
1157 return false; | |
1158 } | |
1159 | |
1160 //-----------------------------is_scaled_iv_plus_offset------------------------------ | |
1161 // Return true if exp is a simple induction variable expression: k1*iv + (invar + k2) | |
1162 bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) { | |
1163 if (is_scaled_iv(exp, iv, p_scale)) { | |
1164 if (p_offset != NULL) { | |
1165 Node *zero = _igvn.intcon(0); | |
1166 set_ctrl(zero, C->root()); | |
1167 *p_offset = zero; | |
1168 } | |
1169 return true; | |
1170 } | |
1171 int opc = exp->Opcode(); | |
1172 if (opc == Op_AddI) { | |
1173 if (is_scaled_iv(exp->in(1), iv, p_scale)) { | |
1174 if (p_offset != NULL) { | |
1175 *p_offset = exp->in(2); | |
1176 } | |
1177 return true; | |
1178 } | |
1179 if (exp->in(2)->is_Con()) { | |
1180 Node* offset2 = NULL; | |
1181 if (depth < 2 && | |
1182 is_scaled_iv_plus_offset(exp->in(1), iv, p_scale, | |
1183 p_offset != NULL ? &offset2 : NULL, depth+1)) { | |
1184 if (p_offset != NULL) { | |
1185 Node *ctrl_off2 = get_ctrl(offset2); | |
1186 Node* offset = new (C, 3) AddINode(offset2, exp->in(2)); | |
1187 register_new_node(offset, ctrl_off2); | |
1188 *p_offset = offset; | |
1189 } | |
1190 return true; | |
1191 } | |
1192 } | |
1193 } else if (opc == Op_SubI) { | |
1194 if (is_scaled_iv(exp->in(1), iv, p_scale)) { | |
1195 if (p_offset != NULL) { | |
1196 Node *zero = _igvn.intcon(0); | |
1197 set_ctrl(zero, C->root()); | |
1198 Node *ctrl_off = get_ctrl(exp->in(2)); | |
1199 Node* offset = new (C, 3) SubINode(zero, exp->in(2)); | |
1200 register_new_node(offset, ctrl_off); | |
1201 *p_offset = offset; | |
1202 } | |
1203 return true; | |
1204 } | |
1205 if (is_scaled_iv(exp->in(2), iv, p_scale)) { | |
1206 if (p_offset != NULL) { | |
1207 *p_scale *= -1; | |
1208 *p_offset = exp->in(1); | |
1209 } | |
1210 return true; | |
1211 } | |
1212 } | |
1213 return false; | |
1214 } | |
1215 | |
1216 //------------------------------do_range_check--------------------------------- | |
1217 // Eliminate range-checks and other trip-counter vs loop-invariant tests. | |
1218 void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) { | |
1219 #ifndef PRODUCT | |
1220 if( PrintOpto && VerifyLoopOptimizations ) { | |
1221 tty->print("Range Check Elimination "); | |
1222 loop->dump_head(); | |
1223 } | |
1224 #endif | |
1225 assert( RangeCheckElimination, "" ); | |
1226 CountedLoopNode *cl = loop->_head->as_CountedLoop(); | |
1227 assert( cl->is_main_loop(), "" ); | |
1228 | |
1229 // Find the trip counter; we are iteration splitting based on it | |
1230 Node *trip_counter = cl->phi(); | |
1231 // Find the main loop limit; we will trim it's iterations | |
1232 // to not ever trip end tests | |
1233 Node *main_limit = cl->limit(); | |
1234 // Find the pre-loop limit; we will expand it's iterations to | |
1235 // not ever trip low tests. | |
1236 Node *ctrl = cl->in(LoopNode::EntryControl); | |
1237 assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); | |
1238 Node *iffm = ctrl->in(0); | |
1239 assert( iffm->Opcode() == Op_If, "" ); | |
1240 Node *p_f = iffm->in(0); | |
1241 assert( p_f->Opcode() == Op_IfFalse, "" ); | |
1242 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); | |
1243 assert( pre_end->loopnode()->is_pre_loop(), "" ); | |
1244 Node *pre_opaq1 = pre_end->limit(); | |
1245 // Occasionally it's possible for a pre-loop Opaque1 node to be | |
1246 // optimized away and then another round of loop opts attempted. | |
1247 // We can not optimize this particular loop in that case. | |
1248 if( pre_opaq1->Opcode() != Op_Opaque1 ) | |
1249 return; | |
1250 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; | |
1251 Node *pre_limit = pre_opaq->in(1); | |
1252 | |
1253 // Where do we put new limit calculations | |
1254 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); | |
1255 | |
1256 // Ensure the original loop limit is available from the | |
1257 // pre-loop Opaque1 node. | |
1258 Node *orig_limit = pre_opaq->original_loop_limit(); | |
1259 if( orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP ) | |
1260 return; | |
1261 | |
1262 // Need to find the main-loop zero-trip guard | |
1263 Node *bolzm = iffm->in(1); | |
1264 assert( bolzm->Opcode() == Op_Bool, "" ); | |
1265 Node *cmpzm = bolzm->in(1); | |
1266 assert( cmpzm->is_Cmp(), "" ); | |
1267 Node *opqzm = cmpzm->in(2); | |
1268 if( opqzm->Opcode() != Op_Opaque1 ) | |
1269 return; | |
1270 assert( opqzm->in(1) == main_limit, "do not understand situation" ); | |
1271 | |
1272 // Must know if its a count-up or count-down loop | |
1273 | |
1274 // protect against stride not being a constant | |
1275 if ( !cl->stride_is_con() ) { | |
1276 return; | |
1277 } | |
1278 int stride_con = cl->stride_con(); | |
1279 Node *zero = _igvn.intcon(0); | |
1280 Node *one = _igvn.intcon(1); | |
1281 set_ctrl(zero, C->root()); | |
1282 set_ctrl(one, C->root()); | |
1283 | |
1284 // Range checks that do not dominate the loop backedge (ie. | |
1285 // conditionally executed) can lengthen the pre loop limit beyond | |
1286 // the original loop limit. To prevent this, the pre limit is | |
1287 // (for stride > 0) MINed with the original loop limit (MAXed | |
1288 // stride < 0) when some range_check (rc) is conditionally | |
1289 // executed. | |
1290 bool conditional_rc = false; | |
1291 | |
1292 // Check loop body for tests of trip-counter plus loop-invariant vs | |
1293 // loop-invariant. | |
1294 for( uint i = 0; i < loop->_body.size(); i++ ) { | |
1295 Node *iff = loop->_body[i]; | |
1296 if( iff->Opcode() == Op_If ) { // Test? | |
1297 | |
1298 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
1299 // we need loop unswitching instead of iteration splitting. | |
1300 Node *exit = loop->is_loop_exit(iff); | |
1301 if( !exit ) continue; | |
1302 int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0; | |
1303 | |
1304 // Get boolean condition to test | |
1305 Node *i1 = iff->in(1); | |
1306 if( !i1->is_Bool() ) continue; | |
1307 BoolNode *bol = i1->as_Bool(); | |
1308 BoolTest b_test = bol->_test; | |
1309 // Flip sense of test if exit condition is flipped | |
1310 if( flip ) | |
1311 b_test = b_test.negate(); | |
1312 | |
1313 // Get compare | |
1314 Node *cmp = bol->in(1); | |
1315 | |
1316 // Look for trip_counter + offset vs limit | |
1317 Node *rc_exp = cmp->in(1); | |
1318 Node *limit = cmp->in(2); | |
1319 jint scale_con= 1; // Assume trip counter not scaled | |
1320 | |
1321 Node *limit_c = get_ctrl(limit); | |
1322 if( loop->is_member(get_loop(limit_c) ) ) { | |
1323 // Compare might have operands swapped; commute them | |
1324 b_test = b_test.commute(); | |
1325 rc_exp = cmp->in(2); | |
1326 limit = cmp->in(1); | |
1327 limit_c = get_ctrl(limit); | |
1328 if( loop->is_member(get_loop(limit_c) ) ) | |
1329 continue; // Both inputs are loop varying; cannot RCE | |
1330 } | |
1331 // Here we know 'limit' is loop invariant | |
1332 | |
1333 // 'limit' maybe pinned below the zero trip test (probably from a | |
1334 // previous round of rce), in which case, it can't be used in the | |
1335 // zero trip test expression which must occur before the zero test's if. | |
1336 if( limit_c == ctrl ) { | |
1337 continue; // Don't rce this check but continue looking for other candidates. | |
1338 } | |
1339 | |
1340 // Check for scaled induction variable plus an offset | |
1341 Node *offset = NULL; | |
1342 | |
1343 if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) { | |
1344 continue; | |
1345 } | |
1346 | |
1347 Node *offset_c = get_ctrl(offset); | |
1348 if( loop->is_member( get_loop(offset_c) ) ) | |
1349 continue; // Offset is not really loop invariant | |
1350 // Here we know 'offset' is loop invariant. | |
1351 | |
1352 // As above for the 'limit', the 'offset' maybe pinned below the | |
1353 // zero trip test. | |
1354 if( offset_c == ctrl ) { | |
1355 continue; // Don't rce this check but continue looking for other candidates. | |
1356 } | |
1357 | |
1358 // At this point we have the expression as: | |
1359 // scale_con * trip_counter + offset :: limit | |
1360 // where scale_con, offset and limit are loop invariant. Trip_counter | |
1361 // monotonically increases by stride_con, a constant. Both (or either) | |
1362 // stride_con and scale_con can be negative which will flip about the | |
1363 // sense of the test. | |
1364 | |
1365 // Adjust pre and main loop limits to guard the correct iteration set | |
1366 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests | |
1367 if( b_test._test == BoolTest::lt ) { // Range checks always use lt | |
1368 // The overflow limit: scale*I+offset < limit | |
1369 add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); | |
1370 // The underflow limit: 0 <= scale*I+offset. | |
1371 // Some math yields: -scale*I-(offset+1) < 0 | |
1372 Node *plus_one = new (C, 3) AddINode( offset, one ); | |
1373 register_new_node( plus_one, pre_ctrl ); | |
1374 Node *neg_offset = new (C, 3) SubINode( zero, plus_one ); | |
1375 register_new_node( neg_offset, pre_ctrl ); | |
1376 add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit ); | |
1377 if (!conditional_rc) { | |
1378 conditional_rc = !loop->dominates_backedge(iff); | |
1379 } | |
1380 } else { | |
1381 #ifndef PRODUCT | |
1382 if( PrintOpto ) | |
1383 tty->print_cr("missed RCE opportunity"); | |
1384 #endif | |
1385 continue; // In release mode, ignore it | |
1386 } | |
1387 } else { // Otherwise work on normal compares | |
1388 switch( b_test._test ) { | |
1389 case BoolTest::ge: // Convert X >= Y to -X <= -Y | |
1390 scale_con = -scale_con; | |
1391 offset = new (C, 3) SubINode( zero, offset ); | |
1392 register_new_node( offset, pre_ctrl ); | |
1393 limit = new (C, 3) SubINode( zero, limit ); | |
1394 register_new_node( limit, pre_ctrl ); | |
1395 // Fall into LE case | |
1396 case BoolTest::le: // Convert X <= Y to X < Y+1 | |
1397 limit = new (C, 3) AddINode( limit, one ); | |
1398 register_new_node( limit, pre_ctrl ); | |
1399 // Fall into LT case | |
1400 case BoolTest::lt: | |
1401 add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); | |
1402 if (!conditional_rc) { | |
1403 conditional_rc = !loop->dominates_backedge(iff); | |
1404 } | |
1405 break; | |
1406 default: | |
1407 #ifndef PRODUCT | |
1408 if( PrintOpto ) | |
1409 tty->print_cr("missed RCE opportunity"); | |
1410 #endif | |
1411 continue; // Unhandled case | |
1412 } | |
1413 } | |
1414 | |
1415 // Kill the eliminated test | |
1416 C->set_major_progress(); | |
1417 Node *kill_con = _igvn.intcon( 1-flip ); | |
1418 set_ctrl(kill_con, C->root()); | |
1419 _igvn.hash_delete(iff); | |
1420 iff->set_req(1, kill_con); | |
1421 _igvn._worklist.push(iff); | |
1422 // Find surviving projection | |
1423 assert(iff->is_If(), ""); | |
1424 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip); | |
1425 // Find loads off the surviving projection; remove their control edge | |
1426 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { | |
1427 Node* cd = dp->fast_out(i); // Control-dependent node | |
1428 if( cd->is_Load() ) { // Loads can now float around in the loop | |
1429 _igvn.hash_delete(cd); | |
1430 // Allow the load to float around in the loop, or before it | |
1431 // but NOT before the pre-loop. | |
1432 cd->set_req(0, ctrl); // ctrl, not NULL | |
1433 _igvn._worklist.push(cd); | |
1434 --i; | |
1435 --imax; | |
1436 } | |
1437 } | |
1438 | |
1439 } // End of is IF | |
1440 | |
1441 } | |
1442 | |
1443 // Update loop limits | |
1444 if (conditional_rc) { | |
1445 pre_limit = (stride_con > 0) ? (Node*)new (C,3) MinINode(pre_limit, orig_limit) | |
1446 : (Node*)new (C,3) MaxINode(pre_limit, orig_limit); | |
1447 register_new_node(pre_limit, pre_ctrl); | |
1448 } | |
1449 _igvn.hash_delete(pre_opaq); | |
1450 pre_opaq->set_req(1, pre_limit); | |
1451 | |
1452 // Note:: we are making the main loop limit no longer precise; | |
1453 // need to round up based on stride. | |
1454 if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case | |
1455 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init | |
1456 // Hopefully, compiler will optimize for powers of 2. | |
1457 Node *ctrl = get_ctrl(main_limit); | |
1458 Node *stride = cl->stride(); | |
1459 Node *init = cl->init_trip(); | |
1460 Node *span = new (C, 3) SubINode(main_limit,init); | |
1461 register_new_node(span,ctrl); | |
1462 Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1)); | |
1463 Node *add = new (C, 3) AddINode(span,rndup); | |
1464 register_new_node(add,ctrl); | |
1465 Node *div = new (C, 3) DivINode(0,add,stride); | |
1466 register_new_node(div,ctrl); | |
1467 Node *mul = new (C, 3) MulINode(div,stride); | |
1468 register_new_node(mul,ctrl); | |
1469 Node *newlim = new (C, 3) AddINode(mul,init); | |
1470 register_new_node(newlim,ctrl); | |
1471 main_limit = newlim; | |
1472 } | |
1473 | |
1474 Node *main_cle = cl->loopexit(); | |
1475 Node *main_bol = main_cle->in(1); | |
1476 // Hacking loop bounds; need private copies of exit test | |
1477 if( main_bol->outcnt() > 1 ) {// BoolNode shared? | |
1478 _igvn.hash_delete(main_cle); | |
1479 main_bol = main_bol->clone();// Clone a private BoolNode | |
1480 register_new_node( main_bol, main_cle->in(0) ); | |
1481 main_cle->set_req(1,main_bol); | |
1482 } | |
1483 Node *main_cmp = main_bol->in(1); | |
1484 if( main_cmp->outcnt() > 1 ) { // CmpNode shared? | |
1485 _igvn.hash_delete(main_bol); | |
1486 main_cmp = main_cmp->clone();// Clone a private CmpNode | |
1487 register_new_node( main_cmp, main_cle->in(0) ); | |
1488 main_bol->set_req(1,main_cmp); | |
1489 } | |
1490 // Hack the now-private loop bounds | |
1491 _igvn.hash_delete(main_cmp); | |
1492 main_cmp->set_req(2, main_limit); | |
1493 _igvn._worklist.push(main_cmp); | |
1494 // The OpaqueNode is unshared by design | |
1495 _igvn.hash_delete(opqzm); | |
1496 assert( opqzm->outcnt() == 1, "cannot hack shared node" ); | |
1497 opqzm->set_req(1,main_limit); | |
1498 _igvn._worklist.push(opqzm); | |
1499 } | |
1500 | |
1501 //------------------------------DCE_loop_body---------------------------------- | |
1502 // Remove simplistic dead code from loop body | |
1503 void IdealLoopTree::DCE_loop_body() { | |
1504 for( uint i = 0; i < _body.size(); i++ ) | |
1505 if( _body.at(i)->outcnt() == 0 ) | |
1506 _body.map( i--, _body.pop() ); | |
1507 } | |
1508 | |
1509 | |
1510 //------------------------------adjust_loop_exit_prob-------------------------- | |
1511 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage. | |
1512 // Replace with a 1-in-10 exit guess. | |
1513 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) { | |
1514 Node *test = tail(); | |
1515 while( test != _head ) { | |
1516 uint top = test->Opcode(); | |
1517 if( top == Op_IfTrue || top == Op_IfFalse ) { | |
1518 int test_con = ((ProjNode*)test)->_con; | |
1519 assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity"); | |
1520 IfNode *iff = test->in(0)->as_If(); | |
1521 if( iff->outcnt() == 2 ) { // Ignore dead tests | |
1522 Node *bol = iff->in(1); | |
1523 if( bol && bol->req() > 1 && bol->in(1) && | |
1524 ((bol->in(1)->Opcode() == Op_StorePConditional ) || | |
420
a1980da045cc
6462850: generate biased locking code in C2 ideal graph
kvn
parents:
401
diff
changeset
|
1525 (bol->in(1)->Opcode() == Op_StoreIConditional ) || |
0 | 1526 (bol->in(1)->Opcode() == Op_StoreLConditional ) || |
1527 (bol->in(1)->Opcode() == Op_CompareAndSwapI ) || | |
1528 (bol->in(1)->Opcode() == Op_CompareAndSwapL ) || | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
39
diff
changeset
|
1529 (bol->in(1)->Opcode() == Op_CompareAndSwapP ) || |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
39
diff
changeset
|
1530 (bol->in(1)->Opcode() == Op_CompareAndSwapN ))) |
0 | 1531 return; // Allocation loops RARELY take backedge |
1532 // Find the OTHER exit path from the IF | |
1533 Node* ex = iff->proj_out(1-test_con); | |
1534 float p = iff->_prob; | |
1535 if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) { | |
1536 if( top == Op_IfTrue ) { | |
1537 if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) { | |
1538 iff->_prob = PROB_STATIC_FREQUENT; | |
1539 } | |
1540 } else { | |
1541 if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) { | |
1542 iff->_prob = PROB_STATIC_INFREQUENT; | |
1543 } | |
1544 } | |
1545 } | |
1546 } | |
1547 } | |
1548 test = phase->idom(test); | |
1549 } | |
1550 } | |
1551 | |
1552 | |
1553 //------------------------------policy_do_remove_empty_loop-------------------- | |
1554 // Micro-benchmark spamming. Policy is to always remove empty loops. | |
1555 // The 'DO' part is to replace the trip counter with the value it will | |
1556 // have on the last iteration. This will break the loop. | |
1557 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) { | |
1558 // Minimum size must be empty loop | |
1559 if( _body.size() > 7/*number of nodes in an empty loop*/ ) return false; | |
1560 | |
1561 if( !_head->is_CountedLoop() ) return false; // Dead loop | |
1562 CountedLoopNode *cl = _head->as_CountedLoop(); | |
1563 if( !cl->loopexit() ) return false; // Malformed loop | |
1564 if( !phase->is_member(this,phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)) ) ) | |
1565 return false; // Infinite loop | |
1566 #ifndef PRODUCT | |
1567 if( PrintOpto ) | |
1568 tty->print_cr("Removing empty loop"); | |
1569 #endif | |
1570 #ifdef ASSERT | |
1571 // Ensure only one phi which is the iv. | |
1572 Node* iv = NULL; | |
1573 for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) { | |
1574 Node* n = cl->fast_out(i); | |
1575 if (n->Opcode() == Op_Phi) { | |
1576 assert(iv == NULL, "Too many phis" ); | |
1577 iv = n; | |
1578 } | |
1579 } | |
1580 assert(iv == cl->phi(), "Wrong phi" ); | |
1581 #endif | |
1582 // Replace the phi at loop head with the final value of the last | |
1583 // iteration. Then the CountedLoopEnd will collapse (backedge never | |
1584 // taken) and all loop-invariant uses of the exit values will be correct. | |
1585 Node *phi = cl->phi(); | |
1586 Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() ); | |
1587 phase->register_new_node(final,cl->in(LoopNode::EntryControl)); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
1588 phase->_igvn.replace_node(phi,final); |
0 | 1589 phase->C->set_major_progress(); |
1590 return true; | |
1591 } | |
1592 | |
1593 | |
1594 //============================================================================= | |
1595 //------------------------------iteration_split_impl--------------------------- | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1596 bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { |
0 | 1597 // Check and remove empty loops (spam micro-benchmarks) |
1598 if( policy_do_remove_empty_loop(phase) ) | |
1172 | 1599 return true; // Here we removed an empty loop |
0 | 1600 |
1601 bool should_peel = policy_peeling(phase); // Should we peel? | |
1602 | |
1603 bool should_unswitch = policy_unswitching(phase); | |
1604 | |
1605 // Non-counted loops may be peeled; exactly 1 iteration is peeled. | |
1606 // This removes loop-invariant tests (usually null checks). | |
1607 if( !_head->is_CountedLoop() ) { // Non-counted loop | |
1608 if (PartialPeelLoop && phase->partial_peel(this, old_new)) { | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1609 // Partial peel succeeded so terminate this round of loop opts |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1610 return false; |
0 | 1611 } |
1612 if( should_peel ) { // Should we peel? | |
1613 #ifndef PRODUCT | |
1614 if (PrintOpto) tty->print_cr("should_peel"); | |
1615 #endif | |
1616 phase->do_peeling(this,old_new); | |
1617 } else if( should_unswitch ) { | |
1618 phase->do_unswitching(this, old_new); | |
1619 } | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1620 return true; |
0 | 1621 } |
1622 CountedLoopNode *cl = _head->as_CountedLoop(); | |
1623 | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1624 if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops |
0 | 1625 |
1626 // Do nothing special to pre- and post- loops | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1627 if( cl->is_pre_loop() || cl->is_post_loop() ) return true; |
0 | 1628 |
1629 // Compute loop trip count from profile data | |
1630 compute_profile_trip_cnt(phase); | |
1631 | |
1632 // Before attempting fancy unrolling, RCE or alignment, see if we want | |
1633 // to completely unroll this loop or do loop unswitching. | |
1634 if( cl->is_normal_loop() ) { | |
789
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
1635 if (should_unswitch) { |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
1636 phase->do_unswitching(this, old_new); |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
1637 return true; |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
1638 } |
0 | 1639 bool should_maximally_unroll = policy_maximally_unroll(phase); |
1640 if( should_maximally_unroll ) { | |
1641 // Here we did some unrolling and peeling. Eventually we will | |
1642 // completely unroll this loop and it will no longer be a loop. | |
1643 phase->do_maximally_unroll(this,old_new); | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1644 return true; |
0 | 1645 } |
1646 } | |
1647 | |
1648 | |
1649 // Counted loops may be peeled, may need some iterations run up | |
1650 // front for RCE, and may want to align loop refs to a cache | |
1651 // line. Thus we clone a full loop up front whose trip count is | |
1652 // at least 1 (if peeling), but may be several more. | |
1653 | |
1654 // The main loop will start cache-line aligned with at least 1 | |
1655 // iteration of the unrolled body (zero-trip test required) and | |
1656 // will have some range checks removed. | |
1657 | |
1658 // A post-loop will finish any odd iterations (leftover after | |
1659 // unrolling), plus any needed for RCE purposes. | |
1660 | |
1661 bool should_unroll = policy_unroll(phase); | |
1662 | |
1663 bool should_rce = policy_range_check(phase); | |
1664 | |
1665 bool should_align = policy_align(phase); | |
1666 | |
1667 // If not RCE'ing (iteration splitting) or Aligning, then we do not | |
1668 // need a pre-loop. We may still need to peel an initial iteration but | |
1669 // we will not be needing an unknown number of pre-iterations. | |
1670 // | |
1671 // Basically, if may_rce_align reports FALSE first time through, | |
1672 // we will not be able to later do RCE or Aligning on this loop. | |
1673 bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align; | |
1674 | |
1675 // If we have any of these conditions (RCE, alignment, unrolling) met, then | |
1676 // we switch to the pre-/main-/post-loop model. This model also covers | |
1677 // peeling. | |
1678 if( should_rce || should_align || should_unroll ) { | |
1679 if( cl->is_normal_loop() ) // Convert to 'pre/main/post' loops | |
1680 phase->insert_pre_post_loops(this,old_new, !may_rce_align); | |
1681 | |
1682 // Adjust the pre- and main-loop limits to let the pre and post loops run | |
1683 // with full checks, but the main-loop with no checks. Remove said | |
1684 // checks from the main body. | |
1685 if( should_rce ) | |
1686 phase->do_range_check(this,old_new); | |
1687 | |
1688 // Double loop body for unrolling. Adjust the minimum-trip test (will do | |
1689 // twice as many iterations as before) and the main body limit (only do | |
1690 // an even number of trips). If we are peeling, we might enable some RCE | |
1691 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if | |
1692 // peeling. | |
1172 | 1693 if( should_unroll && !should_peel ) |
1694 phase->do_unroll(this,old_new, true); | |
0 | 1695 |
1696 // Adjust the pre-loop limits to align the main body | |
1697 // iterations. | |
1698 if( should_align ) | |
1699 Unimplemented(); | |
1700 | |
1701 } else { // Else we have an unchanged counted loop | |
1702 if( should_peel ) // Might want to peel but do nothing else | |
1703 phase->do_peeling(this,old_new); | |
1704 } | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1705 return true; |
0 | 1706 } |
1707 | |
1708 | |
1709 //============================================================================= | |
1710 //------------------------------iteration_split-------------------------------- | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1711 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) { |
0 | 1712 // Recursively iteration split nested loops |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1713 if( _child && !_child->iteration_split( phase, old_new )) |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1714 return false; |
0 | 1715 |
1716 // Clean out prior deadwood | |
1717 DCE_loop_body(); | |
1718 | |
1719 | |
1720 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage. | |
1721 // Replace with a 1-in-10 exit guess. | |
1722 if( _parent /*not the root loop*/ && | |
1723 !_irreducible && | |
1724 // Also ignore the occasional dead backedge | |
1725 !tail()->is_top() ) { | |
1726 adjust_loop_exit_prob(phase); | |
1727 } | |
1728 | |
1729 | |
1730 // Gate unrolling, RCE and peeling efforts. | |
1731 if( !_child && // If not an inner loop, do not split | |
1732 !_irreducible && | |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
0
diff
changeset
|
1733 _allow_optimizations && |
0 | 1734 !tail()->is_top() ) { // Also ignore the occasional dead backedge |
1735 if (!_has_call) { | |
1172 | 1736 if (!iteration_split_impl( phase, old_new )) { |
1737 return false; | |
1738 } | |
0 | 1739 } else if (policy_unswitching(phase)) { |
1740 phase->do_unswitching(this, old_new); | |
1741 } | |
1742 } | |
1743 | |
1744 // Minor offset re-organization to remove loop-fallout uses of | |
1745 // trip counter. | |
1746 if( _head->is_CountedLoop() ) phase->reorg_offsets( this ); | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1747 if( _next && !_next->iteration_split( phase, old_new )) |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1748 return false; |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1749 return true; |
0 | 1750 } |
1172 | 1751 |
1752 //-------------------------------is_uncommon_trap_proj---------------------------- | |
1753 // Return true if proj is the form of "proj->[region->..]call_uct" | |
1754 bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate) { | |
1755 int path_limit = 10; | |
1756 assert(proj, "invalid argument"); | |
1757 Node* out = proj; | |
1758 for (int ct = 0; ct < path_limit; ct++) { | |
1759 out = out->unique_ctrl_out(); | |
1760 if (out == NULL || out->is_Root() || out->is_Start()) | |
1761 return false; | |
1762 if (out->is_CallStaticJava()) { | |
1763 int req = out->as_CallStaticJava()->uncommon_trap_request(); | |
1764 if (req != 0) { | |
1765 Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(req); | |
1766 if (!must_reason_predicate || reason == Deoptimization::Reason_predicate){ | |
1767 return true; | |
1768 } | |
1769 } | |
1770 return false; // don't do further after call | |
1771 } | |
1772 } | |
1773 return false; | |
1774 } | |
1775 | |
1776 //-------------------------------is_uncommon_trap_if_pattern------------------------- | |
1777 // Return true for "if(test)-> proj -> ... | |
1778 // | | |
1779 // V | |
1780 // other_proj->[region->..]call_uct" | |
1781 // | |
1782 // "must_reason_predicate" means the uct reason must be Reason_predicate | |
1783 bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, bool must_reason_predicate) { | |
1784 Node *in0 = proj->in(0); | |
1785 if (!in0->is_If()) return false; | |
1280
336c6c200f5f
6930116: loop predication code does not handle If nodes with only one projection
kvn
parents:
1275
diff
changeset
|
1786 // Variation of a dead If node. |
336c6c200f5f
6930116: loop predication code does not handle If nodes with only one projection
kvn
parents:
1275
diff
changeset
|
1787 if (in0->outcnt() < 2) return false; |
1172 | 1788 IfNode* iff = in0->as_If(); |
1789 | |
1790 // we need "If(Conv2B(Opaque1(...)))" pattern for must_reason_predicate | |
1791 if (must_reason_predicate) { | |
1792 if (iff->in(1)->Opcode() != Op_Conv2B || | |
1793 iff->in(1)->in(1)->Opcode() != Op_Opaque1) { | |
1794 return false; | |
1795 } | |
1796 } | |
1797 | |
1798 ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); | |
1799 return is_uncommon_trap_proj(other_proj, must_reason_predicate); | |
1800 } | |
1801 | |
1802 //------------------------------create_new_if_for_predicate------------------------ | |
1803 // create a new if above the uct_if_pattern for the predicate to be promoted. | |
1804 // | |
1805 // before after | |
1806 // ---------- ---------- | |
1807 // ctrl ctrl | |
1808 // | | | |
1809 // | | | |
1810 // v v | |
1811 // iff new_iff | |
1812 // / \ / \ | |
1813 // / \ / \ | |
1814 // v v v v | |
1815 // uncommon_proj cont_proj if_uct if_cont | |
1816 // \ | | | | | |
1817 // \ | | | | | |
1818 // v v v | v | |
1819 // rgn loop | iff | |
1820 // | | / \ | |
1821 // | | / \ | |
1822 // v | v v | |
1823 // uncommon_trap | uncommon_proj cont_proj | |
1824 // \ \ | | | |
1825 // \ \ | | | |
1826 // v v v v | |
1827 // rgn loop | |
1828 // | | |
1829 // | | |
1830 // v | |
1831 // uncommon_trap | |
1832 // | |
1833 // | |
1834 // We will create a region to guard the uct call if there is no one there. | |
1835 // The true projecttion (if_cont) of the new_iff is returned. | |
1836 ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj) { | |
1837 assert(is_uncommon_trap_if_pattern(cont_proj, true), "must be a uct if pattern!"); | |
1838 IfNode* iff = cont_proj->in(0)->as_If(); | |
1839 | |
1840 ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); | |
1841 Node *rgn = uncommon_proj->unique_ctrl_out(); | |
1842 assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); | |
1843 | |
1844 if (!rgn->is_Region()) { // create a region to guard the call | |
1845 assert(rgn->is_Call(), "must be call uct"); | |
1846 CallNode* call = rgn->as_Call(); | |
1847 rgn = new (C, 1) RegionNode(1); | |
1848 _igvn.set_type(rgn, rgn->bottom_type()); | |
1849 rgn->add_req(uncommon_proj); | |
1850 set_idom(rgn, idom(uncommon_proj), dom_depth(uncommon_proj)+1); | |
1851 _igvn.hash_delete(call); | |
1852 call->set_req(0, rgn); | |
1853 } | |
1854 | |
1855 // Create new_iff | |
1856 uint iffdd = dom_depth(iff); | |
1857 IdealLoopTree* lp = get_loop(iff); | |
1858 IfNode *new_iff = new (C, 2) IfNode(iff->in(0), NULL, iff->_prob, iff->_fcnt); | |
1859 register_node(new_iff, lp, idom(iff), iffdd); | |
1860 Node *if_cont = new (C, 1) IfTrueNode(new_iff); | |
1861 Node *if_uct = new (C, 1) IfFalseNode(new_iff); | |
1862 if (cont_proj->is_IfFalse()) { | |
1863 // Swap | |
1864 Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; | |
1865 } | |
1866 register_node(if_cont, lp, new_iff, iffdd); | |
1867 register_node(if_uct, get_loop(rgn), new_iff, iffdd); | |
1868 | |
1869 // if_cont to iff | |
1870 _igvn.hash_delete(iff); | |
1871 iff->set_req(0, if_cont); | |
1872 set_idom(iff, if_cont, dom_depth(iff)); | |
1873 | |
1874 // if_uct to rgn | |
1875 _igvn.hash_delete(rgn); | |
1876 rgn->add_req(if_uct); | |
1877 Node* ridom = idom(rgn); | |
1878 Node* nrdom = dom_lca(ridom, new_iff); | |
1879 set_idom(rgn, nrdom, dom_depth(rgn)); | |
1880 | |
1881 // rgn must have no phis | |
1882 assert(!rgn->as_Region()->has_phi(), "region must have no phis"); | |
1883 | |
1884 return if_cont->as_Proj(); | |
1885 } | |
1886 | |
1887 //------------------------------find_predicate_insertion_point-------------------------- | |
1888 // Find a good location to insert a predicate | |
1889 ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c) { | |
1890 if (start_c == C->root() || !start_c->is_Proj()) | |
1891 return NULL; | |
1892 if (is_uncommon_trap_if_pattern(start_c->as_Proj(), true/*Reason_Predicate*/)) { | |
1893 return start_c->as_Proj(); | |
1894 } | |
1895 return NULL; | |
1896 } | |
1897 | |
1898 //------------------------------Invariance----------------------------------- | |
1899 // Helper class for loop_predication_impl to compute invariance on the fly and | |
1900 // clone invariants. | |
1901 class Invariance : public StackObj { | |
1902 VectorSet _visited, _invariant; | |
1903 Node_Stack _stack; | |
1904 VectorSet _clone_visited; | |
1905 Node_List _old_new; // map of old to new (clone) | |
1906 IdealLoopTree* _lpt; | |
1907 PhaseIdealLoop* _phase; | |
1908 | |
1909 // Helper function to set up the invariance for invariance computation | |
1910 // If n is a known invariant, set up directly. Otherwise, look up the | |
1911 // the possibility to push n onto the stack for further processing. | |
1912 void visit(Node* use, Node* n) { | |
1913 if (_lpt->is_invariant(n)) { // known invariant | |
1914 _invariant.set(n->_idx); | |
1915 } else if (!n->is_CFG()) { | |
1916 Node *n_ctrl = _phase->ctrl_or_self(n); | |
1917 Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG | |
1918 if (_phase->is_dominator(n_ctrl, u_ctrl)) { | |
1919 _stack.push(n, n->in(0) == NULL ? 1 : 0); | |
1920 } | |
1921 } | |
1922 } | |
1923 | |
1924 // Compute invariance for "the_node" and (possibly) all its inputs recursively | |
1925 // on the fly | |
1926 void compute_invariance(Node* n) { | |
1927 assert(_visited.test(n->_idx), "must be"); | |
1928 visit(n, n); | |
1929 while (_stack.is_nonempty()) { | |
1930 Node* n = _stack.node(); | |
1931 uint idx = _stack.index(); | |
1932 if (idx == n->req()) { // all inputs are processed | |
1933 _stack.pop(); | |
1934 // n is invariant if it's inputs are all invariant | |
1935 bool all_inputs_invariant = true; | |
1936 for (uint i = 0; i < n->req(); i++) { | |
1937 Node* in = n->in(i); | |
1938 if (in == NULL) continue; | |
1939 assert(_visited.test(in->_idx), "must have visited input"); | |
1940 if (!_invariant.test(in->_idx)) { // bad guy | |
1941 all_inputs_invariant = false; | |
1942 break; | |
1943 } | |
1944 } | |
1945 if (all_inputs_invariant) { | |
1946 _invariant.set(n->_idx); // I am a invariant too | |
1947 } | |
1948 } else { // process next input | |
1949 _stack.set_index(idx + 1); | |
1950 Node* m = n->in(idx); | |
1951 if (m != NULL && !_visited.test_set(m->_idx)) { | |
1952 visit(n, m); | |
1953 } | |
1954 } | |
1955 } | |
1956 } | |
1957 | |
1958 // Helper function to set up _old_new map for clone_nodes. | |
1959 // If n is a known invariant, set up directly ("clone" of n == n). | |
1960 // Otherwise, push n onto the stack for real cloning. | |
1961 void clone_visit(Node* n) { | |
1962 assert(_invariant.test(n->_idx), "must be invariant"); | |
1963 if (_lpt->is_invariant(n)) { // known invariant | |
1964 _old_new.map(n->_idx, n); | |
1965 } else{ // to be cloned | |
1966 assert (!n->is_CFG(), "should not see CFG here"); | |
1967 _stack.push(n, n->in(0) == NULL ? 1 : 0); | |
1968 } | |
1969 } | |
1970 | |
1971 // Clone "n" and (possibly) all its inputs recursively | |
1972 void clone_nodes(Node* n, Node* ctrl) { | |
1973 clone_visit(n); | |
1974 while (_stack.is_nonempty()) { | |
1975 Node* n = _stack.node(); | |
1976 uint idx = _stack.index(); | |
1977 if (idx == n->req()) { // all inputs processed, clone n! | |
1978 _stack.pop(); | |
1979 // clone invariant node | |
1980 Node* n_cl = n->clone(); | |
1981 _old_new.map(n->_idx, n_cl); | |
1982 _phase->register_new_node(n_cl, ctrl); | |
1983 for (uint i = 0; i < n->req(); i++) { | |
1984 Node* in = n_cl->in(i); | |
1985 if (in == NULL) continue; | |
1986 n_cl->set_req(i, _old_new[in->_idx]); | |
1987 } | |
1988 } else { // process next input | |
1989 _stack.set_index(idx + 1); | |
1990 Node* m = n->in(idx); | |
1991 if (m != NULL && !_clone_visited.test_set(m->_idx)) { | |
1992 clone_visit(m); // visit the input | |
1993 } | |
1994 } | |
1995 } | |
1996 } | |
1997 | |
1998 public: | |
1999 Invariance(Arena* area, IdealLoopTree* lpt) : | |
2000 _lpt(lpt), _phase(lpt->_phase), | |
2001 _visited(area), _invariant(area), _stack(area, 10 /* guess */), | |
2002 _clone_visited(area), _old_new(area) | |
2003 {} | |
2004 | |
2005 // Map old to n for invariance computation and clone | |
2006 void map_ctrl(Node* old, Node* n) { | |
2007 assert(old->is_CFG() && n->is_CFG(), "must be"); | |
2008 _old_new.map(old->_idx, n); // "clone" of old is n | |
2009 _invariant.set(old->_idx); // old is invariant | |
2010 _clone_visited.set(old->_idx); | |
2011 } | |
2012 | |
2013 // Driver function to compute invariance | |
2014 bool is_invariant(Node* n) { | |
2015 if (!_visited.test_set(n->_idx)) | |
2016 compute_invariance(n); | |
2017 return (_invariant.test(n->_idx) != 0); | |
2018 } | |
2019 | |
2020 // Driver function to clone invariant | |
2021 Node* clone(Node* n, Node* ctrl) { | |
2022 assert(ctrl->is_CFG(), "must be"); | |
2023 assert(_invariant.test(n->_idx), "must be an invariant"); | |
2024 if (!_clone_visited.test(n->_idx)) | |
2025 clone_nodes(n, ctrl); | |
2026 return _old_new[n->_idx]; | |
2027 } | |
2028 }; | |
2029 | |
2030 //------------------------------is_range_check_if ----------------------------------- | |
2031 // Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format | |
2032 // Note: this function is particularly designed for loop predication. We require load_range | |
2033 // and offset to be loop invariant computed on the fly by "invar" | |
2034 bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { | |
2035 if (!is_loop_exit(iff)) { | |
2036 return false; | |
2037 } | |
2038 if (!iff->in(1)->is_Bool()) { | |
2039 return false; | |
2040 } | |
2041 const BoolNode *bol = iff->in(1)->as_Bool(); | |
2042 if (bol->_test._test != BoolTest::lt) { | |
2043 return false; | |
2044 } | |
2045 if (!bol->in(1)->is_Cmp()) { | |
2046 return false; | |
2047 } | |
2048 const CmpNode *cmp = bol->in(1)->as_Cmp(); | |
2049 if (cmp->Opcode() != Op_CmpU ) { | |
2050 return false; | |
2051 } | |
2052 if (cmp->in(2)->Opcode() != Op_LoadRange) { | |
2053 return false; | |
2054 } | |
2055 LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2); | |
2056 if (!invar.is_invariant(lr)) { // loadRange must be invariant | |
2057 return false; | |
2058 } | |
2059 Node *iv = _head->as_CountedLoop()->phi(); | |
2060 int scale = 0; | |
2061 Node *offset = NULL; | |
2062 if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { | |
2063 return false; | |
2064 } | |
2065 if(offset && !invar.is_invariant(offset)) { // offset must be invariant | |
2066 return false; | |
2067 } | |
2068 return true; | |
2069 } | |
2070 | |
2071 //------------------------------rc_predicate----------------------------------- | |
2072 // Create a range check predicate | |
2073 // | |
2074 // for (i = init; i < limit; i += stride) { | |
2075 // a[scale*i+offset] | |
2076 // } | |
2077 // | |
2078 // Compute max(scale*i + offset) for init <= i < limit and build the predicate | |
2079 // as "max(scale*i + offset) u< a.length". | |
2080 // | |
2081 // There are two cases for max(scale*i + offset): | |
2082 // (1) stride*scale > 0 | |
2083 // max(scale*i + offset) = scale*(limit-stride) + offset | |
2084 // (2) stride*scale < 0 | |
2085 // max(scale*i + offset) = scale*init + offset | |
2086 BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, | |
2087 int scale, Node* offset, | |
2088 Node* init, Node* limit, Node* stride, | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2089 Node* range, bool upper) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2090 DEBUG_ONLY(ttyLocker ttyl); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2091 if (TraceLoopPredicate) tty->print("rc_predicate "); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2092 |
1172 | 2093 Node* max_idx_expr = init; |
2094 int stride_con = stride->get_int(); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2095 if ((stride_con > 0) == (scale > 0) == upper) { |
1172 | 2096 max_idx_expr = new (C, 3) SubINode(limit, stride); |
2097 register_new_node(max_idx_expr, ctrl); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2098 if (TraceLoopPredicate) tty->print("(limit - stride) "); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2099 } else { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2100 if (TraceLoopPredicate) tty->print("init "); |
1172 | 2101 } |
2102 | |
2103 if (scale != 1) { | |
2104 ConNode* con_scale = _igvn.intcon(scale); | |
2105 max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); | |
2106 register_new_node(max_idx_expr, ctrl); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2107 if (TraceLoopPredicate) tty->print("* %d ", scale); |
1172 | 2108 } |
2109 | |
2110 if (offset && (!offset->is_Con() || offset->get_int() != 0)){ | |
2111 max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); | |
2112 register_new_node(max_idx_expr, ctrl); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2113 if (TraceLoopPredicate) |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2114 if (offset->is_Con()) tty->print("+ %d ", offset->get_int()); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2115 else tty->print("+ offset "); |
1172 | 2116 } |
2117 | |
2118 CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); | |
2119 register_new_node(cmp, ctrl); | |
2120 BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); | |
2121 register_new_node(bol, ctrl); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2122 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2123 if (TraceLoopPredicate) tty->print_cr("<u range"); |
1172 | 2124 return bol; |
2125 } | |
2126 | |
2127 //------------------------------ loop_predication_impl-------------------------- | |
2128 // Insert loop predicates for null checks and range checks | |
2129 bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { | |
2130 if (!UseLoopPredicate) return false; | |
2131 | |
1275 | 2132 if (!loop->_head->is_Loop()) { |
2133 // Could be a simple region when irreducible loops are present. | |
2134 return false; | |
2135 } | |
2136 | |
2137 CountedLoopNode *cl = NULL; | |
2138 if (loop->_head->is_CountedLoop()) { | |
2139 cl = loop->_head->as_CountedLoop(); | |
2140 // do nothing for iteration-splitted loops | |
2141 if (!cl->is_normal_loop()) return false; | |
2142 } | |
2143 | |
1172 | 2144 // Too many traps seen? |
2145 bool tmt = C->too_many_traps(C->method(), 0, Deoptimization::Reason_predicate); | |
2146 int tc = C->trap_count(Deoptimization::Reason_predicate); | |
2147 if (tmt || tc > 0) { | |
2148 if (TraceLoopPredicate) { | |
2149 tty->print_cr("too many predicate traps: %d", tc); | |
2150 C->method()->print(); // which method has too many predicate traps | |
2151 tty->print_cr(""); | |
2152 } | |
2153 return false; | |
2154 } | |
2155 | |
2156 LoopNode *lpn = loop->_head->as_Loop(); | |
2157 Node* entry = lpn->in(LoopNode::EntryControl); | |
2158 | |
2159 ProjNode *predicate_proj = find_predicate_insertion_point(entry); | |
2160 if (!predicate_proj){ | |
2161 #ifndef PRODUCT | |
2162 if (TraceLoopPredicate) { | |
2163 tty->print("missing predicate:"); | |
2164 loop->dump_head(); | |
2165 } | |
2166 #endif | |
2167 return false; | |
2168 } | |
2169 | |
2170 ConNode* zero = _igvn.intcon(0); | |
2171 set_ctrl(zero, C->root()); | |
2172 Node *cond_false = new (C, 2) Conv2BNode(zero); | |
2173 register_new_node(cond_false, C->root()); | |
2174 ConNode* one = _igvn.intcon(1); | |
2175 set_ctrl(one, C->root()); | |
2176 Node *cond_true = new (C, 2) Conv2BNode(one); | |
2177 register_new_node(cond_true, C->root()); | |
2178 | |
2179 ResourceArea *area = Thread::current()->resource_area(); | |
2180 Invariance invar(area, loop); | |
2181 | |
2182 // Create list of if-projs such that a newer proj dominates all older | |
2183 // projs in the list, and they all dominate loop->tail() | |
2184 Node_List if_proj_list(area); | |
2185 LoopNode *head = loop->_head->as_Loop(); | |
2186 Node *current_proj = loop->tail(); //start from tail | |
2187 while ( current_proj != head ) { | |
2188 if (loop == get_loop(current_proj) && // still in the loop ? | |
2189 current_proj->is_Proj() && // is a projection ? | |
2190 current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? | |
2191 if_proj_list.push(current_proj); | |
2192 } | |
2193 current_proj = idom(current_proj); | |
2194 } | |
2195 | |
2196 bool hoisted = false; // true if at least one proj is promoted | |
2197 while (if_proj_list.size() > 0) { | |
2198 // Following are changed to nonnull when a predicate can be hoisted | |
2199 ProjNode* new_predicate_proj = NULL; | |
2200 | |
2201 ProjNode* proj = if_proj_list.pop()->as_Proj(); | |
2202 IfNode* iff = proj->in(0)->as_If(); | |
2203 | |
2204 if (!is_uncommon_trap_if_pattern(proj)) { | |
2205 if (loop->is_loop_exit(iff)) { | |
2206 // stop processing the remaining projs in the list because the execution of them | |
2207 // depends on the condition of "iff" (iff->in(1)). | |
2208 break; | |
2209 } else { | |
2210 // Both arms are inside the loop. There are two cases: | |
2211 // (1) there is one backward branch. In this case, any remaining proj | |
2212 // in the if_proj list post-dominates "iff". So, the condition of "iff" | |
2213 // does not determine the execution the remining projs directly, and we | |
2214 // can safely continue. | |
2215 // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj" | |
2216 // does not dominate loop->tail(), so it can not be in the if_proj list. | |
2217 continue; | |
2218 } | |
2219 } | |
2220 | |
2221 Node* test = iff->in(1); | |
2222 if (!test->is_Bool()){ //Conv2B, ... | |
2223 continue; | |
2224 } | |
2225 BoolNode* bol = test->as_Bool(); | |
2226 if (invar.is_invariant(bol)) { | |
2227 // Invariant test | |
2228 new_predicate_proj = create_new_if_for_predicate(predicate_proj); | |
2229 Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2230 BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2231 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2232 // Negate test if necessary |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2233 bool negated = false; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2234 if (proj->_con != predicate_proj->_con) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2235 new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate()); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2236 register_new_node(new_predicate_bol, ctrl); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2237 negated = true; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2238 } |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2239 IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2240 _igvn.hash_delete(new_predicate_iff); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2241 new_predicate_iff->set_req(1, new_predicate_bol); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2242 if (TraceLoopPredicate) tty->print_cr("invariant if%s: %d", negated ? " negated" : "", new_predicate_iff->_idx); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2243 |
1172 | 2244 } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) { |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2245 assert(proj->_con == predicate_proj->_con, "must match"); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2246 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2247 // Range check for counted loops |
1172 | 2248 const Node* cmp = bol->in(1)->as_Cmp(); |
2249 Node* idx = cmp->in(1); | |
2250 assert(!invar.is_invariant(idx), "index is variant"); | |
2251 assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be"); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2252 Node* ld_rng = cmp->in(2); // LoadRangeNode |
1172 | 2253 assert(invar.is_invariant(ld_rng), "load range must be invariant"); |
2254 int scale = 1; | |
2255 Node* offset = zero; | |
2256 bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); | |
2257 assert(ok, "must be index expression"); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2258 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2259 Node* init = cl->init_trip(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2260 Node* limit = cl->limit(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2261 Node* stride = cl->stride(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2262 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2263 // Build if's for the upper and lower bound tests. The |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2264 // lower_bound test will dominate the upper bound test and all |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2265 // cloned or created nodes will use the lower bound test as |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2266 // their declared control. |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2267 ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2268 ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2269 assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2270 Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2271 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2272 // Perform cloning to keep Invariance state correct since the |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2273 // late schedule will place invariant things in the loop. |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2274 ld_rng = invar.clone(ld_rng, ctrl); |
1172 | 2275 if (offset && offset != zero) { |
2276 assert(invar.is_invariant(offset), "offset must be loop invariant"); | |
2277 offset = invar.clone(offset, ctrl); | |
2278 } | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2279 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2280 // Test the lower bound |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2281 Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, false); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2282 IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2283 _igvn.hash_delete(lower_bound_iff); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2284 lower_bound_iff->set_req(1, lower_bound_bol); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2285 if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); |
1172 | 2286 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2287 // Test the upper bound |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2288 Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, true); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2289 IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2290 _igvn.hash_delete(upper_bound_iff); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2291 upper_bound_iff->set_req(1, upper_bound_bol); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2292 if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", lower_bound_iff->_idx); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2293 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2294 // Fall through into rest of the clean up code which will move |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2295 // any dependent nodes onto the upper bound test. |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2296 new_predicate_proj = upper_bound_proj; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2297 } else { |
1172 | 2298 // The other proj of the "iff" is a uncommon trap projection, and we can assume |
2299 // the other proj will not be executed ("executed" means uct raised). | |
2300 continue; | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2301 } |
1172 | 2302 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2303 // Success - attach condition (new_predicate_bol) to predicate if |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2304 invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate |
1172 | 2305 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2306 // Eliminate the old if in the loop body |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2307 _igvn.hash_delete(iff); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2308 iff->set_req(1, proj->is_IfFalse() ? cond_false : cond_true); |
1172 | 2309 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2310 Node* ctrl = new_predicate_proj; // new control |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2311 ProjNode* dp = proj; // old control |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2312 assert(get_loop(dp) == loop, "guaranteed at the time of collecting proj"); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2313 // Find nodes (depends only on the test) off the surviving projection; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2314 // move them outside the loop with the control of proj_clone |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2315 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2316 Node* cd = dp->fast_out(i); // Control-dependent node |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2317 if (cd->depends_only_on_test()) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2318 assert(cd->in(0) == dp, ""); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2319 _igvn.hash_delete(cd); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2320 cd->set_req(0, ctrl); // ctrl, not NULL |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2321 set_early_ctrl(cd); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2322 _igvn._worklist.push(cd); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2323 IdealLoopTree *new_loop = get_loop(get_ctrl(cd)); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2324 if (new_loop != loop) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2325 if (!loop->_child) loop->_body.yank(cd); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2326 if (!new_loop->_child ) new_loop->_body.push(cd); |
1172 | 2327 } |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2328 --i; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2329 --imax; |
1172 | 2330 } |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2331 } |
1172 | 2332 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2333 hoisted = true; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2334 C->set_major_progress(); |
1172 | 2335 } // end while |
2336 | |
2337 #ifndef PRODUCT | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2338 // report that the loop predication has been actually performed |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2339 // for this loop |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2340 if (TraceLoopPredicate && hoisted) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2341 tty->print("Loop Predication Performed:"); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2342 loop->dump_head(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2343 } |
1172 | 2344 #endif |
2345 | |
2346 return hoisted; | |
2347 } | |
2348 | |
2349 //------------------------------loop_predication-------------------------------- | |
2350 // driver routine for loop predication optimization | |
2351 bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) { | |
2352 bool hoisted = false; | |
2353 // Recursively promote predicates | |
2354 if ( _child ) { | |
2355 hoisted = _child->loop_predication( phase); | |
2356 } | |
2357 | |
2358 // self | |
2359 if (!_irreducible && !tail()->is_top()) { | |
2360 hoisted |= phase->loop_predication_impl(this); | |
2361 } | |
2362 | |
2363 if ( _next ) { //sibling | |
2364 hoisted |= _next->loop_predication( phase); | |
2365 } | |
2366 | |
2367 return hoisted; | |
2368 } |