Mercurial > hg > graal-compiler
annotate src/share/vm/opto/loopTransform.cpp @ 1941:79d04223b8a5
Added caching for resolved types and resolved fields.
This is crucial, because the local load elimination will lead to wrong results, if field equality (of two RiField objects with the same object and the same RiType) is not given. The caching makes sure that the default equals implementation is sufficient.
author | Thomas Wuerthinger <wuerthinger@ssw.jku.at> |
---|---|
date | Tue, 28 Dec 2010 18:33:26 +0100 |
parents | 75588558f1bf |
children | f95d63e2154a |
rev | line source |
---|---|
0 | 1 /* |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
2 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_loopTransform.cpp.incl" | |
27 | |
28 //------------------------------is_loop_exit----------------------------------- | |
29 // Given an IfNode, return the loop-exiting projection or NULL if both | |
30 // arms remain in the loop. | |
31 Node *IdealLoopTree::is_loop_exit(Node *iff) const { | |
32 if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests | |
33 PhaseIdealLoop *phase = _phase; | |
34 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
35 // we need loop unswitching instead of peeling. | |
36 if( !is_member(phase->get_loop( iff->raw_out(0) )) ) | |
37 return iff->raw_out(0); | |
38 if( !is_member(phase->get_loop( iff->raw_out(1) )) ) | |
39 return iff->raw_out(1); | |
40 return NULL; | |
41 } | |
42 | |
43 | |
44 //============================================================================= | |
45 | |
46 | |
47 //------------------------------record_for_igvn---------------------------- | |
48 // Put loop body on igvn work list | |
49 void IdealLoopTree::record_for_igvn() { | |
50 for( uint i = 0; i < _body.size(); i++ ) { | |
51 Node *n = _body.at(i); | |
52 _phase->_igvn._worklist.push(n); | |
53 } | |
54 } | |
55 | |
56 //------------------------------compute_profile_trip_cnt---------------------------- | |
57 // Compute loop trip count from profile data as | |
58 // (backedge_count + loop_exit_count) / loop_exit_count | |
59 void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) { | |
60 if (!_head->is_CountedLoop()) { | |
61 return; | |
62 } | |
63 CountedLoopNode* head = _head->as_CountedLoop(); | |
64 if (head->profile_trip_cnt() != COUNT_UNKNOWN) { | |
65 return; // Already computed | |
66 } | |
67 float trip_cnt = (float)max_jint; // default is big | |
68 | |
69 Node* back = head->in(LoopNode::LoopBackControl); | |
70 while (back != head) { | |
71 if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) && | |
72 back->in(0) && | |
73 back->in(0)->is_If() && | |
74 back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN && | |
75 back->in(0)->as_If()->_prob != PROB_UNKNOWN) { | |
76 break; | |
77 } | |
78 back = phase->idom(back); | |
79 } | |
80 if (back != head) { | |
81 assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) && | |
82 back->in(0), "if-projection exists"); | |
83 IfNode* back_if = back->in(0)->as_If(); | |
84 float loop_back_cnt = back_if->_fcnt * back_if->_prob; | |
85 | |
86 // Now compute a loop exit count | |
87 float loop_exit_cnt = 0.0f; | |
88 for( uint i = 0; i < _body.size(); i++ ) { | |
89 Node *n = _body[i]; | |
90 if( n->is_If() ) { | |
91 IfNode *iff = n->as_If(); | |
92 if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) { | |
93 Node *exit = is_loop_exit(iff); | |
94 if( exit ) { | |
95 float exit_prob = iff->_prob; | |
96 if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob; | |
97 if (exit_prob > PROB_MIN) { | |
98 float exit_cnt = iff->_fcnt * exit_prob; | |
99 loop_exit_cnt += exit_cnt; | |
100 } | |
101 } | |
102 } | |
103 } | |
104 } | |
105 if (loop_exit_cnt > 0.0f) { | |
106 trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt; | |
107 } else { | |
108 // No exit count so use | |
109 trip_cnt = loop_back_cnt; | |
110 } | |
111 } | |
112 #ifndef PRODUCT | |
113 if (TraceProfileTripCount) { | |
114 tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt); | |
115 } | |
116 #endif | |
117 head->set_profile_trip_cnt(trip_cnt); | |
118 } | |
119 | |
120 //---------------------is_invariant_addition----------------------------- | |
121 // Return nonzero index of invariant operand for an Add or Sub | |
605 | 122 // of (nonconstant) invariant and variant values. Helper for reassociate_invariants. |
0 | 123 int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) { |
124 int op = n->Opcode(); | |
125 if (op == Op_AddI || op == Op_SubI) { | |
126 bool in1_invar = this->is_invariant(n->in(1)); | |
127 bool in2_invar = this->is_invariant(n->in(2)); | |
128 if (in1_invar && !in2_invar) return 1; | |
129 if (!in1_invar && in2_invar) return 2; | |
130 } | |
131 return 0; | |
132 } | |
133 | |
134 //---------------------reassociate_add_sub----------------------------- | |
135 // Reassociate invariant add and subtract expressions: | |
136 // | |
137 // inv1 + (x + inv2) => ( inv1 + inv2) + x | |
138 // (x + inv2) + inv1 => ( inv1 + inv2) + x | |
139 // inv1 + (x - inv2) => ( inv1 - inv2) + x | |
140 // inv1 - (inv2 - x) => ( inv1 - inv2) + x | |
141 // (x + inv2) - inv1 => (-inv1 + inv2) + x | |
142 // (x - inv2) + inv1 => ( inv1 - inv2) + x | |
143 // (x - inv2) - inv1 => (-inv1 - inv2) + x | |
144 // inv1 + (inv2 - x) => ( inv1 + inv2) - x | |
145 // inv1 - (x - inv2) => ( inv1 + inv2) - x | |
146 // (inv2 - x) + inv1 => ( inv1 + inv2) - x | |
147 // (inv2 - x) - inv1 => (-inv1 + inv2) - x | |
148 // inv1 - (x + inv2) => ( inv1 - inv2) - x | |
149 // | |
150 Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) { | |
151 if (!n1->is_Add() && !n1->is_Sub() || n1->outcnt() == 0) return NULL; | |
152 if (is_invariant(n1)) return NULL; | |
153 int inv1_idx = is_invariant_addition(n1, phase); | |
154 if (!inv1_idx) return NULL; | |
155 // Don't mess with add of constant (igvn moves them to expression tree root.) | |
156 if (n1->is_Add() && n1->in(2)->is_Con()) return NULL; | |
157 Node* inv1 = n1->in(inv1_idx); | |
158 Node* n2 = n1->in(3 - inv1_idx); | |
159 int inv2_idx = is_invariant_addition(n2, phase); | |
160 if (!inv2_idx) return NULL; | |
161 Node* x = n2->in(3 - inv2_idx); | |
162 Node* inv2 = n2->in(inv2_idx); | |
163 | |
164 bool neg_x = n2->is_Sub() && inv2_idx == 1; | |
165 bool neg_inv2 = n2->is_Sub() && inv2_idx == 2; | |
166 bool neg_inv1 = n1->is_Sub() && inv1_idx == 2; | |
167 if (n1->is_Sub() && inv1_idx == 1) { | |
168 neg_x = !neg_x; | |
169 neg_inv2 = !neg_inv2; | |
170 } | |
171 Node* inv1_c = phase->get_ctrl(inv1); | |
172 Node* inv2_c = phase->get_ctrl(inv2); | |
173 Node* n_inv1; | |
174 if (neg_inv1) { | |
175 Node *zero = phase->_igvn.intcon(0); | |
176 phase->set_ctrl(zero, phase->C->root()); | |
177 n_inv1 = new (phase->C, 3) SubINode(zero, inv1); | |
178 phase->register_new_node(n_inv1, inv1_c); | |
179 } else { | |
180 n_inv1 = inv1; | |
181 } | |
182 Node* inv; | |
183 if (neg_inv2) { | |
184 inv = new (phase->C, 3) SubINode(n_inv1, inv2); | |
185 } else { | |
186 inv = new (phase->C, 3) AddINode(n_inv1, inv2); | |
187 } | |
188 phase->register_new_node(inv, phase->get_early_ctrl(inv)); | |
189 | |
190 Node* addx; | |
191 if (neg_x) { | |
192 addx = new (phase->C, 3) SubINode(inv, x); | |
193 } else { | |
194 addx = new (phase->C, 3) AddINode(x, inv); | |
195 } | |
196 phase->register_new_node(addx, phase->get_ctrl(x)); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
197 phase->_igvn.replace_node(n1, addx); |
0 | 198 return addx; |
199 } | |
200 | |
201 //---------------------reassociate_invariants----------------------------- | |
202 // Reassociate invariant expressions: | |
203 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) { | |
204 for (int i = _body.size() - 1; i >= 0; i--) { | |
205 Node *n = _body.at(i); | |
206 for (int j = 0; j < 5; j++) { | |
207 Node* nn = reassociate_add_sub(n, phase); | |
208 if (nn == NULL) break; | |
209 n = nn; // again | |
210 }; | |
211 } | |
212 } | |
213 | |
214 //------------------------------policy_peeling--------------------------------- | |
215 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can | |
216 // make some loop-invariant test (usually a null-check) happen before the loop. | |
217 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const { | |
218 Node *test = ((IdealLoopTree*)this)->tail(); | |
219 int body_size = ((IdealLoopTree*)this)->_body.size(); | |
220 int uniq = phase->C->unique(); | |
221 // Peeling does loop cloning which can result in O(N^2) node construction | |
222 if( body_size > 255 /* Prevent overflow for large body_size */ | |
223 || (body_size * body_size + uniq > MaxNodeLimit) ) { | |
224 return false; // too large to safely clone | |
225 } | |
226 while( test != _head ) { // Scan till run off top of loop | |
227 if( test->is_If() ) { // Test? | |
228 Node *ctrl = phase->get_ctrl(test->in(1)); | |
229 if (ctrl->is_top()) | |
230 return false; // Found dead test on live IF? No peeling! | |
231 // Standard IF only has one input value to check for loop invariance | |
232 assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added"); | |
233 // Condition is not a member of this loop? | |
234 if( !is_member(phase->get_loop(ctrl)) && | |
235 is_loop_exit(test) ) | |
236 return true; // Found reason to peel! | |
237 } | |
238 // Walk up dominators to loop _head looking for test which is | |
239 // executed on every path thru loop. | |
240 test = phase->idom(test); | |
241 } | |
242 return false; | |
243 } | |
244 | |
245 //------------------------------peeled_dom_test_elim--------------------------- | |
246 // If we got the effect of peeling, either by actually peeling or by making | |
247 // a pre-loop which must execute at least once, we can remove all | |
248 // loop-invariant dominated tests in the main body. | |
249 void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) { | |
250 bool progress = true; | |
251 while( progress ) { | |
252 progress = false; // Reset for next iteration | |
253 Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail(); | |
254 Node *test = prev->in(0); | |
255 while( test != loop->_head ) { // Scan till run off top of loop | |
256 | |
257 int p_op = prev->Opcode(); | |
258 if( (p_op == Op_IfFalse || p_op == Op_IfTrue) && | |
259 test->is_If() && // Test? | |
260 !test->in(1)->is_Con() && // And not already obvious? | |
261 // Condition is not a member of this loop? | |
262 !loop->is_member(get_loop(get_ctrl(test->in(1))))){ | |
263 // Walk loop body looking for instances of this test | |
264 for( uint i = 0; i < loop->_body.size(); i++ ) { | |
265 Node *n = loop->_body.at(i); | |
266 if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) { | |
267 // IfNode was dominated by version in peeled loop body | |
268 progress = true; | |
269 dominated_by( old_new[prev->_idx], n ); | |
270 } | |
271 } | |
272 } | |
273 prev = test; | |
274 test = idom(test); | |
275 } // End of scan tests in loop | |
276 | |
277 } // End of while( progress ) | |
278 } | |
279 | |
280 //------------------------------do_peeling------------------------------------- | |
281 // Peel the first iteration of the given loop. | |
282 // Step 1: Clone the loop body. The clone becomes the peeled iteration. | |
283 // The pre-loop illegally has 2 control users (old & new loops). | |
284 // Step 2: Make the old-loop fall-in edges point to the peeled iteration. | |
285 // Do this by making the old-loop fall-in edges act as if they came | |
286 // around the loopback from the prior iteration (follow the old-loop | |
287 // backedges) and then map to the new peeled iteration. This leaves | |
288 // the pre-loop with only 1 user (the new peeled iteration), but the | |
289 // peeled-loop backedge has 2 users. | |
290 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the | |
291 // extra backedge user. | |
292 void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) { | |
293 | |
294 C->set_major_progress(); | |
295 // Peeling a 'main' loop in a pre/main/post situation obfuscates the | |
296 // 'pre' loop from the main and the 'pre' can no longer have it's | |
297 // iterations adjusted. Therefore, we need to declare this loop as | |
298 // no longer a 'main' loop; it will need new pre and post loops before | |
299 // we can do further RCE. | |
300 Node *h = loop->_head; | |
301 if( h->is_CountedLoop() ) { | |
302 CountedLoopNode *cl = h->as_CountedLoop(); | |
303 assert(cl->trip_count() > 0, "peeling a fully unrolled loop"); | |
304 cl->set_trip_count(cl->trip_count() - 1); | |
305 if( cl->is_main_loop() ) { | |
306 cl->set_normal_loop(); | |
307 #ifndef PRODUCT | |
308 if( PrintOpto && VerifyLoopOptimizations ) { | |
309 tty->print("Peeling a 'main' loop; resetting to 'normal' "); | |
310 loop->dump_head(); | |
311 } | |
312 #endif | |
313 } | |
314 } | |
315 | |
316 // Step 1: Clone the loop body. The clone becomes the peeled iteration. | |
317 // The pre-loop illegally has 2 control users (old & new loops). | |
318 clone_loop( loop, old_new, dom_depth(loop->_head) ); | |
319 | |
320 | |
321 // Step 2: Make the old-loop fall-in edges point to the peeled iteration. | |
322 // Do this by making the old-loop fall-in edges act as if they came | |
323 // around the loopback from the prior iteration (follow the old-loop | |
324 // backedges) and then map to the new peeled iteration. This leaves | |
325 // the pre-loop with only 1 user (the new peeled iteration), but the | |
326 // peeled-loop backedge has 2 users. | |
327 for (DUIterator_Fast jmax, j = loop->_head->fast_outs(jmax); j < jmax; j++) { | |
328 Node* old = loop->_head->fast_out(j); | |
329 if( old->in(0) == loop->_head && old->req() == 3 && | |
330 (old->is_Loop() || old->is_Phi()) ) { | |
331 Node *new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx]; | |
332 if( !new_exit_value ) // Backedge value is ALSO loop invariant? | |
333 // Then loop body backedge value remains the same. | |
334 new_exit_value = old->in(LoopNode::LoopBackControl); | |
335 _igvn.hash_delete(old); | |
336 old->set_req(LoopNode::EntryControl, new_exit_value); | |
337 } | |
338 } | |
339 | |
340 | |
341 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the | |
342 // extra backedge user. | |
343 Node *nnn = old_new[loop->_head->_idx]; | |
344 _igvn.hash_delete(nnn); | |
345 nnn->set_req(LoopNode::LoopBackControl, C->top()); | |
346 for (DUIterator_Fast j2max, j2 = nnn->fast_outs(j2max); j2 < j2max; j2++) { | |
347 Node* use = nnn->fast_out(j2); | |
348 if( use->in(0) == nnn && use->req() == 3 && use->is_Phi() ) { | |
349 _igvn.hash_delete(use); | |
350 use->set_req(LoopNode::LoopBackControl, C->top()); | |
351 } | |
352 } | |
353 | |
354 | |
355 // Step 4: Correct dom-depth info. Set to loop-head depth. | |
356 int dd = dom_depth(loop->_head); | |
357 set_idom(loop->_head, loop->_head->in(1), dd); | |
358 for (uint j3 = 0; j3 < loop->_body.size(); j3++) { | |
359 Node *old = loop->_body.at(j3); | |
360 Node *nnn = old_new[old->_idx]; | |
361 if (!has_ctrl(nnn)) | |
362 set_idom(nnn, idom(nnn), dd-1); | |
363 // While we're at it, remove any SafePoints from the peeled code | |
364 if( old->Opcode() == Op_SafePoint ) { | |
365 Node *nnn = old_new[old->_idx]; | |
366 lazy_replace(nnn,nnn->in(TypeFunc::Control)); | |
367 } | |
368 } | |
369 | |
370 // Now force out all loop-invariant dominating tests. The optimizer | |
371 // finds some, but we _know_ they are all useless. | |
372 peeled_dom_test_elim(loop,old_new); | |
373 | |
374 loop->record_for_igvn(); | |
375 } | |
376 | |
377 //------------------------------policy_maximally_unroll------------------------ | |
378 // Return exact loop trip count, or 0 if not maximally unrolling | |
379 bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const { | |
380 CountedLoopNode *cl = _head->as_CountedLoop(); | |
381 assert( cl->is_normal_loop(), "" ); | |
382 | |
383 Node *init_n = cl->init_trip(); | |
384 Node *limit_n = cl->limit(); | |
385 | |
386 // Non-constant bounds | |
387 if( init_n == NULL || !init_n->is_Con() || | |
388 limit_n == NULL || !limit_n->is_Con() || | |
389 // protect against stride not being a constant | |
390 !cl->stride_is_con() ) { | |
391 return false; | |
392 } | |
393 int init = init_n->get_int(); | |
394 int limit = limit_n->get_int(); | |
395 int span = limit - init; | |
396 int stride = cl->stride_con(); | |
397 | |
398 if (init >= limit || stride > span) { | |
399 // return a false (no maximally unroll) and the regular unroll/peel | |
400 // route will make a small mess which CCP will fold away. | |
401 return false; | |
402 } | |
403 uint trip_count = span/stride; // trip_count can be greater than 2 Gig. | |
404 assert( (int)trip_count*stride == span, "must divide evenly" ); | |
405 | |
406 // Real policy: if we maximally unroll, does it get too big? | |
407 // Allow the unrolled mess to get larger than standard loop | |
408 // size. After all, it will no longer be a loop. | |
409 uint body_size = _body.size(); | |
410 uint unroll_limit = (uint)LoopUnrollLimit * 4; | |
411 assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); | |
412 cl->set_trip_count(trip_count); | |
413 if( trip_count <= unroll_limit && body_size <= unroll_limit ) { | |
414 uint new_body_size = body_size * trip_count; | |
415 if (new_body_size <= unroll_limit && | |
416 body_size == new_body_size / trip_count && | |
417 // Unrolling can result in a large amount of node construction | |
418 new_body_size < MaxNodeLimit - phase->C->unique()) { | |
419 return true; // maximally unroll | |
420 } | |
421 } | |
422 | |
423 return false; // Do not maximally unroll | |
424 } | |
425 | |
426 | |
427 //------------------------------policy_unroll---------------------------------- | |
428 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if | |
429 // the loop is a CountedLoop and the body is small enough. | |
430 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const { | |
431 | |
432 CountedLoopNode *cl = _head->as_CountedLoop(); | |
433 assert( cl->is_normal_loop() || cl->is_main_loop(), "" ); | |
434 | |
435 // protect against stride not being a constant | |
436 if( !cl->stride_is_con() ) return false; | |
437 | |
438 // protect against over-unrolling | |
439 if( cl->trip_count() <= 1 ) return false; | |
440 | |
441 int future_unroll_ct = cl->unrolled_count() * 2; | |
442 | |
443 // Don't unroll if the next round of unrolling would push us | |
444 // over the expected trip count of the loop. One is subtracted | |
445 // from the expected trip count because the pre-loop normally | |
446 // executes 1 iteration. | |
447 if (UnrollLimitForProfileCheck > 0 && | |
448 cl->profile_trip_cnt() != COUNT_UNKNOWN && | |
449 future_unroll_ct > UnrollLimitForProfileCheck && | |
450 (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) { | |
451 return false; | |
452 } | |
453 | |
454 // When unroll count is greater than LoopUnrollMin, don't unroll if: | |
455 // the residual iterations are more than 10% of the trip count | |
456 // and rounds of "unroll,optimize" are not making significant progress | |
457 // Progress defined as current size less than 20% larger than previous size. | |
458 if (UseSuperWord && cl->node_count_before_unroll() > 0 && | |
459 future_unroll_ct > LoopUnrollMin && | |
460 (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() && | |
461 1.2 * cl->node_count_before_unroll() < (double)_body.size()) { | |
462 return false; | |
463 } | |
464 | |
465 Node *init_n = cl->init_trip(); | |
466 Node *limit_n = cl->limit(); | |
467 // Non-constant bounds. | |
468 // Protect against over-unrolling when init or/and limit are not constant | |
469 // (so that trip_count's init value is maxint) but iv range is known. | |
470 if( init_n == NULL || !init_n->is_Con() || | |
471 limit_n == NULL || !limit_n->is_Con() ) { | |
472 Node* phi = cl->phi(); | |
473 if( phi != NULL ) { | |
474 assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi."); | |
475 const TypeInt* iv_type = phase->_igvn.type(phi)->is_int(); | |
476 int next_stride = cl->stride_con() * 2; // stride after this unroll | |
477 if( next_stride > 0 ) { | |
478 if( iv_type->_lo + next_stride <= iv_type->_lo || // overflow | |
479 iv_type->_lo + next_stride > iv_type->_hi ) { | |
480 return false; // over-unrolling | |
481 } | |
482 } else if( next_stride < 0 ) { | |
483 if( iv_type->_hi + next_stride >= iv_type->_hi || // overflow | |
484 iv_type->_hi + next_stride < iv_type->_lo ) { | |
485 return false; // over-unrolling | |
486 } | |
487 } | |
488 } | |
489 } | |
490 | |
491 // Adjust body_size to determine if we unroll or not | |
492 uint body_size = _body.size(); | |
493 // Key test to unroll CaffeineMark's Logic test | |
494 int xors_in_loop = 0; | |
495 // Also count ModL, DivL and MulL which expand mightly | |
496 for( uint k = 0; k < _body.size(); k++ ) { | |
497 switch( _body.at(k)->Opcode() ) { | |
498 case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test | |
499 case Op_ModL: body_size += 30; break; | |
500 case Op_DivL: body_size += 30; break; | |
501 case Op_MulL: body_size += 10; break; | |
502 } | |
503 } | |
504 | |
505 // Check for being too big | |
506 if( body_size > (uint)LoopUnrollLimit ) { | |
507 if( xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; | |
508 // Normal case: loop too big | |
509 return false; | |
510 } | |
511 | |
512 // Check for stride being a small enough constant | |
513 if( abs(cl->stride_con()) > (1<<3) ) return false; | |
514 | |
515 // Unroll once! (Each trip will soon do double iterations) | |
516 return true; | |
517 } | |
518 | |
519 //------------------------------policy_align----------------------------------- | |
520 // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the | |
521 // expression that does the alignment. Note that only one array base can be | |
605 | 522 // aligned in a loop (unless the VM guarantees mutual alignment). Note that |
0 | 523 // if we vectorize short memory ops into longer memory ops, we may want to |
524 // increase alignment. | |
525 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const { | |
526 return false; | |
527 } | |
528 | |
529 //------------------------------policy_range_check----------------------------- | |
530 // Return TRUE or FALSE if the loop should be range-check-eliminated. | |
531 // Actually we do iteration-splitting, a more powerful form of RCE. | |
532 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const { | |
533 if( !RangeCheckElimination ) return false; | |
534 | |
535 CountedLoopNode *cl = _head->as_CountedLoop(); | |
536 // If we unrolled with no intention of doing RCE and we later | |
537 // changed our minds, we got no pre-loop. Either we need to | |
538 // make a new pre-loop, or we gotta disallow RCE. | |
539 if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now. | |
540 Node *trip_counter = cl->phi(); | |
541 | |
542 // Check loop body for tests of trip-counter plus loop-invariant vs | |
543 // loop-invariant. | |
544 for( uint i = 0; i < _body.size(); i++ ) { | |
545 Node *iff = _body[i]; | |
546 if( iff->Opcode() == Op_If ) { // Test? | |
547 | |
548 // Comparing trip+off vs limit | |
549 Node *bol = iff->in(1); | |
550 if( bol->req() != 2 ) continue; // dead constant test | |
1172 | 551 if (!bol->is_Bool()) { |
552 assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); | |
553 continue; | |
554 } | |
0 | 555 Node *cmp = bol->in(1); |
556 | |
557 Node *rc_exp = cmp->in(1); | |
558 Node *limit = cmp->in(2); | |
559 | |
560 Node *limit_c = phase->get_ctrl(limit); | |
561 if( limit_c == phase->C->top() ) | |
562 return false; // Found dead test on live IF? No RCE! | |
563 if( is_member(phase->get_loop(limit_c) ) ) { | |
564 // Compare might have operands swapped; commute them | |
565 rc_exp = cmp->in(2); | |
566 limit = cmp->in(1); | |
567 limit_c = phase->get_ctrl(limit); | |
568 if( is_member(phase->get_loop(limit_c) ) ) | |
569 continue; // Both inputs are loop varying; cannot RCE | |
570 } | |
571 | |
572 if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) { | |
573 continue; | |
574 } | |
575 // Yeah! Found a test like 'trip+off vs limit' | |
576 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
577 // we need loop unswitching instead of iteration splitting. | |
578 if( is_loop_exit(iff) ) | |
579 return true; // Found reason to split iterations | |
580 } // End of is IF | |
581 } | |
582 | |
583 return false; | |
584 } | |
585 | |
586 //------------------------------policy_peel_only------------------------------- | |
587 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful | |
588 // for unrolling loops with NO array accesses. | |
589 bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const { | |
590 | |
591 for( uint i = 0; i < _body.size(); i++ ) | |
592 if( _body[i]->is_Mem() ) | |
593 return false; | |
594 | |
595 // No memory accesses at all! | |
596 return true; | |
597 } | |
598 | |
599 //------------------------------clone_up_backedge_goo-------------------------- | |
600 // If Node n lives in the back_ctrl block and cannot float, we clone a private | |
601 // version of n in preheader_ctrl block and return that, otherwise return n. | |
602 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n ) { | |
603 if( get_ctrl(n) != back_ctrl ) return n; | |
604 | |
605 Node *x = NULL; // If required, a clone of 'n' | |
606 // Check for 'n' being pinned in the backedge. | |
607 if( n->in(0) && n->in(0) == back_ctrl ) { | |
608 x = n->clone(); // Clone a copy of 'n' to preheader | |
609 x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader | |
610 } | |
611 | |
612 // Recursive fixup any other input edges into x. | |
613 // If there are no changes we can just return 'n', otherwise | |
614 // we need to clone a private copy and change it. | |
615 for( uint i = 1; i < n->req(); i++ ) { | |
616 Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i) ); | |
617 if( g != n->in(i) ) { | |
618 if( !x ) | |
619 x = n->clone(); | |
620 x->set_req(i, g); | |
621 } | |
622 } | |
623 if( x ) { // x can legally float to pre-header location | |
624 register_new_node( x, preheader_ctrl ); | |
625 return x; | |
626 } else { // raise n to cover LCA of uses | |
627 set_ctrl( n, find_non_split_ctrl(back_ctrl->in(0)) ); | |
628 } | |
629 return n; | |
630 } | |
631 | |
632 //------------------------------insert_pre_post_loops-------------------------- | |
633 // Insert pre and post loops. If peel_only is set, the pre-loop can not have | |
634 // more iterations added. It acts as a 'peel' only, no lower-bound RCE, no | |
635 // alignment. Useful to unroll loops that do no array accesses. | |
636 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) { | |
637 | |
638 C->set_major_progress(); | |
639 | |
640 // Find common pieces of the loop being guarded with pre & post loops | |
641 CountedLoopNode *main_head = loop->_head->as_CountedLoop(); | |
642 assert( main_head->is_normal_loop(), "" ); | |
643 CountedLoopEndNode *main_end = main_head->loopexit(); | |
644 assert( main_end->outcnt() == 2, "1 true, 1 false path only" ); | |
645 uint dd_main_head = dom_depth(main_head); | |
646 uint max = main_head->outcnt(); | |
647 | |
648 Node *pre_header= main_head->in(LoopNode::EntryControl); | |
649 Node *init = main_head->init_trip(); | |
650 Node *incr = main_end ->incr(); | |
651 Node *limit = main_end ->limit(); | |
652 Node *stride = main_end ->stride(); | |
653 Node *cmp = main_end ->cmp_node(); | |
654 BoolTest::mask b_test = main_end->test_trip(); | |
655 | |
656 // Need only 1 user of 'bol' because I will be hacking the loop bounds. | |
657 Node *bol = main_end->in(CountedLoopEndNode::TestValue); | |
658 if( bol->outcnt() != 1 ) { | |
659 bol = bol->clone(); | |
660 register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl)); | |
661 _igvn.hash_delete(main_end); | |
662 main_end->set_req(CountedLoopEndNode::TestValue, bol); | |
663 } | |
664 // Need only 1 user of 'cmp' because I will be hacking the loop bounds. | |
665 if( cmp->outcnt() != 1 ) { | |
666 cmp = cmp->clone(); | |
667 register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl)); | |
668 _igvn.hash_delete(bol); | |
669 bol->set_req(1, cmp); | |
670 } | |
671 | |
672 //------------------------------ | |
673 // Step A: Create Post-Loop. | |
674 Node* main_exit = main_end->proj_out(false); | |
675 assert( main_exit->Opcode() == Op_IfFalse, "" ); | |
676 int dd_main_exit = dom_depth(main_exit); | |
677 | |
678 // Step A1: Clone the loop body. The clone becomes the post-loop. The main | |
679 // loop pre-header illegally has 2 control users (old & new loops). | |
680 clone_loop( loop, old_new, dd_main_exit ); | |
681 assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" ); | |
682 CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop(); | |
683 post_head->set_post_loop(main_head); | |
684 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
685 // Reduce the post-loop trip count. |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
686 CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
687 post_end->_prob = PROB_FAIR; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
688 |
0 | 689 // Build the main-loop normal exit. |
690 IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end); | |
691 _igvn.register_new_node_with_optimizer( new_main_exit ); | |
692 set_idom(new_main_exit, main_end, dd_main_exit ); | |
693 set_loop(new_main_exit, loop->_parent); | |
694 | |
695 // Step A2: Build a zero-trip guard for the post-loop. After leaving the | |
696 // main-loop, the post-loop may not execute at all. We 'opaque' the incr | |
697 // (the main-loop trip-counter exit value) because we will be changing | |
698 // the exit value (via unrolling) so we cannot constant-fold away the zero | |
699 // trip guard until all unrolling is done. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
700 Node *zer_opaq = new (C, 2) Opaque1Node(C, incr); |
0 | 701 Node *zer_cmp = new (C, 3) CmpINode( zer_opaq, limit ); |
702 Node *zer_bol = new (C, 2) BoolNode( zer_cmp, b_test ); | |
703 register_new_node( zer_opaq, new_main_exit ); | |
704 register_new_node( zer_cmp , new_main_exit ); | |
705 register_new_node( zer_bol , new_main_exit ); | |
706 | |
707 // Build the IfNode | |
708 IfNode *zer_iff = new (C, 2) IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN ); | |
709 _igvn.register_new_node_with_optimizer( zer_iff ); | |
710 set_idom(zer_iff, new_main_exit, dd_main_exit); | |
711 set_loop(zer_iff, loop->_parent); | |
712 | |
713 // Plug in the false-path, taken if we need to skip post-loop | |
714 _igvn.hash_delete( main_exit ); | |
715 main_exit->set_req(0, zer_iff); | |
716 _igvn._worklist.push(main_exit); | |
717 set_idom(main_exit, zer_iff, dd_main_exit); | |
718 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit); | |
719 // Make the true-path, must enter the post loop | |
720 Node *zer_taken = new (C, 1) IfTrueNode( zer_iff ); | |
721 _igvn.register_new_node_with_optimizer( zer_taken ); | |
722 set_idom(zer_taken, zer_iff, dd_main_exit); | |
723 set_loop(zer_taken, loop->_parent); | |
724 // Plug in the true path | |
725 _igvn.hash_delete( post_head ); | |
726 post_head->set_req(LoopNode::EntryControl, zer_taken); | |
727 set_idom(post_head, zer_taken, dd_main_exit); | |
728 | |
729 // Step A3: Make the fall-in values to the post-loop come from the | |
730 // fall-out values of the main-loop. | |
731 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { | |
732 Node* main_phi = main_head->fast_out(i); | |
733 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) { | |
734 Node *post_phi = old_new[main_phi->_idx]; | |
735 Node *fallmain = clone_up_backedge_goo(main_head->back_control(), | |
736 post_head->init_control(), | |
737 main_phi->in(LoopNode::LoopBackControl)); | |
738 _igvn.hash_delete(post_phi); | |
739 post_phi->set_req( LoopNode::EntryControl, fallmain ); | |
740 } | |
741 } | |
742 | |
743 // Update local caches for next stanza | |
744 main_exit = new_main_exit; | |
745 | |
746 | |
747 //------------------------------ | |
748 // Step B: Create Pre-Loop. | |
749 | |
750 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main | |
751 // loop pre-header illegally has 2 control users (old & new loops). | |
752 clone_loop( loop, old_new, dd_main_head ); | |
753 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop(); | |
754 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); | |
755 pre_head->set_pre_loop(main_head); | |
756 Node *pre_incr = old_new[incr->_idx]; | |
757 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
758 // Reduce the pre-loop trip count. |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
759 pre_end->_prob = PROB_FAIR; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
760 |
0 | 761 // Find the pre-loop normal exit. |
762 Node* pre_exit = pre_end->proj_out(false); | |
763 assert( pre_exit->Opcode() == Op_IfFalse, "" ); | |
764 IfFalseNode *new_pre_exit = new (C, 1) IfFalseNode(pre_end); | |
765 _igvn.register_new_node_with_optimizer( new_pre_exit ); | |
766 set_idom(new_pre_exit, pre_end, dd_main_head); | |
767 set_loop(new_pre_exit, loop->_parent); | |
768 | |
769 // Step B2: Build a zero-trip guard for the main-loop. After leaving the | |
770 // pre-loop, the main-loop may not execute at all. Later in life this | |
771 // zero-trip guard will become the minimum-trip guard when we unroll | |
772 // the main-loop. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
773 Node *min_opaq = new (C, 2) Opaque1Node(C, limit); |
0 | 774 Node *min_cmp = new (C, 3) CmpINode( pre_incr, min_opaq ); |
775 Node *min_bol = new (C, 2) BoolNode( min_cmp, b_test ); | |
776 register_new_node( min_opaq, new_pre_exit ); | |
777 register_new_node( min_cmp , new_pre_exit ); | |
778 register_new_node( min_bol , new_pre_exit ); | |
779 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
780 // Build the IfNode (assume the main-loop is executed always). |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
781 IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN ); |
0 | 782 _igvn.register_new_node_with_optimizer( min_iff ); |
783 set_idom(min_iff, new_pre_exit, dd_main_head); | |
784 set_loop(min_iff, loop->_parent); | |
785 | |
786 // Plug in the false-path, taken if we need to skip main-loop | |
787 _igvn.hash_delete( pre_exit ); | |
788 pre_exit->set_req(0, min_iff); | |
789 set_idom(pre_exit, min_iff, dd_main_head); | |
790 set_idom(pre_exit->unique_out(), min_iff, dd_main_head); | |
791 // Make the true-path, must enter the main loop | |
792 Node *min_taken = new (C, 1) IfTrueNode( min_iff ); | |
793 _igvn.register_new_node_with_optimizer( min_taken ); | |
794 set_idom(min_taken, min_iff, dd_main_head); | |
795 set_loop(min_taken, loop->_parent); | |
796 // Plug in the true path | |
797 _igvn.hash_delete( main_head ); | |
798 main_head->set_req(LoopNode::EntryControl, min_taken); | |
799 set_idom(main_head, min_taken, dd_main_head); | |
800 | |
801 // Step B3: Make the fall-in values to the main-loop come from the | |
802 // fall-out values of the pre-loop. | |
803 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) { | |
804 Node* main_phi = main_head->fast_out(i2); | |
805 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) { | |
806 Node *pre_phi = old_new[main_phi->_idx]; | |
807 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(), | |
808 main_head->init_control(), | |
809 pre_phi->in(LoopNode::LoopBackControl)); | |
810 _igvn.hash_delete(main_phi); | |
811 main_phi->set_req( LoopNode::EntryControl, fallpre ); | |
812 } | |
813 } | |
814 | |
815 // Step B4: Shorten the pre-loop to run only 1 iteration (for now). | |
816 // RCE and alignment may change this later. | |
817 Node *cmp_end = pre_end->cmp_node(); | |
818 assert( cmp_end->in(2) == limit, "" ); | |
819 Node *pre_limit = new (C, 3) AddINode( init, stride ); | |
820 | |
821 // Save the original loop limit in this Opaque1 node for | |
822 // use by range check elimination. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
823 Node *pre_opaq = new (C, 3) Opaque1Node(C, pre_limit, limit); |
0 | 824 |
825 register_new_node( pre_limit, pre_head->in(0) ); | |
826 register_new_node( pre_opaq , pre_head->in(0) ); | |
827 | |
828 // Since no other users of pre-loop compare, I can hack limit directly | |
829 assert( cmp_end->outcnt() == 1, "no other users" ); | |
830 _igvn.hash_delete(cmp_end); | |
831 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq); | |
832 | |
833 // Special case for not-equal loop bounds: | |
834 // Change pre loop test, main loop test, and the | |
835 // main loop guard test to use lt or gt depending on stride | |
836 // direction: | |
837 // positive stride use < | |
838 // negative stride use > | |
839 | |
840 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) { | |
841 | |
842 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt; | |
843 // Modify pre loop end condition | |
844 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool(); | |
845 BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test); | |
846 register_new_node( new_bol0, pre_head->in(0) ); | |
847 _igvn.hash_delete(pre_end); | |
848 pre_end->set_req(CountedLoopEndNode::TestValue, new_bol0); | |
849 // Modify main loop guard condition | |
850 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay"); | |
851 BoolNode* new_bol1 = new (C, 2) BoolNode(min_bol->in(1), new_test); | |
852 register_new_node( new_bol1, new_pre_exit ); | |
853 _igvn.hash_delete(min_iff); | |
854 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1); | |
855 // Modify main loop end condition | |
856 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool(); | |
857 BoolNode* new_bol2 = new (C, 2) BoolNode(main_bol->in(1), new_test); | |
858 register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) ); | |
859 _igvn.hash_delete(main_end); | |
860 main_end->set_req(CountedLoopEndNode::TestValue, new_bol2); | |
861 } | |
862 | |
863 // Flag main loop | |
864 main_head->set_main_loop(); | |
865 if( peel_only ) main_head->set_main_no_pre_loop(); | |
866 | |
867 // It's difficult to be precise about the trip-counts | |
868 // for the pre/post loops. They are usually very short, | |
869 // so guess that 4 trips is a reasonable value. | |
870 post_head->set_profile_trip_cnt(4.0); | |
871 pre_head->set_profile_trip_cnt(4.0); | |
872 | |
873 // Now force out all loop-invariant dominating tests. The optimizer | |
874 // finds some, but we _know_ they are all useless. | |
875 peeled_dom_test_elim(loop,old_new); | |
876 } | |
877 | |
878 //------------------------------is_invariant----------------------------- | |
879 // Return true if n is invariant | |
880 bool IdealLoopTree::is_invariant(Node* n) const { | |
1172 | 881 Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; |
0 | 882 if (n_c->is_top()) return false; |
883 return !is_member(_phase->get_loop(n_c)); | |
884 } | |
885 | |
886 | |
887 //------------------------------do_unroll-------------------------------------- | |
888 // Unroll the loop body one step - make each trip do 2 iterations. | |
889 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) { | |
890 assert( LoopUnrollLimit, "" ); | |
891 #ifndef PRODUCT | |
892 if( PrintOpto && VerifyLoopOptimizations ) { | |
893 tty->print("Unrolling "); | |
894 loop->dump_head(); | |
895 } | |
896 #endif | |
897 CountedLoopNode *loop_head = loop->_head->as_CountedLoop(); | |
898 CountedLoopEndNode *loop_end = loop_head->loopexit(); | |
899 assert( loop_end, "" ); | |
900 | |
901 // Remember loop node count before unrolling to detect | |
902 // if rounds of unroll,optimize are making progress | |
903 loop_head->set_node_count_before_unroll(loop->_body.size()); | |
904 | |
905 Node *ctrl = loop_head->in(LoopNode::EntryControl); | |
906 Node *limit = loop_head->limit(); | |
907 Node *init = loop_head->init_trip(); | |
908 Node *strid = loop_head->stride(); | |
909 | |
910 Node *opaq = NULL; | |
911 if( adjust_min_trip ) { // If not maximally unrolling, need adjustment | |
912 assert( loop_head->is_main_loop(), "" ); | |
913 assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); | |
914 Node *iff = ctrl->in(0); | |
915 assert( iff->Opcode() == Op_If, "" ); | |
916 Node *bol = iff->in(1); | |
917 assert( bol->Opcode() == Op_Bool, "" ); | |
918 Node *cmp = bol->in(1); | |
919 assert( cmp->Opcode() == Op_CmpI, "" ); | |
920 opaq = cmp->in(2); | |
921 // Occasionally it's possible for a pre-loop Opaque1 node to be | |
922 // optimized away and then another round of loop opts attempted. | |
923 // We can not optimize this particular loop in that case. | |
924 if( opaq->Opcode() != Op_Opaque1 ) | |
925 return; // Cannot find pre-loop! Bail out! | |
926 } | |
927 | |
928 C->set_major_progress(); | |
929 | |
930 // Adjust max trip count. The trip count is intentionally rounded | |
931 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, | |
932 // the main, unrolled, part of the loop will never execute as it is protected | |
933 // by the min-trip test. See bug 4834191 for a case where we over-unrolled | |
934 // and later determined that part of the unrolled loop was dead. | |
935 loop_head->set_trip_count(loop_head->trip_count() / 2); | |
936 | |
937 // Double the count of original iterations in the unrolled loop body. | |
938 loop_head->double_unrolled_count(); | |
939 | |
940 // ----------- | |
941 // Step 2: Cut back the trip counter for an unroll amount of 2. | |
942 // Loop will normally trip (limit - init)/stride_con. Since it's a | |
943 // CountedLoop this is exact (stride divides limit-init exactly). | |
944 // We are going to double the loop body, so we want to knock off any | |
945 // odd iteration: (trip_cnt & ~1). Then back compute a new limit. | |
946 Node *span = new (C, 3) SubINode( limit, init ); | |
947 register_new_node( span, ctrl ); | |
948 Node *trip = new (C, 3) DivINode( 0, span, strid ); | |
949 register_new_node( trip, ctrl ); | |
950 Node *mtwo = _igvn.intcon(-2); | |
951 set_ctrl(mtwo, C->root()); | |
952 Node *rond = new (C, 3) AndINode( trip, mtwo ); | |
953 register_new_node( rond, ctrl ); | |
954 Node *spn2 = new (C, 3) MulINode( rond, strid ); | |
955 register_new_node( spn2, ctrl ); | |
956 Node *lim2 = new (C, 3) AddINode( spn2, init ); | |
957 register_new_node( lim2, ctrl ); | |
958 | |
959 // Hammer in the new limit | |
960 Node *ctrl2 = loop_end->in(0); | |
961 Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 ); | |
962 register_new_node( cmp2, ctrl2 ); | |
963 Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); | |
964 register_new_node( bol2, ctrl2 ); | |
965 _igvn.hash_delete(loop_end); | |
966 loop_end->set_req(CountedLoopEndNode::TestValue, bol2); | |
967 | |
968 // Step 3: Find the min-trip test guaranteed before a 'main' loop. | |
969 // Make it a 1-trip test (means at least 2 trips). | |
970 if( adjust_min_trip ) { | |
971 // Guard test uses an 'opaque' node which is not shared. Hence I | |
972 // can edit it's inputs directly. Hammer in the new limit for the | |
973 // minimum-trip guard. | |
974 assert( opaq->outcnt() == 1, "" ); | |
975 _igvn.hash_delete(opaq); | |
976 opaq->set_req(1, lim2); | |
977 } | |
978 | |
979 // --------- | |
980 // Step 4: Clone the loop body. Move it inside the loop. This loop body | |
981 // represents the odd iterations; since the loop trips an even number of | |
982 // times its backedge is never taken. Kill the backedge. | |
983 uint dd = dom_depth(loop_head); | |
984 clone_loop( loop, old_new, dd ); | |
985 | |
986 // Make backedges of the clone equal to backedges of the original. | |
987 // Make the fall-in from the original come from the fall-out of the clone. | |
988 for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) { | |
989 Node* phi = loop_head->fast_out(j); | |
990 if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) { | |
991 Node *newphi = old_new[phi->_idx]; | |
992 _igvn.hash_delete( phi ); | |
993 _igvn.hash_delete( newphi ); | |
994 | |
995 phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl)); | |
996 newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl)); | |
997 phi ->set_req(LoopNode::LoopBackControl, C->top()); | |
998 } | |
999 } | |
1000 Node *clone_head = old_new[loop_head->_idx]; | |
1001 _igvn.hash_delete( clone_head ); | |
1002 loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl)); | |
1003 clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl)); | |
1004 loop_head ->set_req(LoopNode::LoopBackControl, C->top()); | |
1005 loop->_head = clone_head; // New loop header | |
1006 | |
1007 set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd); | |
1008 set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd); | |
1009 | |
1010 // Kill the clone's backedge | |
1011 Node *newcle = old_new[loop_end->_idx]; | |
1012 _igvn.hash_delete( newcle ); | |
1013 Node *one = _igvn.intcon(1); | |
1014 set_ctrl(one, C->root()); | |
1015 newcle->set_req(1, one); | |
1016 // Force clone into same loop body | |
1017 uint max = loop->_body.size(); | |
1018 for( uint k = 0; k < max; k++ ) { | |
1019 Node *old = loop->_body.at(k); | |
1020 Node *nnn = old_new[old->_idx]; | |
1021 loop->_body.push(nnn); | |
1022 if (!has_ctrl(old)) | |
1023 set_loop(nnn, loop); | |
1024 } | |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
235
diff
changeset
|
1025 |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
235
diff
changeset
|
1026 loop->record_for_igvn(); |
0 | 1027 } |
1028 | |
1029 //------------------------------do_maximally_unroll---------------------------- | |
1030 | |
1031 void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) { | |
1032 CountedLoopNode *cl = loop->_head->as_CountedLoop(); | |
1033 assert( cl->trip_count() > 0, ""); | |
1034 | |
1035 // If loop is tripping an odd number of times, peel odd iteration | |
1036 if( (cl->trip_count() & 1) == 1 ) { | |
1037 do_peeling( loop, old_new ); | |
1038 } | |
1039 | |
1040 // Now its tripping an even number of times remaining. Double loop body. | |
1041 // Do not adjust pre-guards; they are not needed and do not exist. | |
1042 if( cl->trip_count() > 0 ) { | |
1043 do_unroll( loop, old_new, false ); | |
1044 } | |
1045 } | |
1046 | |
1047 //------------------------------dominates_backedge--------------------------------- | |
1048 // Returns true if ctrl is executed on every complete iteration | |
1049 bool IdealLoopTree::dominates_backedge(Node* ctrl) { | |
1050 assert(ctrl->is_CFG(), "must be control"); | |
1051 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); | |
1052 return _phase->dom_lca_internal(ctrl, backedge) == ctrl; | |
1053 } | |
1054 | |
1055 //------------------------------add_constraint--------------------------------- | |
1056 // Constrain the main loop iterations so the condition: | |
1057 // scale_con * I + offset < limit | |
1058 // always holds true. That is, either increase the number of iterations in | |
1059 // the pre-loop or the post-loop until the condition holds true in the main | |
1060 // loop. Stride, scale, offset and limit are all loop invariant. Further, | |
1061 // stride and scale are constants (offset and limit often are). | |
1062 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { | |
1063 | |
1064 // Compute "I :: (limit-offset)/scale_con" | |
1065 Node *con = new (C, 3) SubINode( limit, offset ); | |
1066 register_new_node( con, pre_ctrl ); | |
1067 Node *scale = _igvn.intcon(scale_con); | |
1068 set_ctrl(scale, C->root()); | |
1069 Node *X = new (C, 3) DivINode( 0, con, scale ); | |
1070 register_new_node( X, pre_ctrl ); | |
1071 | |
1072 // For positive stride, the pre-loop limit always uses a MAX function | |
1073 // and the main loop a MIN function. For negative stride these are | |
1074 // reversed. | |
1075 | |
1076 // Also for positive stride*scale the affine function is increasing, so the | |
1077 // pre-loop must check for underflow and the post-loop for overflow. | |
1078 // Negative stride*scale reverses this; pre-loop checks for overflow and | |
1079 // post-loop for underflow. | |
1080 if( stride_con*scale_con > 0 ) { | |
1081 // Compute I < (limit-offset)/scale_con | |
1082 // Adjust main-loop last iteration to be MIN/MAX(main_loop,X) | |
1083 *main_limit = (stride_con > 0) | |
1084 ? (Node*)(new (C, 3) MinINode( *main_limit, X )) | |
1085 : (Node*)(new (C, 3) MaxINode( *main_limit, X )); | |
1086 register_new_node( *main_limit, pre_ctrl ); | |
1087 | |
1088 } else { | |
1089 // Compute (limit-offset)/scale_con + SGN(-scale_con) <= I | |
1090 // Add the negation of the main-loop constraint to the pre-loop. | |
1091 // See footnote [++] below for a derivation of the limit expression. | |
1092 Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1); | |
1093 set_ctrl(incr, C->root()); | |
1094 Node *adj = new (C, 3) AddINode( X, incr ); | |
1095 register_new_node( adj, pre_ctrl ); | |
1096 *pre_limit = (scale_con > 0) | |
1097 ? (Node*)new (C, 3) MinINode( *pre_limit, adj ) | |
1098 : (Node*)new (C, 3) MaxINode( *pre_limit, adj ); | |
1099 register_new_node( *pre_limit, pre_ctrl ); | |
1100 | |
1101 // [++] Here's the algebra that justifies the pre-loop limit expression: | |
1102 // | |
1103 // NOT( scale_con * I + offset < limit ) | |
1104 // == | |
1105 // scale_con * I + offset >= limit | |
1106 // == | |
1107 // SGN(scale_con) * I >= (limit-offset)/|scale_con| | |
1108 // == | |
1109 // (limit-offset)/|scale_con| <= I * SGN(scale_con) | |
1110 // == | |
1111 // (limit-offset)/|scale_con|-1 < I * SGN(scale_con) | |
1112 // == | |
1113 // ( if (scale_con > 0) /*common case*/ | |
1114 // (limit-offset)/scale_con - 1 < I | |
1115 // else | |
1116 // (limit-offset)/scale_con + 1 > I | |
1117 // ) | |
1118 // ( if (scale_con > 0) /*common case*/ | |
1119 // (limit-offset)/scale_con + SGN(-scale_con) < I | |
1120 // else | |
1121 // (limit-offset)/scale_con + SGN(-scale_con) > I | |
1122 } | |
1123 } | |
1124 | |
1125 | |
1126 //------------------------------is_scaled_iv--------------------------------- | |
1127 // Return true if exp is a constant times an induction var | |
1128 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) { | |
1129 if (exp == iv) { | |
1130 if (p_scale != NULL) { | |
1131 *p_scale = 1; | |
1132 } | |
1133 return true; | |
1134 } | |
1135 int opc = exp->Opcode(); | |
1136 if (opc == Op_MulI) { | |
1137 if (exp->in(1) == iv && exp->in(2)->is_Con()) { | |
1138 if (p_scale != NULL) { | |
1139 *p_scale = exp->in(2)->get_int(); | |
1140 } | |
1141 return true; | |
1142 } | |
1143 if (exp->in(2) == iv && exp->in(1)->is_Con()) { | |
1144 if (p_scale != NULL) { | |
1145 *p_scale = exp->in(1)->get_int(); | |
1146 } | |
1147 return true; | |
1148 } | |
1149 } else if (opc == Op_LShiftI) { | |
1150 if (exp->in(1) == iv && exp->in(2)->is_Con()) { | |
1151 if (p_scale != NULL) { | |
1152 *p_scale = 1 << exp->in(2)->get_int(); | |
1153 } | |
1154 return true; | |
1155 } | |
1156 } | |
1157 return false; | |
1158 } | |
1159 | |
1160 //-----------------------------is_scaled_iv_plus_offset------------------------------ | |
1161 // Return true if exp is a simple induction variable expression: k1*iv + (invar + k2) | |
1162 bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) { | |
1163 if (is_scaled_iv(exp, iv, p_scale)) { | |
1164 if (p_offset != NULL) { | |
1165 Node *zero = _igvn.intcon(0); | |
1166 set_ctrl(zero, C->root()); | |
1167 *p_offset = zero; | |
1168 } | |
1169 return true; | |
1170 } | |
1171 int opc = exp->Opcode(); | |
1172 if (opc == Op_AddI) { | |
1173 if (is_scaled_iv(exp->in(1), iv, p_scale)) { | |
1174 if (p_offset != NULL) { | |
1175 *p_offset = exp->in(2); | |
1176 } | |
1177 return true; | |
1178 } | |
1179 if (exp->in(2)->is_Con()) { | |
1180 Node* offset2 = NULL; | |
1181 if (depth < 2 && | |
1182 is_scaled_iv_plus_offset(exp->in(1), iv, p_scale, | |
1183 p_offset != NULL ? &offset2 : NULL, depth+1)) { | |
1184 if (p_offset != NULL) { | |
1185 Node *ctrl_off2 = get_ctrl(offset2); | |
1186 Node* offset = new (C, 3) AddINode(offset2, exp->in(2)); | |
1187 register_new_node(offset, ctrl_off2); | |
1188 *p_offset = offset; | |
1189 } | |
1190 return true; | |
1191 } | |
1192 } | |
1193 } else if (opc == Op_SubI) { | |
1194 if (is_scaled_iv(exp->in(1), iv, p_scale)) { | |
1195 if (p_offset != NULL) { | |
1196 Node *zero = _igvn.intcon(0); | |
1197 set_ctrl(zero, C->root()); | |
1198 Node *ctrl_off = get_ctrl(exp->in(2)); | |
1199 Node* offset = new (C, 3) SubINode(zero, exp->in(2)); | |
1200 register_new_node(offset, ctrl_off); | |
1201 *p_offset = offset; | |
1202 } | |
1203 return true; | |
1204 } | |
1205 if (is_scaled_iv(exp->in(2), iv, p_scale)) { | |
1206 if (p_offset != NULL) { | |
1207 *p_scale *= -1; | |
1208 *p_offset = exp->in(1); | |
1209 } | |
1210 return true; | |
1211 } | |
1212 } | |
1213 return false; | |
1214 } | |
1215 | |
1216 //------------------------------do_range_check--------------------------------- | |
1217 // Eliminate range-checks and other trip-counter vs loop-invariant tests. | |
1218 void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) { | |
1219 #ifndef PRODUCT | |
1220 if( PrintOpto && VerifyLoopOptimizations ) { | |
1221 tty->print("Range Check Elimination "); | |
1222 loop->dump_head(); | |
1223 } | |
1224 #endif | |
1225 assert( RangeCheckElimination, "" ); | |
1226 CountedLoopNode *cl = loop->_head->as_CountedLoop(); | |
1227 assert( cl->is_main_loop(), "" ); | |
1228 | |
1229 // Find the trip counter; we are iteration splitting based on it | |
1230 Node *trip_counter = cl->phi(); | |
1231 // Find the main loop limit; we will trim it's iterations | |
1232 // to not ever trip end tests | |
1233 Node *main_limit = cl->limit(); | |
1234 // Find the pre-loop limit; we will expand it's iterations to | |
1235 // not ever trip low tests. | |
1236 Node *ctrl = cl->in(LoopNode::EntryControl); | |
1237 assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); | |
1238 Node *iffm = ctrl->in(0); | |
1239 assert( iffm->Opcode() == Op_If, "" ); | |
1240 Node *p_f = iffm->in(0); | |
1241 assert( p_f->Opcode() == Op_IfFalse, "" ); | |
1242 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); | |
1243 assert( pre_end->loopnode()->is_pre_loop(), "" ); | |
1244 Node *pre_opaq1 = pre_end->limit(); | |
1245 // Occasionally it's possible for a pre-loop Opaque1 node to be | |
1246 // optimized away and then another round of loop opts attempted. | |
1247 // We can not optimize this particular loop in that case. | |
1248 if( pre_opaq1->Opcode() != Op_Opaque1 ) | |
1249 return; | |
1250 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; | |
1251 Node *pre_limit = pre_opaq->in(1); | |
1252 | |
1253 // Where do we put new limit calculations | |
1254 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); | |
1255 | |
1256 // Ensure the original loop limit is available from the | |
1257 // pre-loop Opaque1 node. | |
1258 Node *orig_limit = pre_opaq->original_loop_limit(); | |
1259 if( orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP ) | |
1260 return; | |
1261 | |
1262 // Need to find the main-loop zero-trip guard | |
1263 Node *bolzm = iffm->in(1); | |
1264 assert( bolzm->Opcode() == Op_Bool, "" ); | |
1265 Node *cmpzm = bolzm->in(1); | |
1266 assert( cmpzm->is_Cmp(), "" ); | |
1267 Node *opqzm = cmpzm->in(2); | |
1268 if( opqzm->Opcode() != Op_Opaque1 ) | |
1269 return; | |
1270 assert( opqzm->in(1) == main_limit, "do not understand situation" ); | |
1271 | |
1272 // Must know if its a count-up or count-down loop | |
1273 | |
1274 // protect against stride not being a constant | |
1275 if ( !cl->stride_is_con() ) { | |
1276 return; | |
1277 } | |
1278 int stride_con = cl->stride_con(); | |
1279 Node *zero = _igvn.intcon(0); | |
1280 Node *one = _igvn.intcon(1); | |
1281 set_ctrl(zero, C->root()); | |
1282 set_ctrl(one, C->root()); | |
1283 | |
1284 // Range checks that do not dominate the loop backedge (ie. | |
1285 // conditionally executed) can lengthen the pre loop limit beyond | |
1286 // the original loop limit. To prevent this, the pre limit is | |
1287 // (for stride > 0) MINed with the original loop limit (MAXed | |
1288 // stride < 0) when some range_check (rc) is conditionally | |
1289 // executed. | |
1290 bool conditional_rc = false; | |
1291 | |
1292 // Check loop body for tests of trip-counter plus loop-invariant vs | |
1293 // loop-invariant. | |
1294 for( uint i = 0; i < loop->_body.size(); i++ ) { | |
1295 Node *iff = loop->_body[i]; | |
1296 if( iff->Opcode() == Op_If ) { // Test? | |
1297 | |
1298 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
1299 // we need loop unswitching instead of iteration splitting. | |
1300 Node *exit = loop->is_loop_exit(iff); | |
1301 if( !exit ) continue; | |
1302 int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0; | |
1303 | |
1304 // Get boolean condition to test | |
1305 Node *i1 = iff->in(1); | |
1306 if( !i1->is_Bool() ) continue; | |
1307 BoolNode *bol = i1->as_Bool(); | |
1308 BoolTest b_test = bol->_test; | |
1309 // Flip sense of test if exit condition is flipped | |
1310 if( flip ) | |
1311 b_test = b_test.negate(); | |
1312 | |
1313 // Get compare | |
1314 Node *cmp = bol->in(1); | |
1315 | |
1316 // Look for trip_counter + offset vs limit | |
1317 Node *rc_exp = cmp->in(1); | |
1318 Node *limit = cmp->in(2); | |
1319 jint scale_con= 1; // Assume trip counter not scaled | |
1320 | |
1321 Node *limit_c = get_ctrl(limit); | |
1322 if( loop->is_member(get_loop(limit_c) ) ) { | |
1323 // Compare might have operands swapped; commute them | |
1324 b_test = b_test.commute(); | |
1325 rc_exp = cmp->in(2); | |
1326 limit = cmp->in(1); | |
1327 limit_c = get_ctrl(limit); | |
1328 if( loop->is_member(get_loop(limit_c) ) ) | |
1329 continue; // Both inputs are loop varying; cannot RCE | |
1330 } | |
1331 // Here we know 'limit' is loop invariant | |
1332 | |
1333 // 'limit' maybe pinned below the zero trip test (probably from a | |
1334 // previous round of rce), in which case, it can't be used in the | |
1335 // zero trip test expression which must occur before the zero test's if. | |
1336 if( limit_c == ctrl ) { | |
1337 continue; // Don't rce this check but continue looking for other candidates. | |
1338 } | |
1339 | |
1340 // Check for scaled induction variable plus an offset | |
1341 Node *offset = NULL; | |
1342 | |
1343 if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) { | |
1344 continue; | |
1345 } | |
1346 | |
1347 Node *offset_c = get_ctrl(offset); | |
1348 if( loop->is_member( get_loop(offset_c) ) ) | |
1349 continue; // Offset is not really loop invariant | |
1350 // Here we know 'offset' is loop invariant. | |
1351 | |
1352 // As above for the 'limit', the 'offset' maybe pinned below the | |
1353 // zero trip test. | |
1354 if( offset_c == ctrl ) { | |
1355 continue; // Don't rce this check but continue looking for other candidates. | |
1356 } | |
1357 | |
1358 // At this point we have the expression as: | |
1359 // scale_con * trip_counter + offset :: limit | |
1360 // where scale_con, offset and limit are loop invariant. Trip_counter | |
1361 // monotonically increases by stride_con, a constant. Both (or either) | |
1362 // stride_con and scale_con can be negative which will flip about the | |
1363 // sense of the test. | |
1364 | |
1365 // Adjust pre and main loop limits to guard the correct iteration set | |
1366 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests | |
1367 if( b_test._test == BoolTest::lt ) { // Range checks always use lt | |
1368 // The overflow limit: scale*I+offset < limit | |
1369 add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); | |
1370 // The underflow limit: 0 <= scale*I+offset. | |
1371 // Some math yields: -scale*I-(offset+1) < 0 | |
1372 Node *plus_one = new (C, 3) AddINode( offset, one ); | |
1373 register_new_node( plus_one, pre_ctrl ); | |
1374 Node *neg_offset = new (C, 3) SubINode( zero, plus_one ); | |
1375 register_new_node( neg_offset, pre_ctrl ); | |
1376 add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit ); | |
1377 if (!conditional_rc) { | |
1378 conditional_rc = !loop->dominates_backedge(iff); | |
1379 } | |
1380 } else { | |
1381 #ifndef PRODUCT | |
1382 if( PrintOpto ) | |
1383 tty->print_cr("missed RCE opportunity"); | |
1384 #endif | |
1385 continue; // In release mode, ignore it | |
1386 } | |
1387 } else { // Otherwise work on normal compares | |
1388 switch( b_test._test ) { | |
1389 case BoolTest::ge: // Convert X >= Y to -X <= -Y | |
1390 scale_con = -scale_con; | |
1391 offset = new (C, 3) SubINode( zero, offset ); | |
1392 register_new_node( offset, pre_ctrl ); | |
1393 limit = new (C, 3) SubINode( zero, limit ); | |
1394 register_new_node( limit, pre_ctrl ); | |
1395 // Fall into LE case | |
1396 case BoolTest::le: // Convert X <= Y to X < Y+1 | |
1397 limit = new (C, 3) AddINode( limit, one ); | |
1398 register_new_node( limit, pre_ctrl ); | |
1399 // Fall into LT case | |
1400 case BoolTest::lt: | |
1401 add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); | |
1402 if (!conditional_rc) { | |
1403 conditional_rc = !loop->dominates_backedge(iff); | |
1404 } | |
1405 break; | |
1406 default: | |
1407 #ifndef PRODUCT | |
1408 if( PrintOpto ) | |
1409 tty->print_cr("missed RCE opportunity"); | |
1410 #endif | |
1411 continue; // Unhandled case | |
1412 } | |
1413 } | |
1414 | |
1415 // Kill the eliminated test | |
1416 C->set_major_progress(); | |
1417 Node *kill_con = _igvn.intcon( 1-flip ); | |
1418 set_ctrl(kill_con, C->root()); | |
1419 _igvn.hash_delete(iff); | |
1420 iff->set_req(1, kill_con); | |
1421 _igvn._worklist.push(iff); | |
1422 // Find surviving projection | |
1423 assert(iff->is_If(), ""); | |
1424 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip); | |
1425 // Find loads off the surviving projection; remove their control edge | |
1426 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { | |
1427 Node* cd = dp->fast_out(i); // Control-dependent node | |
1428 if( cd->is_Load() ) { // Loads can now float around in the loop | |
1429 _igvn.hash_delete(cd); | |
1430 // Allow the load to float around in the loop, or before it | |
1431 // but NOT before the pre-loop. | |
1432 cd->set_req(0, ctrl); // ctrl, not NULL | |
1433 _igvn._worklist.push(cd); | |
1434 --i; | |
1435 --imax; | |
1436 } | |
1437 } | |
1438 | |
1439 } // End of is IF | |
1440 | |
1441 } | |
1442 | |
1443 // Update loop limits | |
1444 if (conditional_rc) { | |
1445 pre_limit = (stride_con > 0) ? (Node*)new (C,3) MinINode(pre_limit, orig_limit) | |
1446 : (Node*)new (C,3) MaxINode(pre_limit, orig_limit); | |
1447 register_new_node(pre_limit, pre_ctrl); | |
1448 } | |
1449 _igvn.hash_delete(pre_opaq); | |
1450 pre_opaq->set_req(1, pre_limit); | |
1451 | |
1452 // Note:: we are making the main loop limit no longer precise; | |
1453 // need to round up based on stride. | |
1454 if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case | |
1455 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init | |
1456 // Hopefully, compiler will optimize for powers of 2. | |
1457 Node *ctrl = get_ctrl(main_limit); | |
1458 Node *stride = cl->stride(); | |
1459 Node *init = cl->init_trip(); | |
1460 Node *span = new (C, 3) SubINode(main_limit,init); | |
1461 register_new_node(span,ctrl); | |
1462 Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1)); | |
1463 Node *add = new (C, 3) AddINode(span,rndup); | |
1464 register_new_node(add,ctrl); | |
1465 Node *div = new (C, 3) DivINode(0,add,stride); | |
1466 register_new_node(div,ctrl); | |
1467 Node *mul = new (C, 3) MulINode(div,stride); | |
1468 register_new_node(mul,ctrl); | |
1469 Node *newlim = new (C, 3) AddINode(mul,init); | |
1470 register_new_node(newlim,ctrl); | |
1471 main_limit = newlim; | |
1472 } | |
1473 | |
1474 Node *main_cle = cl->loopexit(); | |
1475 Node *main_bol = main_cle->in(1); | |
1476 // Hacking loop bounds; need private copies of exit test | |
1477 if( main_bol->outcnt() > 1 ) {// BoolNode shared? | |
1478 _igvn.hash_delete(main_cle); | |
1479 main_bol = main_bol->clone();// Clone a private BoolNode | |
1480 register_new_node( main_bol, main_cle->in(0) ); | |
1481 main_cle->set_req(1,main_bol); | |
1482 } | |
1483 Node *main_cmp = main_bol->in(1); | |
1484 if( main_cmp->outcnt() > 1 ) { // CmpNode shared? | |
1485 _igvn.hash_delete(main_bol); | |
1486 main_cmp = main_cmp->clone();// Clone a private CmpNode | |
1487 register_new_node( main_cmp, main_cle->in(0) ); | |
1488 main_bol->set_req(1,main_cmp); | |
1489 } | |
1490 // Hack the now-private loop bounds | |
1491 _igvn.hash_delete(main_cmp); | |
1492 main_cmp->set_req(2, main_limit); | |
1493 _igvn._worklist.push(main_cmp); | |
1494 // The OpaqueNode is unshared by design | |
1495 _igvn.hash_delete(opqzm); | |
1496 assert( opqzm->outcnt() == 1, "cannot hack shared node" ); | |
1497 opqzm->set_req(1,main_limit); | |
1498 _igvn._worklist.push(opqzm); | |
1499 } | |
1500 | |
1501 //------------------------------DCE_loop_body---------------------------------- | |
1502 // Remove simplistic dead code from loop body | |
1503 void IdealLoopTree::DCE_loop_body() { | |
1504 for( uint i = 0; i < _body.size(); i++ ) | |
1505 if( _body.at(i)->outcnt() == 0 ) | |
1506 _body.map( i--, _body.pop() ); | |
1507 } | |
1508 | |
1509 | |
1510 //------------------------------adjust_loop_exit_prob-------------------------- | |
1511 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage. | |
1512 // Replace with a 1-in-10 exit guess. | |
1513 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) { | |
1514 Node *test = tail(); | |
1515 while( test != _head ) { | |
1516 uint top = test->Opcode(); | |
1517 if( top == Op_IfTrue || top == Op_IfFalse ) { | |
1518 int test_con = ((ProjNode*)test)->_con; | |
1519 assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity"); | |
1520 IfNode *iff = test->in(0)->as_If(); | |
1521 if( iff->outcnt() == 2 ) { // Ignore dead tests | |
1522 Node *bol = iff->in(1); | |
1523 if( bol && bol->req() > 1 && bol->in(1) && | |
1524 ((bol->in(1)->Opcode() == Op_StorePConditional ) || | |
420
a1980da045cc
6462850: generate biased locking code in C2 ideal graph
kvn
parents:
401
diff
changeset
|
1525 (bol->in(1)->Opcode() == Op_StoreIConditional ) || |
0 | 1526 (bol->in(1)->Opcode() == Op_StoreLConditional ) || |
1527 (bol->in(1)->Opcode() == Op_CompareAndSwapI ) || | |
1528 (bol->in(1)->Opcode() == Op_CompareAndSwapL ) || | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
39
diff
changeset
|
1529 (bol->in(1)->Opcode() == Op_CompareAndSwapP ) || |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
39
diff
changeset
|
1530 (bol->in(1)->Opcode() == Op_CompareAndSwapN ))) |
0 | 1531 return; // Allocation loops RARELY take backedge |
1532 // Find the OTHER exit path from the IF | |
1533 Node* ex = iff->proj_out(1-test_con); | |
1534 float p = iff->_prob; | |
1535 if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) { | |
1536 if( top == Op_IfTrue ) { | |
1537 if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) { | |
1538 iff->_prob = PROB_STATIC_FREQUENT; | |
1539 } | |
1540 } else { | |
1541 if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) { | |
1542 iff->_prob = PROB_STATIC_INFREQUENT; | |
1543 } | |
1544 } | |
1545 } | |
1546 } | |
1547 } | |
1548 test = phase->idom(test); | |
1549 } | |
1550 } | |
1551 | |
1552 | |
1553 //------------------------------policy_do_remove_empty_loop-------------------- | |
1554 // Micro-benchmark spamming. Policy is to always remove empty loops. | |
1555 // The 'DO' part is to replace the trip counter with the value it will | |
1556 // have on the last iteration. This will break the loop. | |
1557 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) { | |
1558 // Minimum size must be empty loop | |
1559 if( _body.size() > 7/*number of nodes in an empty loop*/ ) return false; | |
1560 | |
1561 if( !_head->is_CountedLoop() ) return false; // Dead loop | |
1562 CountedLoopNode *cl = _head->as_CountedLoop(); | |
1563 if( !cl->loopexit() ) return false; // Malformed loop | |
1564 if( !phase->is_member(this,phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)) ) ) | |
1565 return false; // Infinite loop | |
1566 #ifndef PRODUCT | |
1567 if( PrintOpto ) | |
1568 tty->print_cr("Removing empty loop"); | |
1569 #endif | |
1570 #ifdef ASSERT | |
1571 // Ensure only one phi which is the iv. | |
1572 Node* iv = NULL; | |
1573 for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) { | |
1574 Node* n = cl->fast_out(i); | |
1575 if (n->Opcode() == Op_Phi) { | |
1576 assert(iv == NULL, "Too many phis" ); | |
1577 iv = n; | |
1578 } | |
1579 } | |
1580 assert(iv == cl->phi(), "Wrong phi" ); | |
1581 #endif | |
1582 // Replace the phi at loop head with the final value of the last | |
1583 // iteration. Then the CountedLoopEnd will collapse (backedge never | |
1584 // taken) and all loop-invariant uses of the exit values will be correct. | |
1585 Node *phi = cl->phi(); | |
1586 Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() ); | |
1587 phase->register_new_node(final,cl->in(LoopNode::EntryControl)); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
1588 phase->_igvn.replace_node(phi,final); |
0 | 1589 phase->C->set_major_progress(); |
1590 return true; | |
1591 } | |
1592 | |
1593 | |
1594 //============================================================================= | |
1595 //------------------------------iteration_split_impl--------------------------- | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1596 bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { |
0 | 1597 // Check and remove empty loops (spam micro-benchmarks) |
1598 if( policy_do_remove_empty_loop(phase) ) | |
1172 | 1599 return true; // Here we removed an empty loop |
0 | 1600 |
1601 bool should_peel = policy_peeling(phase); // Should we peel? | |
1602 | |
1603 bool should_unswitch = policy_unswitching(phase); | |
1604 | |
1605 // Non-counted loops may be peeled; exactly 1 iteration is peeled. | |
1606 // This removes loop-invariant tests (usually null checks). | |
1607 if( !_head->is_CountedLoop() ) { // Non-counted loop | |
1608 if (PartialPeelLoop && phase->partial_peel(this, old_new)) { | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1609 // Partial peel succeeded so terminate this round of loop opts |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1610 return false; |
0 | 1611 } |
1612 if( should_peel ) { // Should we peel? | |
1613 #ifndef PRODUCT | |
1614 if (PrintOpto) tty->print_cr("should_peel"); | |
1615 #endif | |
1616 phase->do_peeling(this,old_new); | |
1617 } else if( should_unswitch ) { | |
1618 phase->do_unswitching(this, old_new); | |
1619 } | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1620 return true; |
0 | 1621 } |
1622 CountedLoopNode *cl = _head->as_CountedLoop(); | |
1623 | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1624 if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops |
0 | 1625 |
1626 // Do nothing special to pre- and post- loops | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1627 if( cl->is_pre_loop() || cl->is_post_loop() ) return true; |
0 | 1628 |
1629 // Compute loop trip count from profile data | |
1630 compute_profile_trip_cnt(phase); | |
1631 | |
1632 // Before attempting fancy unrolling, RCE or alignment, see if we want | |
1633 // to completely unroll this loop or do loop unswitching. | |
1634 if( cl->is_normal_loop() ) { | |
789
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
1635 if (should_unswitch) { |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
1636 phase->do_unswitching(this, old_new); |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
1637 return true; |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
1638 } |
0 | 1639 bool should_maximally_unroll = policy_maximally_unroll(phase); |
1640 if( should_maximally_unroll ) { | |
1641 // Here we did some unrolling and peeling. Eventually we will | |
1642 // completely unroll this loop and it will no longer be a loop. | |
1643 phase->do_maximally_unroll(this,old_new); | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1644 return true; |
0 | 1645 } |
1646 } | |
1647 | |
1648 | |
1649 // Counted loops may be peeled, may need some iterations run up | |
1650 // front for RCE, and may want to align loop refs to a cache | |
1651 // line. Thus we clone a full loop up front whose trip count is | |
1652 // at least 1 (if peeling), but may be several more. | |
1653 | |
1654 // The main loop will start cache-line aligned with at least 1 | |
1655 // iteration of the unrolled body (zero-trip test required) and | |
1656 // will have some range checks removed. | |
1657 | |
1658 // A post-loop will finish any odd iterations (leftover after | |
1659 // unrolling), plus any needed for RCE purposes. | |
1660 | |
1661 bool should_unroll = policy_unroll(phase); | |
1662 | |
1663 bool should_rce = policy_range_check(phase); | |
1664 | |
1665 bool should_align = policy_align(phase); | |
1666 | |
1667 // If not RCE'ing (iteration splitting) or Aligning, then we do not | |
1668 // need a pre-loop. We may still need to peel an initial iteration but | |
1669 // we will not be needing an unknown number of pre-iterations. | |
1670 // | |
1671 // Basically, if may_rce_align reports FALSE first time through, | |
1672 // we will not be able to later do RCE or Aligning on this loop. | |
1673 bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align; | |
1674 | |
1675 // If we have any of these conditions (RCE, alignment, unrolling) met, then | |
1676 // we switch to the pre-/main-/post-loop model. This model also covers | |
1677 // peeling. | |
1678 if( should_rce || should_align || should_unroll ) { | |
1679 if( cl->is_normal_loop() ) // Convert to 'pre/main/post' loops | |
1680 phase->insert_pre_post_loops(this,old_new, !may_rce_align); | |
1681 | |
1682 // Adjust the pre- and main-loop limits to let the pre and post loops run | |
1683 // with full checks, but the main-loop with no checks. Remove said | |
1684 // checks from the main body. | |
1685 if( should_rce ) | |
1686 phase->do_range_check(this,old_new); | |
1687 | |
1688 // Double loop body for unrolling. Adjust the minimum-trip test (will do | |
1689 // twice as many iterations as before) and the main body limit (only do | |
1690 // an even number of trips). If we are peeling, we might enable some RCE | |
1691 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if | |
1692 // peeling. | |
1172 | 1693 if( should_unroll && !should_peel ) |
1694 phase->do_unroll(this,old_new, true); | |
0 | 1695 |
1696 // Adjust the pre-loop limits to align the main body | |
1697 // iterations. | |
1698 if( should_align ) | |
1699 Unimplemented(); | |
1700 | |
1701 } else { // Else we have an unchanged counted loop | |
1702 if( should_peel ) // Might want to peel but do nothing else | |
1703 phase->do_peeling(this,old_new); | |
1704 } | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1705 return true; |
0 | 1706 } |
1707 | |
1708 | |
1709 //============================================================================= | |
1710 //------------------------------iteration_split-------------------------------- | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1711 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) { |
0 | 1712 // Recursively iteration split nested loops |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1713 if( _child && !_child->iteration_split( phase, old_new )) |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1714 return false; |
0 | 1715 |
1716 // Clean out prior deadwood | |
1717 DCE_loop_body(); | |
1718 | |
1719 | |
1720 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage. | |
1721 // Replace with a 1-in-10 exit guess. | |
1722 if( _parent /*not the root loop*/ && | |
1723 !_irreducible && | |
1724 // Also ignore the occasional dead backedge | |
1725 !tail()->is_top() ) { | |
1726 adjust_loop_exit_prob(phase); | |
1727 } | |
1728 | |
1729 | |
1730 // Gate unrolling, RCE and peeling efforts. | |
1731 if( !_child && // If not an inner loop, do not split | |
1732 !_irreducible && | |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
0
diff
changeset
|
1733 _allow_optimizations && |
0 | 1734 !tail()->is_top() ) { // Also ignore the occasional dead backedge |
1735 if (!_has_call) { | |
1172 | 1736 if (!iteration_split_impl( phase, old_new )) { |
1737 return false; | |
1738 } | |
0 | 1739 } else if (policy_unswitching(phase)) { |
1740 phase->do_unswitching(this, old_new); | |
1741 } | |
1742 } | |
1743 | |
1744 // Minor offset re-organization to remove loop-fallout uses of | |
1745 // trip counter. | |
1746 if( _head->is_CountedLoop() ) phase->reorg_offsets( this ); | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1747 if( _next && !_next->iteration_split( phase, old_new )) |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1748 return false; |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
1749 return true; |
0 | 1750 } |
1172 | 1751 |
1752 //-------------------------------is_uncommon_trap_proj---------------------------- | |
1753 // Return true if proj is the form of "proj->[region->..]call_uct" | |
1754 bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate) { | |
1755 int path_limit = 10; | |
1756 assert(proj, "invalid argument"); | |
1757 Node* out = proj; | |
1758 for (int ct = 0; ct < path_limit; ct++) { | |
1759 out = out->unique_ctrl_out(); | |
1760 if (out == NULL || out->is_Root() || out->is_Start()) | |
1761 return false; | |
1762 if (out->is_CallStaticJava()) { | |
1763 int req = out->as_CallStaticJava()->uncommon_trap_request(); | |
1764 if (req != 0) { | |
1765 Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(req); | |
1766 if (!must_reason_predicate || reason == Deoptimization::Reason_predicate){ | |
1767 return true; | |
1768 } | |
1769 } | |
1770 return false; // don't do further after call | |
1771 } | |
1772 } | |
1773 return false; | |
1774 } | |
1775 | |
1776 //-------------------------------is_uncommon_trap_if_pattern------------------------- | |
1777 // Return true for "if(test)-> proj -> ... | |
1778 // | | |
1779 // V | |
1780 // other_proj->[region->..]call_uct" | |
1781 // | |
1782 // "must_reason_predicate" means the uct reason must be Reason_predicate | |
1783 bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, bool must_reason_predicate) { | |
1784 Node *in0 = proj->in(0); | |
1785 if (!in0->is_If()) return false; | |
1280
336c6c200f5f
6930116: loop predication code does not handle If nodes with only one projection
kvn
parents:
1275
diff
changeset
|
1786 // Variation of a dead If node. |
336c6c200f5f
6930116: loop predication code does not handle If nodes with only one projection
kvn
parents:
1275
diff
changeset
|
1787 if (in0->outcnt() < 2) return false; |
1172 | 1788 IfNode* iff = in0->as_If(); |
1789 | |
1790 // we need "If(Conv2B(Opaque1(...)))" pattern for must_reason_predicate | |
1791 if (must_reason_predicate) { | |
1792 if (iff->in(1)->Opcode() != Op_Conv2B || | |
1793 iff->in(1)->in(1)->Opcode() != Op_Opaque1) { | |
1794 return false; | |
1795 } | |
1796 } | |
1797 | |
1798 ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); | |
1799 return is_uncommon_trap_proj(other_proj, must_reason_predicate); | |
1800 } | |
1801 | |
1802 //------------------------------create_new_if_for_predicate------------------------ | |
1803 // create a new if above the uct_if_pattern for the predicate to be promoted. | |
1804 // | |
1805 // before after | |
1806 // ---------- ---------- | |
1807 // ctrl ctrl | |
1808 // | | | |
1809 // | | | |
1810 // v v | |
1811 // iff new_iff | |
1812 // / \ / \ | |
1813 // / \ / \ | |
1814 // v v v v | |
1815 // uncommon_proj cont_proj if_uct if_cont | |
1816 // \ | | | | | |
1817 // \ | | | | | |
1818 // v v v | v | |
1819 // rgn loop | iff | |
1820 // | | / \ | |
1821 // | | / \ | |
1822 // v | v v | |
1823 // uncommon_trap | uncommon_proj cont_proj | |
1824 // \ \ | | | |
1825 // \ \ | | | |
1826 // v v v v | |
1827 // rgn loop | |
1828 // | | |
1829 // | | |
1830 // v | |
1831 // uncommon_trap | |
1832 // | |
1833 // | |
1834 // We will create a region to guard the uct call if there is no one there. | |
1835 // The true projecttion (if_cont) of the new_iff is returned. | |
1836 ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj) { | |
1837 assert(is_uncommon_trap_if_pattern(cont_proj, true), "must be a uct if pattern!"); | |
1838 IfNode* iff = cont_proj->in(0)->as_If(); | |
1839 | |
1840 ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); | |
1841 Node *rgn = uncommon_proj->unique_ctrl_out(); | |
1842 assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); | |
1843 | |
1844 if (!rgn->is_Region()) { // create a region to guard the call | |
1845 assert(rgn->is_Call(), "must be call uct"); | |
1846 CallNode* call = rgn->as_Call(); | |
1847 rgn = new (C, 1) RegionNode(1); | |
1848 _igvn.set_type(rgn, rgn->bottom_type()); | |
1849 rgn->add_req(uncommon_proj); | |
1850 set_idom(rgn, idom(uncommon_proj), dom_depth(uncommon_proj)+1); | |
1851 _igvn.hash_delete(call); | |
1852 call->set_req(0, rgn); | |
1853 } | |
1854 | |
1855 // Create new_iff | |
1856 uint iffdd = dom_depth(iff); | |
1857 IdealLoopTree* lp = get_loop(iff); | |
1858 IfNode *new_iff = new (C, 2) IfNode(iff->in(0), NULL, iff->_prob, iff->_fcnt); | |
1859 register_node(new_iff, lp, idom(iff), iffdd); | |
1860 Node *if_cont = new (C, 1) IfTrueNode(new_iff); | |
1861 Node *if_uct = new (C, 1) IfFalseNode(new_iff); | |
1862 if (cont_proj->is_IfFalse()) { | |
1863 // Swap | |
1864 Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; | |
1865 } | |
1866 register_node(if_cont, lp, new_iff, iffdd); | |
1867 register_node(if_uct, get_loop(rgn), new_iff, iffdd); | |
1868 | |
1869 // if_cont to iff | |
1870 _igvn.hash_delete(iff); | |
1871 iff->set_req(0, if_cont); | |
1872 set_idom(iff, if_cont, dom_depth(iff)); | |
1873 | |
1874 // if_uct to rgn | |
1875 _igvn.hash_delete(rgn); | |
1876 rgn->add_req(if_uct); | |
1877 Node* ridom = idom(rgn); | |
1878 Node* nrdom = dom_lca(ridom, new_iff); | |
1879 set_idom(rgn, nrdom, dom_depth(rgn)); | |
1880 | |
1881 // rgn must have no phis | |
1882 assert(!rgn->as_Region()->has_phi(), "region must have no phis"); | |
1883 | |
1884 return if_cont->as_Proj(); | |
1885 } | |
1886 | |
1887 //------------------------------find_predicate_insertion_point-------------------------- | |
1888 // Find a good location to insert a predicate | |
1889 ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c) { | |
1890 if (start_c == C->root() || !start_c->is_Proj()) | |
1891 return NULL; | |
1892 if (is_uncommon_trap_if_pattern(start_c->as_Proj(), true/*Reason_Predicate*/)) { | |
1893 return start_c->as_Proj(); | |
1894 } | |
1895 return NULL; | |
1896 } | |
1897 | |
1898 //------------------------------Invariance----------------------------------- | |
1899 // Helper class for loop_predication_impl to compute invariance on the fly and | |
1900 // clone invariants. | |
1901 class Invariance : public StackObj { | |
1902 VectorSet _visited, _invariant; | |
1903 Node_Stack _stack; | |
1904 VectorSet _clone_visited; | |
1905 Node_List _old_new; // map of old to new (clone) | |
1906 IdealLoopTree* _lpt; | |
1907 PhaseIdealLoop* _phase; | |
1908 | |
1909 // Helper function to set up the invariance for invariance computation | |
1910 // If n is a known invariant, set up directly. Otherwise, look up the | |
1911 // the possibility to push n onto the stack for further processing. | |
1912 void visit(Node* use, Node* n) { | |
1913 if (_lpt->is_invariant(n)) { // known invariant | |
1914 _invariant.set(n->_idx); | |
1915 } else if (!n->is_CFG()) { | |
1916 Node *n_ctrl = _phase->ctrl_or_self(n); | |
1917 Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG | |
1918 if (_phase->is_dominator(n_ctrl, u_ctrl)) { | |
1919 _stack.push(n, n->in(0) == NULL ? 1 : 0); | |
1920 } | |
1921 } | |
1922 } | |
1923 | |
1924 // Compute invariance for "the_node" and (possibly) all its inputs recursively | |
1925 // on the fly | |
1926 void compute_invariance(Node* n) { | |
1927 assert(_visited.test(n->_idx), "must be"); | |
1928 visit(n, n); | |
1929 while (_stack.is_nonempty()) { | |
1930 Node* n = _stack.node(); | |
1931 uint idx = _stack.index(); | |
1932 if (idx == n->req()) { // all inputs are processed | |
1933 _stack.pop(); | |
1934 // n is invariant if it's inputs are all invariant | |
1935 bool all_inputs_invariant = true; | |
1936 for (uint i = 0; i < n->req(); i++) { | |
1937 Node* in = n->in(i); | |
1938 if (in == NULL) continue; | |
1939 assert(_visited.test(in->_idx), "must have visited input"); | |
1940 if (!_invariant.test(in->_idx)) { // bad guy | |
1941 all_inputs_invariant = false; | |
1942 break; | |
1943 } | |
1944 } | |
1945 if (all_inputs_invariant) { | |
1946 _invariant.set(n->_idx); // I am a invariant too | |
1947 } | |
1948 } else { // process next input | |
1949 _stack.set_index(idx + 1); | |
1950 Node* m = n->in(idx); | |
1951 if (m != NULL && !_visited.test_set(m->_idx)) { | |
1952 visit(n, m); | |
1953 } | |
1954 } | |
1955 } | |
1956 } | |
1957 | |
1958 // Helper function to set up _old_new map for clone_nodes. | |
1959 // If n is a known invariant, set up directly ("clone" of n == n). | |
1960 // Otherwise, push n onto the stack for real cloning. | |
1961 void clone_visit(Node* n) { | |
1962 assert(_invariant.test(n->_idx), "must be invariant"); | |
1963 if (_lpt->is_invariant(n)) { // known invariant | |
1964 _old_new.map(n->_idx, n); | |
1965 } else{ // to be cloned | |
1966 assert (!n->is_CFG(), "should not see CFG here"); | |
1967 _stack.push(n, n->in(0) == NULL ? 1 : 0); | |
1968 } | |
1969 } | |
1970 | |
1971 // Clone "n" and (possibly) all its inputs recursively | |
1972 void clone_nodes(Node* n, Node* ctrl) { | |
1973 clone_visit(n); | |
1974 while (_stack.is_nonempty()) { | |
1975 Node* n = _stack.node(); | |
1976 uint idx = _stack.index(); | |
1977 if (idx == n->req()) { // all inputs processed, clone n! | |
1978 _stack.pop(); | |
1979 // clone invariant node | |
1980 Node* n_cl = n->clone(); | |
1981 _old_new.map(n->_idx, n_cl); | |
1982 _phase->register_new_node(n_cl, ctrl); | |
1983 for (uint i = 0; i < n->req(); i++) { | |
1984 Node* in = n_cl->in(i); | |
1985 if (in == NULL) continue; | |
1986 n_cl->set_req(i, _old_new[in->_idx]); | |
1987 } | |
1988 } else { // process next input | |
1989 _stack.set_index(idx + 1); | |
1990 Node* m = n->in(idx); | |
1991 if (m != NULL && !_clone_visited.test_set(m->_idx)) { | |
1992 clone_visit(m); // visit the input | |
1993 } | |
1994 } | |
1995 } | |
1996 } | |
1997 | |
1998 public: | |
1999 Invariance(Arena* area, IdealLoopTree* lpt) : | |
2000 _lpt(lpt), _phase(lpt->_phase), | |
2001 _visited(area), _invariant(area), _stack(area, 10 /* guess */), | |
2002 _clone_visited(area), _old_new(area) | |
2003 {} | |
2004 | |
2005 // Map old to n for invariance computation and clone | |
2006 void map_ctrl(Node* old, Node* n) { | |
2007 assert(old->is_CFG() && n->is_CFG(), "must be"); | |
2008 _old_new.map(old->_idx, n); // "clone" of old is n | |
2009 _invariant.set(old->_idx); // old is invariant | |
2010 _clone_visited.set(old->_idx); | |
2011 } | |
2012 | |
2013 // Driver function to compute invariance | |
2014 bool is_invariant(Node* n) { | |
2015 if (!_visited.test_set(n->_idx)) | |
2016 compute_invariance(n); | |
2017 return (_invariant.test(n->_idx) != 0); | |
2018 } | |
2019 | |
2020 // Driver function to clone invariant | |
2021 Node* clone(Node* n, Node* ctrl) { | |
2022 assert(ctrl->is_CFG(), "must be"); | |
2023 assert(_invariant.test(n->_idx), "must be an invariant"); | |
2024 if (!_clone_visited.test(n->_idx)) | |
2025 clone_nodes(n, ctrl); | |
2026 return _old_new[n->_idx]; | |
2027 } | |
2028 }; | |
2029 | |
2030 //------------------------------is_range_check_if ----------------------------------- | |
2031 // Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format | |
2032 // Note: this function is particularly designed for loop predication. We require load_range | |
2033 // and offset to be loop invariant computed on the fly by "invar" | |
2034 bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { | |
2035 if (!is_loop_exit(iff)) { | |
2036 return false; | |
2037 } | |
2038 if (!iff->in(1)->is_Bool()) { | |
2039 return false; | |
2040 } | |
2041 const BoolNode *bol = iff->in(1)->as_Bool(); | |
2042 if (bol->_test._test != BoolTest::lt) { | |
2043 return false; | |
2044 } | |
2045 if (!bol->in(1)->is_Cmp()) { | |
2046 return false; | |
2047 } | |
2048 const CmpNode *cmp = bol->in(1)->as_Cmp(); | |
2049 if (cmp->Opcode() != Op_CmpU ) { | |
2050 return false; | |
2051 } | |
1763 | 2052 Node* range = cmp->in(2); |
2053 if (range->Opcode() != Op_LoadRange) { | |
2054 const TypeInt* tint = phase->_igvn.type(range)->isa_int(); | |
2055 if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) { | |
2056 // Allow predication on positive values that aren't LoadRanges. | |
2057 // This allows optimization of loops where the length of the | |
2058 // array is a known value and doesn't need to be loaded back | |
2059 // from the array. | |
2060 return false; | |
2061 } | |
1172 | 2062 } |
1763 | 2063 if (!invar.is_invariant(range)) { |
1172 | 2064 return false; |
2065 } | |
2066 Node *iv = _head->as_CountedLoop()->phi(); | |
2067 int scale = 0; | |
2068 Node *offset = NULL; | |
2069 if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { | |
2070 return false; | |
2071 } | |
2072 if(offset && !invar.is_invariant(offset)) { // offset must be invariant | |
2073 return false; | |
2074 } | |
2075 return true; | |
2076 } | |
2077 | |
2078 //------------------------------rc_predicate----------------------------------- | |
2079 // Create a range check predicate | |
2080 // | |
2081 // for (i = init; i < limit; i += stride) { | |
2082 // a[scale*i+offset] | |
2083 // } | |
2084 // | |
2085 // Compute max(scale*i + offset) for init <= i < limit and build the predicate | |
2086 // as "max(scale*i + offset) u< a.length". | |
2087 // | |
2088 // There are two cases for max(scale*i + offset): | |
2089 // (1) stride*scale > 0 | |
2090 // max(scale*i + offset) = scale*(limit-stride) + offset | |
2091 // (2) stride*scale < 0 | |
2092 // max(scale*i + offset) = scale*init + offset | |
2093 BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, | |
2094 int scale, Node* offset, | |
2095 Node* init, Node* limit, Node* stride, | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2096 Node* range, bool upper) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2097 DEBUG_ONLY(ttyLocker ttyl); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2098 if (TraceLoopPredicate) tty->print("rc_predicate "); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2099 |
1172 | 2100 Node* max_idx_expr = init; |
2101 int stride_con = stride->get_int(); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2102 if ((stride_con > 0) == (scale > 0) == upper) { |
1172 | 2103 max_idx_expr = new (C, 3) SubINode(limit, stride); |
2104 register_new_node(max_idx_expr, ctrl); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2105 if (TraceLoopPredicate) tty->print("(limit - stride) "); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2106 } else { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2107 if (TraceLoopPredicate) tty->print("init "); |
1172 | 2108 } |
2109 | |
2110 if (scale != 1) { | |
2111 ConNode* con_scale = _igvn.intcon(scale); | |
2112 max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); | |
2113 register_new_node(max_idx_expr, ctrl); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2114 if (TraceLoopPredicate) tty->print("* %d ", scale); |
1172 | 2115 } |
2116 | |
2117 if (offset && (!offset->is_Con() || offset->get_int() != 0)){ | |
2118 max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); | |
2119 register_new_node(max_idx_expr, ctrl); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2120 if (TraceLoopPredicate) |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2121 if (offset->is_Con()) tty->print("+ %d ", offset->get_int()); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2122 else tty->print("+ offset "); |
1172 | 2123 } |
2124 | |
2125 CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); | |
2126 register_new_node(cmp, ctrl); | |
2127 BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); | |
2128 register_new_node(bol, ctrl); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2129 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2130 if (TraceLoopPredicate) tty->print_cr("<u range"); |
1172 | 2131 return bol; |
2132 } | |
2133 | |
2134 //------------------------------ loop_predication_impl-------------------------- | |
2135 // Insert loop predicates for null checks and range checks | |
2136 bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { | |
2137 if (!UseLoopPredicate) return false; | |
2138 | |
1275 | 2139 if (!loop->_head->is_Loop()) { |
2140 // Could be a simple region when irreducible loops are present. | |
2141 return false; | |
2142 } | |
2143 | |
2144 CountedLoopNode *cl = NULL; | |
2145 if (loop->_head->is_CountedLoop()) { | |
2146 cl = loop->_head->as_CountedLoop(); | |
2147 // do nothing for iteration-splitted loops | |
2148 if (!cl->is_normal_loop()) return false; | |
2149 } | |
2150 | |
1172 | 2151 // Too many traps seen? |
2152 bool tmt = C->too_many_traps(C->method(), 0, Deoptimization::Reason_predicate); | |
2153 int tc = C->trap_count(Deoptimization::Reason_predicate); | |
2154 if (tmt || tc > 0) { | |
2155 if (TraceLoopPredicate) { | |
2156 tty->print_cr("too many predicate traps: %d", tc); | |
2157 C->method()->print(); // which method has too many predicate traps | |
2158 tty->print_cr(""); | |
2159 } | |
2160 return false; | |
2161 } | |
2162 | |
2163 LoopNode *lpn = loop->_head->as_Loop(); | |
2164 Node* entry = lpn->in(LoopNode::EntryControl); | |
2165 | |
2166 ProjNode *predicate_proj = find_predicate_insertion_point(entry); | |
2167 if (!predicate_proj){ | |
2168 #ifndef PRODUCT | |
2169 if (TraceLoopPredicate) { | |
2170 tty->print("missing predicate:"); | |
2171 loop->dump_head(); | |
2172 } | |
2173 #endif | |
2174 return false; | |
2175 } | |
2176 | |
2177 ConNode* zero = _igvn.intcon(0); | |
2178 set_ctrl(zero, C->root()); | |
2179 Node *cond_false = new (C, 2) Conv2BNode(zero); | |
2180 register_new_node(cond_false, C->root()); | |
2181 ConNode* one = _igvn.intcon(1); | |
2182 set_ctrl(one, C->root()); | |
2183 Node *cond_true = new (C, 2) Conv2BNode(one); | |
2184 register_new_node(cond_true, C->root()); | |
2185 | |
2186 ResourceArea *area = Thread::current()->resource_area(); | |
2187 Invariance invar(area, loop); | |
2188 | |
2189 // Create list of if-projs such that a newer proj dominates all older | |
2190 // projs in the list, and they all dominate loop->tail() | |
2191 Node_List if_proj_list(area); | |
2192 LoopNode *head = loop->_head->as_Loop(); | |
2193 Node *current_proj = loop->tail(); //start from tail | |
2194 while ( current_proj != head ) { | |
2195 if (loop == get_loop(current_proj) && // still in the loop ? | |
2196 current_proj->is_Proj() && // is a projection ? | |
2197 current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? | |
2198 if_proj_list.push(current_proj); | |
2199 } | |
2200 current_proj = idom(current_proj); | |
2201 } | |
2202 | |
2203 bool hoisted = false; // true if at least one proj is promoted | |
2204 while (if_proj_list.size() > 0) { | |
2205 // Following are changed to nonnull when a predicate can be hoisted | |
2206 ProjNode* new_predicate_proj = NULL; | |
2207 | |
2208 ProjNode* proj = if_proj_list.pop()->as_Proj(); | |
2209 IfNode* iff = proj->in(0)->as_If(); | |
2210 | |
2211 if (!is_uncommon_trap_if_pattern(proj)) { | |
2212 if (loop->is_loop_exit(iff)) { | |
2213 // stop processing the remaining projs in the list because the execution of them | |
2214 // depends on the condition of "iff" (iff->in(1)). | |
2215 break; | |
2216 } else { | |
2217 // Both arms are inside the loop. There are two cases: | |
2218 // (1) there is one backward branch. In this case, any remaining proj | |
2219 // in the if_proj list post-dominates "iff". So, the condition of "iff" | |
2220 // does not determine the execution the remining projs directly, and we | |
2221 // can safely continue. | |
2222 // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj" | |
2223 // does not dominate loop->tail(), so it can not be in the if_proj list. | |
2224 continue; | |
2225 } | |
2226 } | |
2227 | |
2228 Node* test = iff->in(1); | |
2229 if (!test->is_Bool()){ //Conv2B, ... | |
2230 continue; | |
2231 } | |
2232 BoolNode* bol = test->as_Bool(); | |
2233 if (invar.is_invariant(bol)) { | |
2234 // Invariant test | |
2235 new_predicate_proj = create_new_if_for_predicate(predicate_proj); | |
2236 Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2237 BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2238 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2239 // Negate test if necessary |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2240 bool negated = false; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2241 if (proj->_con != predicate_proj->_con) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2242 new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate()); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2243 register_new_node(new_predicate_bol, ctrl); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2244 negated = true; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2245 } |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2246 IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2247 _igvn.hash_delete(new_predicate_iff); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2248 new_predicate_iff->set_req(1, new_predicate_bol); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2249 if (TraceLoopPredicate) tty->print_cr("invariant if%s: %d", negated ? " negated" : "", new_predicate_iff->_idx); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2250 |
1172 | 2251 } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) { |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2252 assert(proj->_con == predicate_proj->_con, "must match"); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2253 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2254 // Range check for counted loops |
1172 | 2255 const Node* cmp = bol->in(1)->as_Cmp(); |
2256 Node* idx = cmp->in(1); | |
2257 assert(!invar.is_invariant(idx), "index is variant"); | |
1763 | 2258 assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be"); |
2259 Node* rng = cmp->in(2); | |
2260 assert(invar.is_invariant(rng), "range must be invariant"); | |
1172 | 2261 int scale = 1; |
2262 Node* offset = zero; | |
2263 bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); | |
2264 assert(ok, "must be index expression"); | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2265 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2266 Node* init = cl->init_trip(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2267 Node* limit = cl->limit(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2268 Node* stride = cl->stride(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2269 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2270 // Build if's for the upper and lower bound tests. The |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2271 // lower_bound test will dominate the upper bound test and all |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2272 // cloned or created nodes will use the lower bound test as |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2273 // their declared control. |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2274 ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2275 ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2276 assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2277 Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2278 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2279 // Perform cloning to keep Invariance state correct since the |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2280 // late schedule will place invariant things in the loop. |
1763 | 2281 rng = invar.clone(rng, ctrl); |
1172 | 2282 if (offset && offset != zero) { |
2283 assert(invar.is_invariant(offset), "offset must be loop invariant"); | |
2284 offset = invar.clone(offset, ctrl); | |
2285 } | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2286 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2287 // Test the lower bound |
1763 | 2288 Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false); |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2289 IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2290 _igvn.hash_delete(lower_bound_iff); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2291 lower_bound_iff->set_req(1, lower_bound_bol); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2292 if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); |
1172 | 2293 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2294 // Test the upper bound |
1763 | 2295 Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true); |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2296 IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2297 _igvn.hash_delete(upper_bound_iff); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2298 upper_bound_iff->set_req(1, upper_bound_bol); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2299 if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", lower_bound_iff->_idx); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2300 |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2301 // Fall through into rest of the clean up code which will move |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2302 // any dependent nodes onto the upper bound test. |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2303 new_predicate_proj = upper_bound_proj; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2304 } else { |
1172 | 2305 // The other proj of the "iff" is a uncommon trap projection, and we can assume |
2306 // the other proj will not be executed ("executed" means uct raised). | |
2307 continue; | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2308 } |
1172 | 2309 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2310 // Success - attach condition (new_predicate_bol) to predicate if |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2311 invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate |
1172 | 2312 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2313 // Eliminate the old if in the loop body |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2314 _igvn.hash_delete(iff); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2315 iff->set_req(1, proj->is_IfFalse() ? cond_false : cond_true); |
1172 | 2316 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2317 Node* ctrl = new_predicate_proj; // new control |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2318 ProjNode* dp = proj; // old control |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2319 assert(get_loop(dp) == loop, "guaranteed at the time of collecting proj"); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2320 // Find nodes (depends only on the test) off the surviving projection; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2321 // move them outside the loop with the control of proj_clone |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2322 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2323 Node* cd = dp->fast_out(i); // Control-dependent node |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2324 if (cd->depends_only_on_test()) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2325 assert(cd->in(0) == dp, ""); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2326 _igvn.hash_delete(cd); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2327 cd->set_req(0, ctrl); // ctrl, not NULL |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2328 set_early_ctrl(cd); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2329 _igvn._worklist.push(cd); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2330 IdealLoopTree *new_loop = get_loop(get_ctrl(cd)); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2331 if (new_loop != loop) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2332 if (!loop->_child) loop->_body.yank(cd); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2333 if (!new_loop->_child ) new_loop->_body.push(cd); |
1172 | 2334 } |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2335 --i; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2336 --imax; |
1172 | 2337 } |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2338 } |
1172 | 2339 |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2340 hoisted = true; |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2341 C->set_major_progress(); |
1172 | 2342 } // end while |
2343 | |
2344 #ifndef PRODUCT | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2345 // report that the loop predication has been actually performed |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2346 // for this loop |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2347 if (TraceLoopPredicate && hoisted) { |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2348 tty->print("Loop Predication Performed:"); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2349 loop->dump_head(); |
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1280
diff
changeset
|
2350 } |
1172 | 2351 #endif |
2352 | |
2353 return hoisted; | |
2354 } | |
2355 | |
2356 //------------------------------loop_predication-------------------------------- | |
2357 // driver routine for loop predication optimization | |
2358 bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) { | |
2359 bool hoisted = false; | |
2360 // Recursively promote predicates | |
2361 if ( _child ) { | |
2362 hoisted = _child->loop_predication( phase); | |
2363 } | |
2364 | |
2365 // self | |
2366 if (!_irreducible && !tail()->is_top()) { | |
2367 hoisted |= phase->loop_predication_impl(this); | |
2368 } | |
2369 | |
2370 if ( _next ) { //sibling | |
2371 hoisted |= _next->loop_predication( phase); | |
2372 } | |
2373 | |
2374 return hoisted; | |
2375 } | |
1763 | 2376 |
2377 | |
2378 // Process all the loops in the loop tree and replace any fill | |
2379 // patterns with an intrisc version. | |
2380 bool PhaseIdealLoop::do_intrinsify_fill() { | |
2381 bool changed = false; | |
2382 for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { | |
2383 IdealLoopTree* lpt = iter.current(); | |
2384 changed |= intrinsify_fill(lpt); | |
2385 } | |
2386 return changed; | |
2387 } | |
2388 | |
2389 | |
2390 // Examine an inner loop looking for a a single store of an invariant | |
2391 // value in a unit stride loop, | |
2392 bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value, | |
2393 Node*& shift, Node*& con) { | |
2394 const char* msg = NULL; | |
2395 Node* msg_node = NULL; | |
2396 | |
2397 store_value = NULL; | |
2398 con = NULL; | |
2399 shift = NULL; | |
2400 | |
2401 // Process the loop looking for stores. If there are multiple | |
2402 // stores or extra control flow give at this point. | |
2403 CountedLoopNode* head = lpt->_head->as_CountedLoop(); | |
2404 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2405 Node* n = lpt->_body.at(i); | |
2406 if (n->outcnt() == 0) continue; // Ignore dead | |
2407 if (n->is_Store()) { | |
2408 if (store != NULL) { | |
2409 msg = "multiple stores"; | |
2410 break; | |
2411 } | |
2412 int opc = n->Opcode(); | |
2413 if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreCM) { | |
2414 msg = "oop fills not handled"; | |
2415 break; | |
2416 } | |
2417 Node* value = n->in(MemNode::ValueIn); | |
2418 if (!lpt->is_invariant(value)) { | |
2419 msg = "variant store value"; | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2420 } else if (!_igvn.type(n->in(MemNode::Address))->isa_aryptr()) { |
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2421 msg = "not array address"; |
1763 | 2422 } |
2423 store = n; | |
2424 store_value = value; | |
2425 } else if (n->is_If() && n != head->loopexit()) { | |
2426 msg = "extra control flow"; | |
2427 msg_node = n; | |
2428 } | |
2429 } | |
2430 | |
2431 if (store == NULL) { | |
2432 // No store in loop | |
2433 return false; | |
2434 } | |
2435 | |
2436 if (msg == NULL && head->stride_con() != 1) { | |
2437 // could handle negative strides too | |
2438 if (head->stride_con() < 0) { | |
2439 msg = "negative stride"; | |
2440 } else { | |
2441 msg = "non-unit stride"; | |
2442 } | |
2443 } | |
2444 | |
2445 if (msg == NULL && !store->in(MemNode::Address)->is_AddP()) { | |
2446 msg = "can't handle store address"; | |
2447 msg_node = store->in(MemNode::Address); | |
2448 } | |
2449 | |
1813
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2450 if (msg == NULL && |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2451 (!store->in(MemNode::Memory)->is_Phi() || |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2452 store->in(MemNode::Memory)->in(LoopNode::LoopBackControl) != store)) { |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2453 msg = "store memory isn't proper phi"; |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2454 msg_node = store->in(MemNode::Memory); |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2455 } |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2456 |
1763 | 2457 // Make sure there is an appropriate fill routine |
2458 BasicType t = store->as_Mem()->memory_type(); | |
2459 const char* fill_name; | |
2460 if (msg == NULL && | |
2461 StubRoutines::select_fill_function(t, false, fill_name) == NULL) { | |
2462 msg = "unsupported store"; | |
2463 msg_node = store; | |
2464 } | |
2465 | |
2466 if (msg != NULL) { | |
2467 #ifndef PRODUCT | |
2468 if (TraceOptimizeFill) { | |
2469 tty->print_cr("not fill intrinsic candidate: %s", msg); | |
2470 if (msg_node != NULL) msg_node->dump(); | |
2471 } | |
2472 #endif | |
2473 return false; | |
2474 } | |
2475 | |
2476 // Make sure the address expression can be handled. It should be | |
2477 // head->phi * elsize + con. head->phi might have a ConvI2L. | |
2478 Node* elements[4]; | |
2479 Node* conv = NULL; | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2480 bool found_index = false; |
1763 | 2481 int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements)); |
2482 for (int e = 0; e < count; e++) { | |
2483 Node* n = elements[e]; | |
2484 if (n->is_Con() && con == NULL) { | |
2485 con = n; | |
2486 } else if (n->Opcode() == Op_LShiftX && shift == NULL) { | |
2487 Node* value = n->in(1); | |
2488 #ifdef _LP64 | |
2489 if (value->Opcode() == Op_ConvI2L) { | |
2490 conv = value; | |
2491 value = value->in(1); | |
2492 } | |
2493 #endif | |
2494 if (value != head->phi()) { | |
2495 msg = "unhandled shift in address"; | |
2496 } else { | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2497 found_index = true; |
1763 | 2498 shift = n; |
2499 assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int(), "scale should match"); | |
2500 } | |
2501 } else if (n->Opcode() == Op_ConvI2L && conv == NULL) { | |
2502 if (n->in(1) == head->phi()) { | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2503 found_index = true; |
1763 | 2504 conv = n; |
2505 } else { | |
2506 msg = "unhandled input to ConvI2L"; | |
2507 } | |
2508 } else if (n == head->phi()) { | |
2509 // no shift, check below for allowed cases | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2510 found_index = true; |
1763 | 2511 } else { |
2512 msg = "unhandled node in address"; | |
2513 msg_node = n; | |
2514 } | |
2515 } | |
2516 | |
2517 if (count == -1) { | |
2518 msg = "malformed address expression"; | |
2519 msg_node = store; | |
2520 } | |
2521 | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2522 if (!found_index) { |
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2523 msg = "missing use of index"; |
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2524 } |
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2525 |
1763 | 2526 // byte sized items won't have a shift |
2527 if (msg == NULL && shift == NULL && t != T_BYTE && t != T_BOOLEAN) { | |
2528 msg = "can't find shift"; | |
2529 msg_node = store; | |
2530 } | |
2531 | |
2532 if (msg != NULL) { | |
2533 #ifndef PRODUCT | |
2534 if (TraceOptimizeFill) { | |
2535 tty->print_cr("not fill intrinsic: %s", msg); | |
2536 if (msg_node != NULL) msg_node->dump(); | |
2537 } | |
2538 #endif | |
2539 return false; | |
2540 } | |
2541 | |
2542 // No make sure all the other nodes in the loop can be handled | |
2543 VectorSet ok(Thread::current()->resource_area()); | |
2544 | |
2545 // store related values are ok | |
2546 ok.set(store->_idx); | |
2547 ok.set(store->in(MemNode::Memory)->_idx); | |
2548 | |
2549 // Loop structure is ok | |
2550 ok.set(head->_idx); | |
2551 ok.set(head->loopexit()->_idx); | |
2552 ok.set(head->phi()->_idx); | |
2553 ok.set(head->incr()->_idx); | |
2554 ok.set(head->loopexit()->cmp_node()->_idx); | |
2555 ok.set(head->loopexit()->in(1)->_idx); | |
2556 | |
2557 // Address elements are ok | |
2558 if (con) ok.set(con->_idx); | |
2559 if (shift) ok.set(shift->_idx); | |
2560 if (conv) ok.set(conv->_idx); | |
2561 | |
2562 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2563 Node* n = lpt->_body.at(i); | |
2564 if (n->outcnt() == 0) continue; // Ignore dead | |
2565 if (ok.test(n->_idx)) continue; | |
2566 // Backedge projection is ok | |
2567 if (n->is_IfTrue() && n->in(0) == head->loopexit()) continue; | |
2568 if (!n->is_AddP()) { | |
2569 msg = "unhandled node"; | |
2570 msg_node = n; | |
2571 break; | |
2572 } | |
2573 } | |
2574 | |
2575 // Make sure no unexpected values are used outside the loop | |
2576 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2577 Node* n = lpt->_body.at(i); | |
2578 // These values can be replaced with other nodes if they are used | |
2579 // outside the loop. | |
1813
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2580 if (n == store || n == head->loopexit() || n == head->incr() || n == store->in(MemNode::Memory)) continue; |
1763 | 2581 for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) { |
2582 Node* use = iter.get(); | |
2583 if (!lpt->_body.contains(use)) { | |
2584 msg = "node is used outside loop"; | |
2585 // lpt->_body.dump(); | |
2586 msg_node = n; | |
2587 break; | |
2588 } | |
2589 } | |
2590 } | |
2591 | |
2592 #ifdef ASSERT | |
2593 if (TraceOptimizeFill) { | |
2594 if (msg != NULL) { | |
2595 tty->print_cr("no fill intrinsic: %s", msg); | |
2596 if (msg_node != NULL) msg_node->dump(); | |
2597 } else { | |
2598 tty->print_cr("fill intrinsic for:"); | |
2599 } | |
2600 store->dump(); | |
2601 if (Verbose) { | |
2602 lpt->_body.dump(); | |
2603 } | |
2604 } | |
2605 #endif | |
2606 | |
2607 return msg == NULL; | |
2608 } | |
2609 | |
2610 | |
2611 | |
2612 bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) { | |
2613 // Only for counted inner loops | |
2614 if (!lpt->is_counted() || !lpt->is_inner()) { | |
2615 return false; | |
2616 } | |
2617 | |
2618 // Must have constant stride | |
2619 CountedLoopNode* head = lpt->_head->as_CountedLoop(); | |
2620 if (!head->stride_is_con() || !head->is_normal_loop()) { | |
2621 return false; | |
2622 } | |
2623 | |
2624 // Check that the body only contains a store of a loop invariant | |
2625 // value that is indexed by the loop phi. | |
2626 Node* store = NULL; | |
2627 Node* store_value = NULL; | |
2628 Node* shift = NULL; | |
2629 Node* offset = NULL; | |
2630 if (!match_fill_loop(lpt, store, store_value, shift, offset)) { | |
2631 return false; | |
2632 } | |
2633 | |
2634 // Now replace the whole loop body by a call to a fill routine that | |
2635 // covers the same region as the loop. | |
2636 Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base); | |
2637 | |
2638 // Build an expression for the beginning of the copy region | |
2639 Node* index = head->init_trip(); | |
2640 #ifdef _LP64 | |
2641 index = new (C, 2) ConvI2LNode(index); | |
2642 _igvn.register_new_node_with_optimizer(index); | |
2643 #endif | |
2644 if (shift != NULL) { | |
2645 // byte arrays don't require a shift but others do. | |
2646 index = new (C, 3) LShiftXNode(index, shift->in(2)); | |
2647 _igvn.register_new_node_with_optimizer(index); | |
2648 } | |
2649 index = new (C, 4) AddPNode(base, base, index); | |
2650 _igvn.register_new_node_with_optimizer(index); | |
2651 Node* from = new (C, 4) AddPNode(base, index, offset); | |
2652 _igvn.register_new_node_with_optimizer(from); | |
2653 // Compute the number of elements to copy | |
2654 Node* len = new (C, 3) SubINode(head->limit(), head->init_trip()); | |
2655 _igvn.register_new_node_with_optimizer(len); | |
2656 | |
2657 BasicType t = store->as_Mem()->memory_type(); | |
2658 bool aligned = false; | |
2659 if (offset != NULL && head->init_trip()->is_Con()) { | |
2660 int element_size = type2aelembytes(t); | |
2661 aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0; | |
2662 } | |
2663 | |
2664 // Build a call to the fill routine | |
2665 const char* fill_name; | |
2666 address fill = StubRoutines::select_fill_function(t, aligned, fill_name); | |
2667 assert(fill != NULL, "what?"); | |
2668 | |
2669 // Convert float/double to int/long for fill routines | |
2670 if (t == T_FLOAT) { | |
2671 store_value = new (C, 2) MoveF2INode(store_value); | |
2672 _igvn.register_new_node_with_optimizer(store_value); | |
2673 } else if (t == T_DOUBLE) { | |
2674 store_value = new (C, 2) MoveD2LNode(store_value); | |
2675 _igvn.register_new_node_with_optimizer(store_value); | |
2676 } | |
2677 | |
2678 Node* mem_phi = store->in(MemNode::Memory); | |
2679 Node* result_ctrl; | |
2680 Node* result_mem; | |
2681 const TypeFunc* call_type = OptoRuntime::array_fill_Type(); | |
2682 int size = call_type->domain()->cnt(); | |
2683 CallLeafNode *call = new (C, size) CallLeafNoFPNode(call_type, fill, | |
2684 fill_name, TypeAryPtr::get_array_body_type(t)); | |
2685 call->init_req(TypeFunc::Parms+0, from); | |
2686 call->init_req(TypeFunc::Parms+1, store_value); | |
1844
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2687 #ifdef _LP64 |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2688 len = new (C, 2) ConvI2LNode(len); |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2689 _igvn.register_new_node_with_optimizer(len); |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2690 #endif |
1763 | 2691 call->init_req(TypeFunc::Parms+2, len); |
1844
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2692 #ifdef _LP64 |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2693 call->init_req(TypeFunc::Parms+3, C->top()); |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2694 #endif |
1763 | 2695 call->init_req( TypeFunc::Control, head->init_control()); |
2696 call->init_req( TypeFunc::I_O , C->top() ) ; // does no i/o | |
2697 call->init_req( TypeFunc::Memory , mem_phi->in(LoopNode::EntryControl) ); | |
2698 call->init_req( TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr) ); | |
2699 call->init_req( TypeFunc::FramePtr, C->start()->proj_out(TypeFunc::FramePtr) ); | |
2700 _igvn.register_new_node_with_optimizer(call); | |
2701 result_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control); | |
2702 _igvn.register_new_node_with_optimizer(result_ctrl); | |
2703 result_mem = new (C, 1) ProjNode(call,TypeFunc::Memory); | |
2704 _igvn.register_new_node_with_optimizer(result_mem); | |
2705 | |
2706 // If this fill is tightly coupled to an allocation and overwrites | |
2707 // the whole body, allow it to take over the zeroing. | |
2708 AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this); | |
2709 if (alloc != NULL && alloc->is_AllocateArray()) { | |
2710 Node* length = alloc->as_AllocateArray()->Ideal_length(); | |
2711 if (head->limit() == length && | |
2712 head->init_trip() == _igvn.intcon(0)) { | |
2713 if (TraceOptimizeFill) { | |
2714 tty->print_cr("Eliminated zeroing in allocation"); | |
2715 } | |
2716 alloc->maybe_set_complete(&_igvn); | |
2717 } else { | |
2718 #ifdef ASSERT | |
2719 if (TraceOptimizeFill) { | |
2720 tty->print_cr("filling array but bounds don't match"); | |
2721 alloc->dump(); | |
2722 head->init_trip()->dump(); | |
2723 head->limit()->dump(); | |
2724 length->dump(); | |
2725 } | |
2726 #endif | |
2727 } | |
2728 } | |
2729 | |
2730 // Redirect the old control and memory edges that are outside the loop. | |
2731 Node* exit = head->loopexit()->proj_out(0); | |
1813
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2732 // Sometimes the memory phi of the head is used as the outgoing |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2733 // state of the loop. It's safe in this case to replace it with the |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2734 // result_mem. |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2735 _igvn.replace_node(store->in(MemNode::Memory), result_mem); |
1763 | 2736 _igvn.replace_node(exit, result_ctrl); |
2737 _igvn.replace_node(store, result_mem); | |
2738 // Any uses the increment outside of the loop become the loop limit. | |
2739 _igvn.replace_node(head->incr(), head->limit()); | |
2740 | |
2741 // Disconnect the head from the loop. | |
2742 for (uint i = 0; i < lpt->_body.size(); i++) { | |
2743 Node* n = lpt->_body.at(i); | |
2744 _igvn.replace_node(n, C->top()); | |
2745 } | |
2746 | |
2747 return true; | |
2748 } |