Mercurial > hg > truffle
annotate src/share/vm/opto/loopTransform.cpp @ 4710:41406797186b
7113012: G1: rename not-fully-young GCs as "mixed"
Summary: Renamed partially-young GCs as mixed and fully-young GCs as young. Change all external output that includes those terms (GC log and GC ergo log) as well as any comments, fields, methods, etc. The changeset also includes very minor code tidying up (added some curly brackets).
Reviewed-by: johnc, brutisso
author | tonyp |
---|---|
date | Fri, 16 Dec 2011 02:14:27 -0500 |
parents | da6a29fb0da5 |
children | 5e990493719e |
rev | line source |
---|---|
0 | 1 /* |
2426
1d1603768966
7010070: Update all 2010 Oracle-changed OpenJDK files to have the proper copyright dates - second pass
trims
parents:
2417
diff
changeset
|
2 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "compiler/compileLog.hpp" | |
27 #include "memory/allocation.inline.hpp" | |
28 #include "opto/addnode.hpp" | |
29 #include "opto/callnode.hpp" | |
30 #include "opto/connode.hpp" | |
31 #include "opto/divnode.hpp" | |
32 #include "opto/loopnode.hpp" | |
33 #include "opto/mulnode.hpp" | |
34 #include "opto/rootnode.hpp" | |
35 #include "opto/runtime.hpp" | |
36 #include "opto/subnode.hpp" | |
0 | 37 |
38 //------------------------------is_loop_exit----------------------------------- | |
39 // Given an IfNode, return the loop-exiting projection or NULL if both | |
40 // arms remain in the loop. | |
41 Node *IdealLoopTree::is_loop_exit(Node *iff) const { | |
42 if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests | |
43 PhaseIdealLoop *phase = _phase; | |
44 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
45 // we need loop unswitching instead of peeling. | |
46 if( !is_member(phase->get_loop( iff->raw_out(0) )) ) | |
47 return iff->raw_out(0); | |
48 if( !is_member(phase->get_loop( iff->raw_out(1) )) ) | |
49 return iff->raw_out(1); | |
50 return NULL; | |
51 } | |
52 | |
53 | |
54 //============================================================================= | |
55 | |
56 | |
57 //------------------------------record_for_igvn---------------------------- | |
58 // Put loop body on igvn work list | |
59 void IdealLoopTree::record_for_igvn() { | |
60 for( uint i = 0; i < _body.size(); i++ ) { | |
61 Node *n = _body.at(i); | |
62 _phase->_igvn._worklist.push(n); | |
63 } | |
64 } | |
65 | |
2465 | 66 //------------------------------compute_exact_trip_count----------------------- |
67 // Compute loop exact trip count if possible. Do not recalculate trip count for | |
68 // split loops (pre-main-post) which have their limits and inits behind Opaque node. | |
69 void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) { | |
70 if (!_head->as_Loop()->is_valid_counted_loop()) { | |
71 return; | |
72 } | |
73 CountedLoopNode* cl = _head->as_CountedLoop(); | |
74 // Trip count may become nonexact for iteration split loops since | |
75 // RCE modifies limits. Note, _trip_count value is not reset since | |
76 // it is used to limit unrolling of main loop. | |
77 cl->set_nonexact_trip_count(); | |
78 | |
79 // Loop's test should be part of loop. | |
80 if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)))) | |
81 return; // Infinite loop | |
82 | |
83 #ifdef ASSERT | |
84 BoolTest::mask bt = cl->loopexit()->test_trip(); | |
85 assert(bt == BoolTest::lt || bt == BoolTest::gt || | |
3782 | 86 bt == BoolTest::ne, "canonical test is expected"); |
2465 | 87 #endif |
88 | |
89 Node* init_n = cl->init_trip(); | |
90 Node* limit_n = cl->limit(); | |
91 if (init_n != NULL && init_n->is_Con() && | |
92 limit_n != NULL && limit_n->is_Con()) { | |
93 // Use longs to avoid integer overflow. | |
94 int stride_con = cl->stride_con(); | |
95 long init_con = cl->init_trip()->get_int(); | |
96 long limit_con = cl->limit()->get_int(); | |
97 int stride_m = stride_con - (stride_con > 0 ? 1 : -1); | |
98 long trip_count = (limit_con - init_con + stride_m)/stride_con; | |
99 if (trip_count > 0 && (julong)trip_count < (julong)max_juint) { | |
100 // Set exact trip count. | |
101 cl->set_exact_trip_count((uint)trip_count); | |
102 } | |
103 } | |
104 } | |
105 | |
0 | 106 //------------------------------compute_profile_trip_cnt---------------------------- |
107 // Compute loop trip count from profile data as | |
108 // (backedge_count + loop_exit_count) / loop_exit_count | |
109 void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) { | |
110 if (!_head->is_CountedLoop()) { | |
111 return; | |
112 } | |
113 CountedLoopNode* head = _head->as_CountedLoop(); | |
114 if (head->profile_trip_cnt() != COUNT_UNKNOWN) { | |
115 return; // Already computed | |
116 } | |
117 float trip_cnt = (float)max_jint; // default is big | |
118 | |
119 Node* back = head->in(LoopNode::LoopBackControl); | |
120 while (back != head) { | |
121 if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) && | |
122 back->in(0) && | |
123 back->in(0)->is_If() && | |
124 back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN && | |
125 back->in(0)->as_If()->_prob != PROB_UNKNOWN) { | |
126 break; | |
127 } | |
128 back = phase->idom(back); | |
129 } | |
130 if (back != head) { | |
131 assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) && | |
132 back->in(0), "if-projection exists"); | |
133 IfNode* back_if = back->in(0)->as_If(); | |
134 float loop_back_cnt = back_if->_fcnt * back_if->_prob; | |
135 | |
136 // Now compute a loop exit count | |
137 float loop_exit_cnt = 0.0f; | |
138 for( uint i = 0; i < _body.size(); i++ ) { | |
139 Node *n = _body[i]; | |
140 if( n->is_If() ) { | |
141 IfNode *iff = n->as_If(); | |
142 if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) { | |
143 Node *exit = is_loop_exit(iff); | |
144 if( exit ) { | |
145 float exit_prob = iff->_prob; | |
146 if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob; | |
147 if (exit_prob > PROB_MIN) { | |
148 float exit_cnt = iff->_fcnt * exit_prob; | |
149 loop_exit_cnt += exit_cnt; | |
150 } | |
151 } | |
152 } | |
153 } | |
154 } | |
155 if (loop_exit_cnt > 0.0f) { | |
156 trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt; | |
157 } else { | |
158 // No exit count so use | |
159 trip_cnt = loop_back_cnt; | |
160 } | |
161 } | |
162 #ifndef PRODUCT | |
163 if (TraceProfileTripCount) { | |
164 tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt); | |
165 } | |
166 #endif | |
167 head->set_profile_trip_cnt(trip_cnt); | |
168 } | |
169 | |
170 //---------------------is_invariant_addition----------------------------- | |
171 // Return nonzero index of invariant operand for an Add or Sub | |
605 | 172 // of (nonconstant) invariant and variant values. Helper for reassociate_invariants. |
0 | 173 int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) { |
174 int op = n->Opcode(); | |
175 if (op == Op_AddI || op == Op_SubI) { | |
176 bool in1_invar = this->is_invariant(n->in(1)); | |
177 bool in2_invar = this->is_invariant(n->in(2)); | |
178 if (in1_invar && !in2_invar) return 1; | |
179 if (!in1_invar && in2_invar) return 2; | |
180 } | |
181 return 0; | |
182 } | |
183 | |
184 //---------------------reassociate_add_sub----------------------------- | |
185 // Reassociate invariant add and subtract expressions: | |
186 // | |
187 // inv1 + (x + inv2) => ( inv1 + inv2) + x | |
188 // (x + inv2) + inv1 => ( inv1 + inv2) + x | |
189 // inv1 + (x - inv2) => ( inv1 - inv2) + x | |
190 // inv1 - (inv2 - x) => ( inv1 - inv2) + x | |
191 // (x + inv2) - inv1 => (-inv1 + inv2) + x | |
192 // (x - inv2) + inv1 => ( inv1 - inv2) + x | |
193 // (x - inv2) - inv1 => (-inv1 - inv2) + x | |
194 // inv1 + (inv2 - x) => ( inv1 + inv2) - x | |
195 // inv1 - (x - inv2) => ( inv1 + inv2) - x | |
196 // (inv2 - x) + inv1 => ( inv1 + inv2) - x | |
197 // (inv2 - x) - inv1 => (-inv1 + inv2) - x | |
198 // inv1 - (x + inv2) => ( inv1 - inv2) - x | |
199 // | |
200 Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) { | |
201 if (!n1->is_Add() && !n1->is_Sub() || n1->outcnt() == 0) return NULL; | |
202 if (is_invariant(n1)) return NULL; | |
203 int inv1_idx = is_invariant_addition(n1, phase); | |
204 if (!inv1_idx) return NULL; | |
205 // Don't mess with add of constant (igvn moves them to expression tree root.) | |
206 if (n1->is_Add() && n1->in(2)->is_Con()) return NULL; | |
207 Node* inv1 = n1->in(inv1_idx); | |
208 Node* n2 = n1->in(3 - inv1_idx); | |
209 int inv2_idx = is_invariant_addition(n2, phase); | |
210 if (!inv2_idx) return NULL; | |
211 Node* x = n2->in(3 - inv2_idx); | |
212 Node* inv2 = n2->in(inv2_idx); | |
213 | |
214 bool neg_x = n2->is_Sub() && inv2_idx == 1; | |
215 bool neg_inv2 = n2->is_Sub() && inv2_idx == 2; | |
216 bool neg_inv1 = n1->is_Sub() && inv1_idx == 2; | |
217 if (n1->is_Sub() && inv1_idx == 1) { | |
218 neg_x = !neg_x; | |
219 neg_inv2 = !neg_inv2; | |
220 } | |
221 Node* inv1_c = phase->get_ctrl(inv1); | |
222 Node* inv2_c = phase->get_ctrl(inv2); | |
223 Node* n_inv1; | |
224 if (neg_inv1) { | |
225 Node *zero = phase->_igvn.intcon(0); | |
226 phase->set_ctrl(zero, phase->C->root()); | |
227 n_inv1 = new (phase->C, 3) SubINode(zero, inv1); | |
228 phase->register_new_node(n_inv1, inv1_c); | |
229 } else { | |
230 n_inv1 = inv1; | |
231 } | |
232 Node* inv; | |
233 if (neg_inv2) { | |
234 inv = new (phase->C, 3) SubINode(n_inv1, inv2); | |
235 } else { | |
236 inv = new (phase->C, 3) AddINode(n_inv1, inv2); | |
237 } | |
238 phase->register_new_node(inv, phase->get_early_ctrl(inv)); | |
239 | |
240 Node* addx; | |
241 if (neg_x) { | |
242 addx = new (phase->C, 3) SubINode(inv, x); | |
243 } else { | |
244 addx = new (phase->C, 3) AddINode(x, inv); | |
245 } | |
246 phase->register_new_node(addx, phase->get_ctrl(x)); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
247 phase->_igvn.replace_node(n1, addx); |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
248 assert(phase->get_loop(phase->get_ctrl(n1)) == this, ""); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
249 _body.yank(n1); |
0 | 250 return addx; |
251 } | |
252 | |
253 //---------------------reassociate_invariants----------------------------- | |
254 // Reassociate invariant expressions: | |
255 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) { | |
256 for (int i = _body.size() - 1; i >= 0; i--) { | |
257 Node *n = _body.at(i); | |
258 for (int j = 0; j < 5; j++) { | |
259 Node* nn = reassociate_add_sub(n, phase); | |
260 if (nn == NULL) break; | |
261 n = nn; // again | |
262 }; | |
263 } | |
264 } | |
265 | |
266 //------------------------------policy_peeling--------------------------------- | |
267 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can | |
268 // make some loop-invariant test (usually a null-check) happen before the loop. | |
269 bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const { | |
270 Node *test = ((IdealLoopTree*)this)->tail(); | |
271 int body_size = ((IdealLoopTree*)this)->_body.size(); | |
272 int uniq = phase->C->unique(); | |
273 // Peeling does loop cloning which can result in O(N^2) node construction | |
274 if( body_size > 255 /* Prevent overflow for large body_size */ | |
275 || (body_size * body_size + uniq > MaxNodeLimit) ) { | |
276 return false; // too large to safely clone | |
277 } | |
278 while( test != _head ) { // Scan till run off top of loop | |
279 if( test->is_If() ) { // Test? | |
280 Node *ctrl = phase->get_ctrl(test->in(1)); | |
281 if (ctrl->is_top()) | |
282 return false; // Found dead test on live IF? No peeling! | |
283 // Standard IF only has one input value to check for loop invariance | |
284 assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added"); | |
285 // Condition is not a member of this loop? | |
286 if( !is_member(phase->get_loop(ctrl)) && | |
287 is_loop_exit(test) ) | |
288 return true; // Found reason to peel! | |
289 } | |
290 // Walk up dominators to loop _head looking for test which is | |
291 // executed on every path thru loop. | |
292 test = phase->idom(test); | |
293 } | |
294 return false; | |
295 } | |
296 | |
297 //------------------------------peeled_dom_test_elim--------------------------- | |
298 // If we got the effect of peeling, either by actually peeling or by making | |
299 // a pre-loop which must execute at least once, we can remove all | |
300 // loop-invariant dominated tests in the main body. | |
301 void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) { | |
302 bool progress = true; | |
303 while( progress ) { | |
304 progress = false; // Reset for next iteration | |
305 Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail(); | |
306 Node *test = prev->in(0); | |
307 while( test != loop->_head ) { // Scan till run off top of loop | |
308 | |
309 int p_op = prev->Opcode(); | |
310 if( (p_op == Op_IfFalse || p_op == Op_IfTrue) && | |
311 test->is_If() && // Test? | |
312 !test->in(1)->is_Con() && // And not already obvious? | |
313 // Condition is not a member of this loop? | |
314 !loop->is_member(get_loop(get_ctrl(test->in(1))))){ | |
315 // Walk loop body looking for instances of this test | |
316 for( uint i = 0; i < loop->_body.size(); i++ ) { | |
317 Node *n = loop->_body.at(i); | |
318 if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) { | |
319 // IfNode was dominated by version in peeled loop body | |
320 progress = true; | |
321 dominated_by( old_new[prev->_idx], n ); | |
322 } | |
323 } | |
324 } | |
325 prev = test; | |
326 test = idom(test); | |
327 } // End of scan tests in loop | |
328 | |
329 } // End of while( progress ) | |
330 } | |
331 | |
332 //------------------------------do_peeling------------------------------------- | |
333 // Peel the first iteration of the given loop. | |
334 // Step 1: Clone the loop body. The clone becomes the peeled iteration. | |
335 // The pre-loop illegally has 2 control users (old & new loops). | |
336 // Step 2: Make the old-loop fall-in edges point to the peeled iteration. | |
337 // Do this by making the old-loop fall-in edges act as if they came | |
338 // around the loopback from the prior iteration (follow the old-loop | |
339 // backedges) and then map to the new peeled iteration. This leaves | |
340 // the pre-loop with only 1 user (the new peeled iteration), but the | |
341 // peeled-loop backedge has 2 users. | |
342 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the | |
343 // extra backedge user. | |
2445 | 344 // |
345 // orig | |
346 // | |
347 // stmt1 | |
348 // | | |
349 // v | |
350 // loop predicate | |
351 // | | |
352 // v | |
353 // loop<----+ | |
354 // | | | |
355 // stmt2 | | |
356 // | | | |
357 // v | | |
358 // if ^ | |
359 // / \ | | |
360 // / \ | | |
361 // v v | | |
362 // false true | | |
363 // / \ | | |
364 // / ----+ | |
365 // | | |
366 // v | |
367 // exit | |
368 // | |
369 // | |
370 // after clone loop | |
371 // | |
372 // stmt1 | |
373 // | | |
374 // v | |
375 // loop predicate | |
376 // / \ | |
377 // clone / \ orig | |
378 // / \ | |
379 // / \ | |
380 // v v | |
381 // +---->loop clone loop<----+ | |
382 // | | | | | |
383 // | stmt2 clone stmt2 | | |
384 // | | | | | |
385 // | v v | | |
386 // ^ if clone If ^ | |
387 // | / \ / \ | | |
388 // | / \ / \ | | |
389 // | v v v v | | |
390 // | true false false true | | |
391 // | / \ / \ | | |
392 // +---- \ / ----+ | |
393 // \ / | |
394 // 1v v2 | |
395 // region | |
396 // | | |
397 // v | |
398 // exit | |
399 // | |
400 // | |
401 // after peel and predicate move | |
402 // | |
403 // stmt1 | |
404 // / | |
405 // / | |
406 // clone / orig | |
407 // / | |
408 // / +----------+ | |
409 // / | | | |
410 // / loop predicate | | |
411 // / | | | |
412 // v v | | |
413 // TOP-->loop clone loop<----+ | | |
414 // | | | | | |
415 // stmt2 clone stmt2 | | | |
416 // | | | ^ | |
417 // v v | | | |
418 // if clone If ^ | | |
419 // / \ / \ | | | |
420 // / \ / \ | | | |
421 // v v v v | | | |
422 // true false false true | | | |
423 // | \ / \ | | | |
424 // | \ / ----+ ^ | |
425 // | \ / | | |
426 // | 1v v2 | | |
427 // v region | | |
428 // | | | | |
429 // | v | | |
430 // | exit | | |
431 // | | | |
432 // +--------------->-----------------+ | |
433 // | |
434 // | |
435 // final graph | |
436 // | |
437 // stmt1 | |
438 // | | |
439 // v | |
440 // stmt2 clone | |
441 // | | |
442 // v | |
443 // if clone | |
444 // / | | |
445 // / | | |
446 // v v | |
447 // false true | |
448 // | | | |
449 // | v | |
450 // | loop predicate | |
451 // | | | |
452 // | v | |
453 // | loop<----+ | |
454 // | | | | |
455 // | stmt2 | | |
456 // | | | | |
457 // | v | | |
458 // v if ^ | |
459 // | / \ | | |
460 // | / \ | | |
461 // | v v | | |
462 // | false true | | |
463 // | | \ | | |
464 // v v --+ | |
465 // region | |
466 // | | |
467 // v | |
468 // exit | |
469 // | |
0 | 470 void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) { |
471 | |
472 C->set_major_progress(); | |
473 // Peeling a 'main' loop in a pre/main/post situation obfuscates the | |
474 // 'pre' loop from the main and the 'pre' can no longer have it's | |
475 // iterations adjusted. Therefore, we need to declare this loop as | |
476 // no longer a 'main' loop; it will need new pre and post loops before | |
477 // we can do further RCE. | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
478 #ifndef PRODUCT |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
479 if (TraceLoopOpts) { |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
480 tty->print("Peel "); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
481 loop->dump_head(); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
482 } |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
483 #endif |
2445 | 484 Node* head = loop->_head; |
485 bool counted_loop = head->is_CountedLoop(); | |
486 if (counted_loop) { | |
487 CountedLoopNode *cl = head->as_CountedLoop(); | |
0 | 488 assert(cl->trip_count() > 0, "peeling a fully unrolled loop"); |
489 cl->set_trip_count(cl->trip_count() - 1); | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
490 if (cl->is_main_loop()) { |
0 | 491 cl->set_normal_loop(); |
492 #ifndef PRODUCT | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
493 if (PrintOpto && VerifyLoopOptimizations) { |
0 | 494 tty->print("Peeling a 'main' loop; resetting to 'normal' "); |
495 loop->dump_head(); | |
496 } | |
497 #endif | |
498 } | |
499 } | |
2445 | 500 Node* entry = head->in(LoopNode::EntryControl); |
0 | 501 |
502 // Step 1: Clone the loop body. The clone becomes the peeled iteration. | |
503 // The pre-loop illegally has 2 control users (old & new loops). | |
2445 | 504 clone_loop( loop, old_new, dom_depth(head) ); |
0 | 505 |
506 // Step 2: Make the old-loop fall-in edges point to the peeled iteration. | |
507 // Do this by making the old-loop fall-in edges act as if they came | |
508 // around the loopback from the prior iteration (follow the old-loop | |
509 // backedges) and then map to the new peeled iteration. This leaves | |
510 // the pre-loop with only 1 user (the new peeled iteration), but the | |
511 // peeled-loop backedge has 2 users. | |
3845
c96c3eb1efae
7068051: SIGSEGV in PhaseIdealLoop::build_loop_late_post
kvn
parents:
3788
diff
changeset
|
512 Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx]; |
2445 | 513 _igvn.hash_delete(head); |
3845
c96c3eb1efae
7068051: SIGSEGV in PhaseIdealLoop::build_loop_late_post
kvn
parents:
3788
diff
changeset
|
514 head->set_req(LoopNode::EntryControl, new_entry); |
2445 | 515 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { |
516 Node* old = head->fast_out(j); | |
517 if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) { | |
3845
c96c3eb1efae
7068051: SIGSEGV in PhaseIdealLoop::build_loop_late_post
kvn
parents:
3788
diff
changeset
|
518 Node* new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx]; |
2445 | 519 if (!new_exit_value ) // Backedge value is ALSO loop invariant? |
0 | 520 // Then loop body backedge value remains the same. |
521 new_exit_value = old->in(LoopNode::LoopBackControl); | |
522 _igvn.hash_delete(old); | |
523 old->set_req(LoopNode::EntryControl, new_exit_value); | |
524 } | |
525 } | |
526 | |
527 | |
528 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the | |
529 // extra backedge user. | |
2445 | 530 Node* new_head = old_new[head->_idx]; |
531 _igvn.hash_delete(new_head); | |
532 new_head->set_req(LoopNode::LoopBackControl, C->top()); | |
533 for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) { | |
534 Node* use = new_head->fast_out(j2); | |
535 if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) { | |
0 | 536 _igvn.hash_delete(use); |
537 use->set_req(LoopNode::LoopBackControl, C->top()); | |
538 } | |
539 } | |
540 | |
541 | |
542 // Step 4: Correct dom-depth info. Set to loop-head depth. | |
2445 | 543 int dd = dom_depth(head); |
544 set_idom(head, head->in(1), dd); | |
0 | 545 for (uint j3 = 0; j3 < loop->_body.size(); j3++) { |
546 Node *old = loop->_body.at(j3); | |
547 Node *nnn = old_new[old->_idx]; | |
548 if (!has_ctrl(nnn)) | |
549 set_idom(nnn, idom(nnn), dd-1); | |
550 // While we're at it, remove any SafePoints from the peeled code | |
2445 | 551 if (old->Opcode() == Op_SafePoint) { |
0 | 552 Node *nnn = old_new[old->_idx]; |
553 lazy_replace(nnn,nnn->in(TypeFunc::Control)); | |
554 } | |
555 } | |
556 | |
557 // Now force out all loop-invariant dominating tests. The optimizer | |
558 // finds some, but we _know_ they are all useless. | |
559 peeled_dom_test_elim(loop,old_new); | |
560 | |
561 loop->record_for_igvn(); | |
562 } | |
563 | |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
564 #define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
565 |
0 | 566 //------------------------------policy_maximally_unroll------------------------ |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
567 // Calculate exact loop trip count and return true if loop can be maximally |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
568 // unrolled. |
0 | 569 bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const { |
570 CountedLoopNode *cl = _head->as_CountedLoop(); | |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
571 assert(cl->is_normal_loop(), ""); |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
572 if (!cl->is_valid_counted_loop()) |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
573 return false; // Malformed counted loop |
0 | 574 |
2465 | 575 if (!cl->has_exact_trip_count()) { |
576 // Trip count is not exact. | |
0 | 577 return false; |
578 } | |
579 | |
2465 | 580 uint trip_count = cl->trip_count(); |
581 // Note, max_juint is used to indicate unknown trip count. | |
582 assert(trip_count > 1, "one iteration loop should be optimized out already"); | |
583 assert(trip_count < max_juint, "exact trip_count should be less than max_uint."); | |
0 | 584 |
585 // Real policy: if we maximally unroll, does it get too big? | |
586 // Allow the unrolled mess to get larger than standard loop | |
587 // size. After all, it will no longer be a loop. | |
588 uint body_size = _body.size(); | |
589 uint unroll_limit = (uint)LoopUnrollLimit * 4; | |
590 assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); | |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
591 if (trip_count > unroll_limit || body_size > unroll_limit) { |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
592 return false; |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
593 } |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
594 |
3345 | 595 // Fully unroll a loop with few iterations regardless next |
596 // conditions since following loop optimizations will split | |
597 // such loop anyway (pre-main-post). | |
598 if (trip_count <= 3) | |
599 return true; | |
600 | |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
601 // Take into account that after unroll conjoined heads and tails will fold, |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
602 // otherwise policy_unroll() may allow more unrolling than max unrolling. |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
603 uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count; |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
604 uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE; |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
605 if (body_size != tst_body_size) // Check for int overflow |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
606 return false; |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
607 if (new_body_size > unroll_limit || |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
608 // Unrolling can result in a large amount of node construction |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
609 new_body_size >= MaxNodeLimit - phase->C->unique()) { |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
610 return false; |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
611 } |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
612 |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
613 // Do not unroll a loop with String intrinsics code. |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
614 // String intrinsics are large and have loops. |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
615 for (uint k = 0; k < _body.size(); k++) { |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
616 Node* n = _body.at(k); |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
617 switch (n->Opcode()) { |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
618 case Op_StrComp: |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
619 case Op_StrEquals: |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
620 case Op_StrIndexOf: |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
621 case Op_AryEq: { |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
622 return false; |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
623 } |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
624 } // switch |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
625 } |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
626 |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
627 return true; // Do maximally unroll |
0 | 628 } |
629 | |
630 | |
3333 | 631 #define MAX_UNROLL 16 // maximum number of unrolls for main loop |
632 | |
0 | 633 //------------------------------policy_unroll---------------------------------- |
634 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if | |
635 // the loop is a CountedLoop and the body is small enough. | |
636 bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const { | |
637 | |
638 CountedLoopNode *cl = _head->as_CountedLoop(); | |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
639 assert(cl->is_normal_loop() || cl->is_main_loop(), ""); |
0 | 640 |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
641 if (!cl->is_valid_counted_loop()) |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
642 return false; // Malformed counted loop |
0 | 643 |
3345 | 644 // Protect against over-unrolling. |
645 // After split at least one iteration will be executed in pre-loop. | |
646 if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false; | |
0 | 647 |
3333 | 648 int future_unroll_ct = cl->unrolled_count() * 2; |
649 if (future_unroll_ct > MAX_UNROLL) return false; | |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
650 |
3333 | 651 // Check for initial stride being a small enough constant |
652 if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false; | |
0 | 653 |
654 // Don't unroll if the next round of unrolling would push us | |
655 // over the expected trip count of the loop. One is subtracted | |
656 // from the expected trip count because the pre-loop normally | |
657 // executes 1 iteration. | |
658 if (UnrollLimitForProfileCheck > 0 && | |
659 cl->profile_trip_cnt() != COUNT_UNKNOWN && | |
660 future_unroll_ct > UnrollLimitForProfileCheck && | |
661 (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) { | |
662 return false; | |
663 } | |
664 | |
665 // When unroll count is greater than LoopUnrollMin, don't unroll if: | |
666 // the residual iterations are more than 10% of the trip count | |
667 // and rounds of "unroll,optimize" are not making significant progress | |
668 // Progress defined as current size less than 20% larger than previous size. | |
669 if (UseSuperWord && cl->node_count_before_unroll() > 0 && | |
670 future_unroll_ct > LoopUnrollMin && | |
671 (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() && | |
672 1.2 * cl->node_count_before_unroll() < (double)_body.size()) { | |
673 return false; | |
674 } | |
675 | |
676 Node *init_n = cl->init_trip(); | |
677 Node *limit_n = cl->limit(); | |
3345 | 678 int stride_con = cl->stride_con(); |
0 | 679 // Non-constant bounds. |
680 // Protect against over-unrolling when init or/and limit are not constant | |
681 // (so that trip_count's init value is maxint) but iv range is known. | |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
682 if (init_n == NULL || !init_n->is_Con() || |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
683 limit_n == NULL || !limit_n->is_Con()) { |
0 | 684 Node* phi = cl->phi(); |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
685 if (phi != NULL) { |
0 | 686 assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi."); |
687 const TypeInt* iv_type = phase->_igvn.type(phi)->is_int(); | |
3345 | 688 int next_stride = stride_con * 2; // stride after this unroll |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
689 if (next_stride > 0) { |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
690 if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
691 iv_type->_lo + next_stride > iv_type->_hi) { |
0 | 692 return false; // over-unrolling |
693 } | |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
694 } else if (next_stride < 0) { |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
695 if (iv_type->_hi + next_stride >= iv_type->_hi || // overflow |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
696 iv_type->_hi + next_stride < iv_type->_lo) { |
0 | 697 return false; // over-unrolling |
698 } | |
699 } | |
700 } | |
701 } | |
702 | |
3345 | 703 // After unroll limit will be adjusted: new_limit = limit-stride. |
704 // Bailout if adjustment overflow. | |
705 const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int(); | |
706 if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) || | |
707 stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo)) | |
708 return false; // overflow | |
709 | |
0 | 710 // Adjust body_size to determine if we unroll or not |
711 uint body_size = _body.size(); | |
3930
da6a29fb0da5
7054211: No loop unrolling done in jdk7b144 for a test update() while loop
kvn
parents:
3850
diff
changeset
|
712 // Key test to unroll loop in CRC32 java code |
da6a29fb0da5
7054211: No loop unrolling done in jdk7b144 for a test update() while loop
kvn
parents:
3850
diff
changeset
|
713 int xors_in_loop = 0; |
0 | 714 // Also count ModL, DivL and MulL which expand mightly |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
715 for (uint k = 0; k < _body.size(); k++) { |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
716 Node* n = _body.at(k); |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
717 switch (n->Opcode()) { |
3930
da6a29fb0da5
7054211: No loop unrolling done in jdk7b144 for a test update() while loop
kvn
parents:
3850
diff
changeset
|
718 case Op_XorI: xors_in_loop++; break; // CRC32 java code |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
719 case Op_ModL: body_size += 30; break; |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
720 case Op_DivL: body_size += 30; break; |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
721 case Op_MulL: body_size += 10; break; |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
722 case Op_StrComp: |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
723 case Op_StrEquals: |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
724 case Op_StrIndexOf: |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
725 case Op_AryEq: { |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
726 // Do not unroll a loop with String intrinsics code. |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
727 // String intrinsics are large and have loops. |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
728 return false; |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
729 } |
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
730 } // switch |
0 | 731 } |
732 | |
733 // Check for being too big | |
2412
f9424955eb18
7029152: Ideal nodes for String intrinsics miss memory edge optimization
kvn
parents:
2403
diff
changeset
|
734 if (body_size > (uint)LoopUnrollLimit) { |
3930
da6a29fb0da5
7054211: No loop unrolling done in jdk7b144 for a test update() while loop
kvn
parents:
3850
diff
changeset
|
735 if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; |
da6a29fb0da5
7054211: No loop unrolling done in jdk7b144 for a test update() while loop
kvn
parents:
3850
diff
changeset
|
736 // Normal case: loop too big |
0 | 737 return false; |
738 } | |
739 | |
740 // Unroll once! (Each trip will soon do double iterations) | |
741 return true; | |
742 } | |
743 | |
744 //------------------------------policy_align----------------------------------- | |
745 // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the | |
746 // expression that does the alignment. Note that only one array base can be | |
605 | 747 // aligned in a loop (unless the VM guarantees mutual alignment). Note that |
0 | 748 // if we vectorize short memory ops into longer memory ops, we may want to |
749 // increase alignment. | |
750 bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const { | |
751 return false; | |
752 } | |
753 | |
754 //------------------------------policy_range_check----------------------------- | |
755 // Return TRUE or FALSE if the loop should be range-check-eliminated. | |
756 // Actually we do iteration-splitting, a more powerful form of RCE. | |
757 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const { | |
3345 | 758 if (!RangeCheckElimination) return false; |
0 | 759 |
760 CountedLoopNode *cl = _head->as_CountedLoop(); | |
761 // If we unrolled with no intention of doing RCE and we later | |
762 // changed our minds, we got no pre-loop. Either we need to | |
763 // make a new pre-loop, or we gotta disallow RCE. | |
3345 | 764 if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. |
0 | 765 Node *trip_counter = cl->phi(); |
766 | |
767 // Check loop body for tests of trip-counter plus loop-invariant vs | |
768 // loop-invariant. | |
3345 | 769 for (uint i = 0; i < _body.size(); i++) { |
0 | 770 Node *iff = _body[i]; |
3345 | 771 if (iff->Opcode() == Op_If) { // Test? |
0 | 772 |
773 // Comparing trip+off vs limit | |
774 Node *bol = iff->in(1); | |
3345 | 775 if (bol->req() != 2) continue; // dead constant test |
1172 | 776 if (!bol->is_Bool()) { |
777 assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); | |
778 continue; | |
779 } | |
3345 | 780 if (bol->as_Bool()->_test._test == BoolTest::ne) |
781 continue; // not RC | |
782 | |
0 | 783 Node *cmp = bol->in(1); |
784 | |
785 Node *rc_exp = cmp->in(1); | |
786 Node *limit = cmp->in(2); | |
787 | |
788 Node *limit_c = phase->get_ctrl(limit); | |
789 if( limit_c == phase->C->top() ) | |
790 return false; // Found dead test on live IF? No RCE! | |
791 if( is_member(phase->get_loop(limit_c) ) ) { | |
792 // Compare might have operands swapped; commute them | |
793 rc_exp = cmp->in(2); | |
794 limit = cmp->in(1); | |
795 limit_c = phase->get_ctrl(limit); | |
796 if( is_member(phase->get_loop(limit_c) ) ) | |
797 continue; // Both inputs are loop varying; cannot RCE | |
798 } | |
799 | |
800 if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) { | |
801 continue; | |
802 } | |
803 // Yeah! Found a test like 'trip+off vs limit' | |
804 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
805 // we need loop unswitching instead of iteration splitting. | |
806 if( is_loop_exit(iff) ) | |
807 return true; // Found reason to split iterations | |
808 } // End of is IF | |
809 } | |
810 | |
811 return false; | |
812 } | |
813 | |
814 //------------------------------policy_peel_only------------------------------- | |
815 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful | |
816 // for unrolling loops with NO array accesses. | |
817 bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const { | |
818 | |
819 for( uint i = 0; i < _body.size(); i++ ) | |
820 if( _body[i]->is_Mem() ) | |
821 return false; | |
822 | |
823 // No memory accesses at all! | |
824 return true; | |
825 } | |
826 | |
827 //------------------------------clone_up_backedge_goo-------------------------- | |
828 // If Node n lives in the back_ctrl block and cannot float, we clone a private | |
829 // version of n in preheader_ctrl block and return that, otherwise return n. | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
830 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ) { |
0 | 831 if( get_ctrl(n) != back_ctrl ) return n; |
832 | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
833 // Only visit once |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
834 if (visited.test_set(n->_idx)) { |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
835 Node *x = clones.find(n->_idx); |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
836 if (x != NULL) |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
837 return x; |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
838 return n; |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
839 } |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
840 |
0 | 841 Node *x = NULL; // If required, a clone of 'n' |
842 // Check for 'n' being pinned in the backedge. | |
843 if( n->in(0) && n->in(0) == back_ctrl ) { | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
844 assert(clones.find(n->_idx) == NULL, "dead loop"); |
0 | 845 x = n->clone(); // Clone a copy of 'n' to preheader |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
846 clones.push(x, n->_idx); |
0 | 847 x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader |
848 } | |
849 | |
850 // Recursive fixup any other input edges into x. | |
851 // If there are no changes we can just return 'n', otherwise | |
852 // we need to clone a private copy and change it. | |
853 for( uint i = 1; i < n->req(); i++ ) { | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
854 Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i), visited, clones ); |
0 | 855 if( g != n->in(i) ) { |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
856 if( !x ) { |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
857 assert(clones.find(n->_idx) == NULL, "dead loop"); |
0 | 858 x = n->clone(); |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
859 clones.push(x, n->_idx); |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
860 } |
0 | 861 x->set_req(i, g); |
862 } | |
863 } | |
864 if( x ) { // x can legally float to pre-header location | |
865 register_new_node( x, preheader_ctrl ); | |
866 return x; | |
867 } else { // raise n to cover LCA of uses | |
868 set_ctrl( n, find_non_split_ctrl(back_ctrl->in(0)) ); | |
869 } | |
870 return n; | |
871 } | |
872 | |
873 //------------------------------insert_pre_post_loops-------------------------- | |
874 // Insert pre and post loops. If peel_only is set, the pre-loop can not have | |
875 // more iterations added. It acts as a 'peel' only, no lower-bound RCE, no | |
876 // alignment. Useful to unroll loops that do no array accesses. | |
877 void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) { | |
878 | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
879 #ifndef PRODUCT |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
880 if (TraceLoopOpts) { |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
881 if (peel_only) |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
882 tty->print("PeelMainPost "); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
883 else |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
884 tty->print("PreMainPost "); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
885 loop->dump_head(); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
886 } |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
887 #endif |
0 | 888 C->set_major_progress(); |
889 | |
890 // Find common pieces of the loop being guarded with pre & post loops | |
891 CountedLoopNode *main_head = loop->_head->as_CountedLoop(); | |
892 assert( main_head->is_normal_loop(), "" ); | |
893 CountedLoopEndNode *main_end = main_head->loopexit(); | |
894 assert( main_end->outcnt() == 2, "1 true, 1 false path only" ); | |
895 uint dd_main_head = dom_depth(main_head); | |
896 uint max = main_head->outcnt(); | |
897 | |
898 Node *pre_header= main_head->in(LoopNode::EntryControl); | |
899 Node *init = main_head->init_trip(); | |
900 Node *incr = main_end ->incr(); | |
901 Node *limit = main_end ->limit(); | |
902 Node *stride = main_end ->stride(); | |
903 Node *cmp = main_end ->cmp_node(); | |
904 BoolTest::mask b_test = main_end->test_trip(); | |
905 | |
906 // Need only 1 user of 'bol' because I will be hacking the loop bounds. | |
907 Node *bol = main_end->in(CountedLoopEndNode::TestValue); | |
908 if( bol->outcnt() != 1 ) { | |
909 bol = bol->clone(); | |
910 register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl)); | |
911 _igvn.hash_delete(main_end); | |
912 main_end->set_req(CountedLoopEndNode::TestValue, bol); | |
913 } | |
914 // Need only 1 user of 'cmp' because I will be hacking the loop bounds. | |
915 if( cmp->outcnt() != 1 ) { | |
916 cmp = cmp->clone(); | |
917 register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl)); | |
918 _igvn.hash_delete(bol); | |
919 bol->set_req(1, cmp); | |
920 } | |
921 | |
922 //------------------------------ | |
923 // Step A: Create Post-Loop. | |
924 Node* main_exit = main_end->proj_out(false); | |
925 assert( main_exit->Opcode() == Op_IfFalse, "" ); | |
926 int dd_main_exit = dom_depth(main_exit); | |
927 | |
928 // Step A1: Clone the loop body. The clone becomes the post-loop. The main | |
929 // loop pre-header illegally has 2 control users (old & new loops). | |
930 clone_loop( loop, old_new, dd_main_exit ); | |
931 assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" ); | |
932 CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop(); | |
933 post_head->set_post_loop(main_head); | |
934 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
935 // Reduce the post-loop trip count. |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
936 CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
937 post_end->_prob = PROB_FAIR; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
938 |
0 | 939 // Build the main-loop normal exit. |
940 IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end); | |
941 _igvn.register_new_node_with_optimizer( new_main_exit ); | |
942 set_idom(new_main_exit, main_end, dd_main_exit ); | |
943 set_loop(new_main_exit, loop->_parent); | |
944 | |
945 // Step A2: Build a zero-trip guard for the post-loop. After leaving the | |
946 // main-loop, the post-loop may not execute at all. We 'opaque' the incr | |
947 // (the main-loop trip-counter exit value) because we will be changing | |
948 // the exit value (via unrolling) so we cannot constant-fold away the zero | |
949 // trip guard until all unrolling is done. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
950 Node *zer_opaq = new (C, 2) Opaque1Node(C, incr); |
0 | 951 Node *zer_cmp = new (C, 3) CmpINode( zer_opaq, limit ); |
952 Node *zer_bol = new (C, 2) BoolNode( zer_cmp, b_test ); | |
953 register_new_node( zer_opaq, new_main_exit ); | |
954 register_new_node( zer_cmp , new_main_exit ); | |
955 register_new_node( zer_bol , new_main_exit ); | |
956 | |
957 // Build the IfNode | |
958 IfNode *zer_iff = new (C, 2) IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN ); | |
959 _igvn.register_new_node_with_optimizer( zer_iff ); | |
960 set_idom(zer_iff, new_main_exit, dd_main_exit); | |
961 set_loop(zer_iff, loop->_parent); | |
962 | |
963 // Plug in the false-path, taken if we need to skip post-loop | |
964 _igvn.hash_delete( main_exit ); | |
965 main_exit->set_req(0, zer_iff); | |
966 _igvn._worklist.push(main_exit); | |
967 set_idom(main_exit, zer_iff, dd_main_exit); | |
968 set_idom(main_exit->unique_out(), zer_iff, dd_main_exit); | |
969 // Make the true-path, must enter the post loop | |
970 Node *zer_taken = new (C, 1) IfTrueNode( zer_iff ); | |
971 _igvn.register_new_node_with_optimizer( zer_taken ); | |
972 set_idom(zer_taken, zer_iff, dd_main_exit); | |
973 set_loop(zer_taken, loop->_parent); | |
974 // Plug in the true path | |
975 _igvn.hash_delete( post_head ); | |
976 post_head->set_req(LoopNode::EntryControl, zer_taken); | |
977 set_idom(post_head, zer_taken, dd_main_exit); | |
978 | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
979 Arena *a = Thread::current()->resource_area(); |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
980 VectorSet visited(a); |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
981 Node_Stack clones(a, main_head->back_control()->outcnt()); |
0 | 982 // Step A3: Make the fall-in values to the post-loop come from the |
983 // fall-out values of the main-loop. | |
984 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { | |
985 Node* main_phi = main_head->fast_out(i); | |
986 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) { | |
987 Node *post_phi = old_new[main_phi->_idx]; | |
988 Node *fallmain = clone_up_backedge_goo(main_head->back_control(), | |
989 post_head->init_control(), | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
990 main_phi->in(LoopNode::LoopBackControl), |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
991 visited, clones); |
0 | 992 _igvn.hash_delete(post_phi); |
993 post_phi->set_req( LoopNode::EntryControl, fallmain ); | |
994 } | |
995 } | |
996 | |
997 // Update local caches for next stanza | |
998 main_exit = new_main_exit; | |
999 | |
1000 | |
1001 //------------------------------ | |
1002 // Step B: Create Pre-Loop. | |
1003 | |
1004 // Step B1: Clone the loop body. The clone becomes the pre-loop. The main | |
1005 // loop pre-header illegally has 2 control users (old & new loops). | |
1006 clone_loop( loop, old_new, dd_main_head ); | |
1007 CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop(); | |
1008 CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); | |
1009 pre_head->set_pre_loop(main_head); | |
1010 Node *pre_incr = old_new[incr->_idx]; | |
1011 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
1012 // Reduce the pre-loop trip count. |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
1013 pre_end->_prob = PROB_FAIR; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
1014 |
0 | 1015 // Find the pre-loop normal exit. |
1016 Node* pre_exit = pre_end->proj_out(false); | |
1017 assert( pre_exit->Opcode() == Op_IfFalse, "" ); | |
1018 IfFalseNode *new_pre_exit = new (C, 1) IfFalseNode(pre_end); | |
1019 _igvn.register_new_node_with_optimizer( new_pre_exit ); | |
1020 set_idom(new_pre_exit, pre_end, dd_main_head); | |
1021 set_loop(new_pre_exit, loop->_parent); | |
1022 | |
1023 // Step B2: Build a zero-trip guard for the main-loop. After leaving the | |
1024 // pre-loop, the main-loop may not execute at all. Later in life this | |
1025 // zero-trip guard will become the minimum-trip guard when we unroll | |
1026 // the main-loop. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
1027 Node *min_opaq = new (C, 2) Opaque1Node(C, limit); |
0 | 1028 Node *min_cmp = new (C, 3) CmpINode( pre_incr, min_opaq ); |
1029 Node *min_bol = new (C, 2) BoolNode( min_cmp, b_test ); | |
1030 register_new_node( min_opaq, new_pre_exit ); | |
1031 register_new_node( min_cmp , new_pre_exit ); | |
1032 register_new_node( min_bol , new_pre_exit ); | |
1033 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
1034 // Build the IfNode (assume the main-loop is executed always). |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
367
diff
changeset
|
1035 IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN ); |
0 | 1036 _igvn.register_new_node_with_optimizer( min_iff ); |
1037 set_idom(min_iff, new_pre_exit, dd_main_head); | |
1038 set_loop(min_iff, loop->_parent); | |
1039 | |
1040 // Plug in the false-path, taken if we need to skip main-loop | |
1041 _igvn.hash_delete( pre_exit ); | |
1042 pre_exit->set_req(0, min_iff); | |
1043 set_idom(pre_exit, min_iff, dd_main_head); | |
1044 set_idom(pre_exit->unique_out(), min_iff, dd_main_head); | |
1045 // Make the true-path, must enter the main loop | |
1046 Node *min_taken = new (C, 1) IfTrueNode( min_iff ); | |
1047 _igvn.register_new_node_with_optimizer( min_taken ); | |
1048 set_idom(min_taken, min_iff, dd_main_head); | |
1049 set_loop(min_taken, loop->_parent); | |
1050 // Plug in the true path | |
1051 _igvn.hash_delete( main_head ); | |
1052 main_head->set_req(LoopNode::EntryControl, min_taken); | |
1053 set_idom(main_head, min_taken, dd_main_head); | |
1054 | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
1055 visited.Clear(); |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
1056 clones.clear(); |
0 | 1057 // Step B3: Make the fall-in values to the main-loop come from the |
1058 // fall-out values of the pre-loop. | |
1059 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) { | |
1060 Node* main_phi = main_head->fast_out(i2); | |
1061 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) { | |
1062 Node *pre_phi = old_new[main_phi->_idx]; | |
1063 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(), | |
1064 main_head->init_control(), | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
1065 pre_phi->in(LoopNode::LoopBackControl), |
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3782
diff
changeset
|
1066 visited, clones); |
0 | 1067 _igvn.hash_delete(main_phi); |
1068 main_phi->set_req( LoopNode::EntryControl, fallpre ); | |
1069 } | |
1070 } | |
1071 | |
1072 // Step B4: Shorten the pre-loop to run only 1 iteration (for now). | |
1073 // RCE and alignment may change this later. | |
1074 Node *cmp_end = pre_end->cmp_node(); | |
1075 assert( cmp_end->in(2) == limit, "" ); | |
1076 Node *pre_limit = new (C, 3) AddINode( init, stride ); | |
1077 | |
1078 // Save the original loop limit in this Opaque1 node for | |
1079 // use by range check elimination. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
113
diff
changeset
|
1080 Node *pre_opaq = new (C, 3) Opaque1Node(C, pre_limit, limit); |
0 | 1081 |
1082 register_new_node( pre_limit, pre_head->in(0) ); | |
1083 register_new_node( pre_opaq , pre_head->in(0) ); | |
1084 | |
1085 // Since no other users of pre-loop compare, I can hack limit directly | |
1086 assert( cmp_end->outcnt() == 1, "no other users" ); | |
1087 _igvn.hash_delete(cmp_end); | |
1088 cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq); | |
1089 | |
1090 // Special case for not-equal loop bounds: | |
1091 // Change pre loop test, main loop test, and the | |
1092 // main loop guard test to use lt or gt depending on stride | |
1093 // direction: | |
1094 // positive stride use < | |
1095 // negative stride use > | |
3782 | 1096 // |
1097 // not-equal test is kept for post loop to handle case | |
1098 // when init > limit when stride > 0 (and reverse). | |
0 | 1099 |
1100 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) { | |
1101 | |
1102 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt; | |
1103 // Modify pre loop end condition | |
1104 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool(); | |
1105 BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test); | |
1106 register_new_node( new_bol0, pre_head->in(0) ); | |
1107 _igvn.hash_delete(pre_end); | |
1108 pre_end->set_req(CountedLoopEndNode::TestValue, new_bol0); | |
1109 // Modify main loop guard condition | |
1110 assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay"); | |
1111 BoolNode* new_bol1 = new (C, 2) BoolNode(min_bol->in(1), new_test); | |
1112 register_new_node( new_bol1, new_pre_exit ); | |
1113 _igvn.hash_delete(min_iff); | |
1114 min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1); | |
1115 // Modify main loop end condition | |
1116 BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool(); | |
1117 BoolNode* new_bol2 = new (C, 2) BoolNode(main_bol->in(1), new_test); | |
1118 register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) ); | |
1119 _igvn.hash_delete(main_end); | |
1120 main_end->set_req(CountedLoopEndNode::TestValue, new_bol2); | |
1121 } | |
1122 | |
1123 // Flag main loop | |
1124 main_head->set_main_loop(); | |
1125 if( peel_only ) main_head->set_main_no_pre_loop(); | |
1126 | |
3345 | 1127 // Subtract a trip count for the pre-loop. |
1128 main_head->set_trip_count(main_head->trip_count() - 1); | |
1129 | |
0 | 1130 // It's difficult to be precise about the trip-counts |
1131 // for the pre/post loops. They are usually very short, | |
1132 // so guess that 4 trips is a reasonable value. | |
1133 post_head->set_profile_trip_cnt(4.0); | |
1134 pre_head->set_profile_trip_cnt(4.0); | |
1135 | |
1136 // Now force out all loop-invariant dominating tests. The optimizer | |
1137 // finds some, but we _know_ they are all useless. | |
1138 peeled_dom_test_elim(loop,old_new); | |
1139 } | |
1140 | |
1141 //------------------------------is_invariant----------------------------- | |
1142 // Return true if n is invariant | |
1143 bool IdealLoopTree::is_invariant(Node* n) const { | |
1172 | 1144 Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; |
0 | 1145 if (n_c->is_top()) return false; |
1146 return !is_member(_phase->get_loop(n_c)); | |
1147 } | |
1148 | |
1149 | |
1150 //------------------------------do_unroll-------------------------------------- | |
1151 // Unroll the loop body one step - make each trip do 2 iterations. | |
1152 void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) { | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1153 assert(LoopUnrollLimit, ""); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1154 CountedLoopNode *loop_head = loop->_head->as_CountedLoop(); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1155 CountedLoopEndNode *loop_end = loop_head->loopexit(); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1156 assert(loop_end, ""); |
0 | 1157 #ifndef PRODUCT |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1158 if (PrintOpto && VerifyLoopOptimizations) { |
0 | 1159 tty->print("Unrolling "); |
1160 loop->dump_head(); | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1161 } else if (TraceLoopOpts) { |
2465 | 1162 if (loop_head->trip_count() < (uint)LoopUnrollLimit) { |
3345 | 1163 tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
1164 } else { |
3345 | 1165 tty->print("Unroll %d ", loop_head->unrolled_count()*2); |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
1166 } |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1167 loop->dump_head(); |
0 | 1168 } |
1169 #endif | |
1170 | |
1171 // Remember loop node count before unrolling to detect | |
1172 // if rounds of unroll,optimize are making progress | |
1173 loop_head->set_node_count_before_unroll(loop->_body.size()); | |
1174 | |
1175 Node *ctrl = loop_head->in(LoopNode::EntryControl); | |
1176 Node *limit = loop_head->limit(); | |
1177 Node *init = loop_head->init_trip(); | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1178 Node *stride = loop_head->stride(); |
0 | 1179 |
1180 Node *opaq = NULL; | |
3345 | 1181 if (adjust_min_trip) { // If not maximally unrolling, need adjustment |
1182 // Search for zero-trip guard. | |
0 | 1183 assert( loop_head->is_main_loop(), "" ); |
1184 assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); | |
1185 Node *iff = ctrl->in(0); | |
1186 assert( iff->Opcode() == Op_If, "" ); | |
1187 Node *bol = iff->in(1); | |
1188 assert( bol->Opcode() == Op_Bool, "" ); | |
1189 Node *cmp = bol->in(1); | |
1190 assert( cmp->Opcode() == Op_CmpI, "" ); | |
1191 opaq = cmp->in(2); | |
3345 | 1192 // Occasionally it's possible for a zero-trip guard Opaque1 node to be |
0 | 1193 // optimized away and then another round of loop opts attempted. |
1194 // We can not optimize this particular loop in that case. | |
3345 | 1195 if (opaq->Opcode() != Op_Opaque1) |
1196 return; // Cannot find zero-trip guard! Bail out! | |
1197 // Zero-trip test uses an 'opaque' node which is not shared. | |
1198 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, ""); | |
0 | 1199 } |
1200 | |
1201 C->set_major_progress(); | |
1202 | |
3345 | 1203 Node* new_limit = NULL; |
1204 if (UnrollLimitCheck) { | |
1205 int stride_con = stride->get_int(); | |
1206 int stride_p = (stride_con > 0) ? stride_con : -stride_con; | |
1207 uint old_trip_count = loop_head->trip_count(); | |
1208 // Verify that unroll policy result is still valid. | |
1209 assert(old_trip_count > 1 && | |
1210 (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity"); | |
0 | 1211 |
3345 | 1212 // Adjust loop limit to keep valid iterations number after unroll. |
1213 // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride | |
1214 // which may overflow. | |
1215 if (!adjust_min_trip) { | |
1216 assert(old_trip_count > 1 && (old_trip_count & 1) == 0, | |
1217 "odd trip count for maximally unroll"); | |
1218 // Don't need to adjust limit for maximally unroll since trip count is even. | |
1219 } else if (loop_head->has_exact_trip_count() && init->is_Con()) { | |
1220 // Loop's limit is constant. Loop's init could be constant when pre-loop | |
1221 // become peeled iteration. | |
1222 long init_con = init->get_int(); | |
1223 // We can keep old loop limit if iterations count stays the same: | |
1224 // old_trip_count == new_trip_count * 2 | |
1225 // Note: since old_trip_count >= 2 then new_trip_count >= 1 | |
1226 // so we also don't need to adjust zero trip test. | |
1227 long limit_con = limit->get_int(); | |
1228 // (stride_con*2) not overflow since stride_con <= 8. | |
1229 int new_stride_con = stride_con * 2; | |
1230 int stride_m = new_stride_con - (stride_con > 0 ? 1 : -1); | |
1231 long trip_count = (limit_con - init_con + stride_m)/new_stride_con; | |
1232 // New trip count should satisfy next conditions. | |
1233 assert(trip_count > 0 && (julong)trip_count < (julong)max_juint/2, "sanity"); | |
1234 uint new_trip_count = (uint)trip_count; | |
1235 adjust_min_trip = (old_trip_count != new_trip_count*2); | |
1236 } | |
1237 | |
1238 if (adjust_min_trip) { | |
1239 // Step 2: Adjust the trip limit if it is called for. | |
1240 // The adjustment amount is -stride. Need to make sure if the | |
1241 // adjustment underflows or overflows, then the main loop is skipped. | |
1242 Node* cmp = loop_end->cmp_node(); | |
1243 assert(cmp->in(2) == limit, "sanity"); | |
1244 assert(opaq != NULL && opaq->in(1) == limit, "sanity"); | |
1245 | |
1246 // Verify that policy_unroll result is still valid. | |
1247 const TypeInt* limit_type = _igvn.type(limit)->is_int(); | |
1248 assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || | |
1249 stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity"); | |
0 | 1250 |
3345 | 1251 if (limit->is_Con()) { |
1252 // The check in policy_unroll and the assert above guarantee | |
1253 // no underflow if limit is constant. | |
1254 new_limit = _igvn.intcon(limit->get_int() - stride_con); | |
1255 set_ctrl(new_limit, C->root()); | |
1256 } else { | |
3348
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1257 // Limit is not constant. |
3367 | 1258 if (loop_head->unrolled_count() == 1) { // only for first unroll |
3348
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1259 // Separate limit by Opaque node in case it is an incremented |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1260 // variable from previous loop to avoid using pre-incremented |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1261 // value which could increase register pressure. |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1262 // Otherwise reorg_offsets() optimization will create a separate |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1263 // Opaque node for each use of trip-counter and as result |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1264 // zero trip guard limit will be different from loop limit. |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1265 assert(has_ctrl(opaq), "should have it"); |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1266 Node* opaq_ctrl = get_ctrl(opaq); |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1267 limit = new (C, 2) Opaque2Node( C, limit ); |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1268 register_new_node( limit, opaq_ctrl ); |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1269 } |
3345 | 1270 if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) || |
1271 stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) { | |
1272 // No underflow. | |
1273 new_limit = new (C, 3) SubINode(limit, stride); | |
1274 } else { | |
1275 // (limit - stride) may underflow. | |
1276 // Clamp the adjustment value with MININT or MAXINT: | |
1277 // | |
1278 // new_limit = limit-stride | |
1279 // if (stride > 0) | |
1280 // new_limit = (limit < new_limit) ? MININT : new_limit; | |
1281 // else | |
1282 // new_limit = (limit > new_limit) ? MAXINT : new_limit; | |
1283 // | |
1284 BoolTest::mask bt = loop_end->test_trip(); | |
1285 assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected"); | |
1286 Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint); | |
1287 set_ctrl(adj_max, C->root()); | |
1288 Node* old_limit = NULL; | |
1289 Node* adj_limit = NULL; | |
1290 Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL; | |
1291 if (loop_head->unrolled_count() > 1 && | |
1292 limit->is_CMove() && limit->Opcode() == Op_CMoveI && | |
1293 limit->in(CMoveNode::IfTrue) == adj_max && | |
1294 bol->as_Bool()->_test._test == bt && | |
1295 bol->in(1)->Opcode() == Op_CmpI && | |
1296 bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) { | |
1297 // Loop was unrolled before. | |
1298 // Optimize the limit to avoid nested CMove: | |
1299 // use original limit as old limit. | |
1300 old_limit = bol->in(1)->in(1); | |
1301 // Adjust previous adjusted limit. | |
1302 adj_limit = limit->in(CMoveNode::IfFalse); | |
1303 adj_limit = new (C, 3) SubINode(adj_limit, stride); | |
1304 } else { | |
1305 old_limit = limit; | |
1306 adj_limit = new (C, 3) SubINode(limit, stride); | |
1307 } | |
1308 assert(old_limit != NULL && adj_limit != NULL, ""); | |
1309 register_new_node( adj_limit, ctrl ); // adjust amount | |
1310 Node* adj_cmp = new (C, 3) CmpINode(old_limit, adj_limit); | |
1311 register_new_node( adj_cmp, ctrl ); | |
1312 Node* adj_bool = new (C, 2) BoolNode(adj_cmp, bt); | |
1313 register_new_node( adj_bool, ctrl ); | |
1314 new_limit = new (C, 4) CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT); | |
1315 } | |
1316 register_new_node(new_limit, ctrl); | |
1317 } | |
1318 assert(new_limit != NULL, ""); | |
3348
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1319 // Replace in loop test. |
3398
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1320 assert(loop_end->in(1)->in(1) == cmp, "sanity"); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1321 if (cmp->outcnt() == 1 && loop_end->in(1)->outcnt() == 1) { |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1322 // Don't need to create new test since only one user. |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1323 _igvn.hash_delete(cmp); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1324 cmp->set_req(2, new_limit); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1325 } else { |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1326 // Create new test since it is shared. |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1327 Node* ctrl2 = loop_end->in(0); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1328 Node* cmp2 = cmp->clone(); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1329 cmp2->set_req(2, new_limit); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1330 register_new_node(cmp2, ctrl2); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1331 Node* bol2 = loop_end->in(1)->clone(); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1332 bol2->set_req(1, cmp2); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1333 register_new_node(bol2, ctrl2); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1334 _igvn.hash_delete(loop_end); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1335 loop_end->set_req(1, bol2); |
789d04408ca3
7045693: java/util/EnumSet/EnumSetBash.java still failing intermittently
kvn
parents:
3383
diff
changeset
|
1336 } |
3348
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1337 // Step 3: Find the min-trip test guaranteed before a 'main' loop. |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1338 // Make it a 1-trip test (means at least 2 trips). |
3345 | 1339 |
3348
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1340 // Guard test uses an 'opaque' node which is not shared. Hence I |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1341 // can edit it's inputs directly. Hammer in the new limit for the |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1342 // minimum-trip guard. |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1343 assert(opaq->outcnt() == 1, ""); |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1344 _igvn.hash_delete(opaq); |
f879eafd5835
7042327: assert(opaq->outcnt() == 1 && opaq->in(1) == limit)
kvn
parents:
3345
diff
changeset
|
1345 opaq->set_req(1, new_limit); |
3345 | 1346 } |
1347 | |
1348 // Adjust max trip count. The trip count is intentionally rounded | |
1349 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, | |
1350 // the main, unrolled, part of the loop will never execute as it is protected | |
1351 // by the min-trip test. See bug 4834191 for a case where we over-unrolled | |
1352 // and later determined that part of the unrolled loop was dead. | |
1353 loop_head->set_trip_count(old_trip_count / 2); | |
1354 | |
1355 // Double the count of original iterations in the unrolled loop body. | |
1356 loop_head->double_unrolled_count(); | |
1357 | |
1358 } else { // LoopLimitCheck | |
1359 | |
1360 // Adjust max trip count. The trip count is intentionally rounded | |
1361 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, | |
1362 // the main, unrolled, part of the loop will never execute as it is protected | |
1363 // by the min-trip test. See bug 4834191 for a case where we over-unrolled | |
1364 // and later determined that part of the unrolled loop was dead. | |
1365 loop_head->set_trip_count(loop_head->trip_count() / 2); | |
1366 | |
1367 // Double the count of original iterations in the unrolled loop body. | |
1368 loop_head->double_unrolled_count(); | |
0 | 1369 |
3345 | 1370 // ----------- |
1371 // Step 2: Cut back the trip counter for an unroll amount of 2. | |
1372 // Loop will normally trip (limit - init)/stride_con. Since it's a | |
1373 // CountedLoop this is exact (stride divides limit-init exactly). | |
1374 // We are going to double the loop body, so we want to knock off any | |
1375 // odd iteration: (trip_cnt & ~1). Then back compute a new limit. | |
1376 Node *span = new (C, 3) SubINode( limit, init ); | |
1377 register_new_node( span, ctrl ); | |
1378 Node *trip = new (C, 3) DivINode( 0, span, stride ); | |
1379 register_new_node( trip, ctrl ); | |
1380 Node *mtwo = _igvn.intcon(-2); | |
1381 set_ctrl(mtwo, C->root()); | |
1382 Node *rond = new (C, 3) AndINode( trip, mtwo ); | |
1383 register_new_node( rond, ctrl ); | |
1384 Node *spn2 = new (C, 3) MulINode( rond, stride ); | |
1385 register_new_node( spn2, ctrl ); | |
1386 new_limit = new (C, 3) AddINode( spn2, init ); | |
1387 register_new_node( new_limit, ctrl ); | |
1388 | |
1389 // Hammer in the new limit | |
1390 Node *ctrl2 = loop_end->in(0); | |
1391 Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), new_limit ); | |
1392 register_new_node( cmp2, ctrl2 ); | |
1393 Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); | |
1394 register_new_node( bol2, ctrl2 ); | |
1395 _igvn.hash_delete(loop_end); | |
1396 loop_end->set_req(CountedLoopEndNode::TestValue, bol2); | |
1397 | |
1398 // Step 3: Find the min-trip test guaranteed before a 'main' loop. | |
1399 // Make it a 1-trip test (means at least 2 trips). | |
1400 if( adjust_min_trip ) { | |
1401 assert( new_limit != NULL, "" ); | |
1402 // Guard test uses an 'opaque' node which is not shared. Hence I | |
1403 // can edit it's inputs directly. Hammer in the new limit for the | |
1404 // minimum-trip guard. | |
1405 assert( opaq->outcnt() == 1, "" ); | |
1406 _igvn.hash_delete(opaq); | |
1407 opaq->set_req(1, new_limit); | |
1408 } | |
1409 } // LoopLimitCheck | |
0 | 1410 |
1411 // --------- | |
1412 // Step 4: Clone the loop body. Move it inside the loop. This loop body | |
1413 // represents the odd iterations; since the loop trips an even number of | |
1414 // times its backedge is never taken. Kill the backedge. | |
1415 uint dd = dom_depth(loop_head); | |
1416 clone_loop( loop, old_new, dd ); | |
1417 | |
1418 // Make backedges of the clone equal to backedges of the original. | |
1419 // Make the fall-in from the original come from the fall-out of the clone. | |
1420 for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) { | |
1421 Node* phi = loop_head->fast_out(j); | |
1422 if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) { | |
1423 Node *newphi = old_new[phi->_idx]; | |
1424 _igvn.hash_delete( phi ); | |
1425 _igvn.hash_delete( newphi ); | |
1426 | |
1427 phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl)); | |
1428 newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl)); | |
1429 phi ->set_req(LoopNode::LoopBackControl, C->top()); | |
1430 } | |
1431 } | |
1432 Node *clone_head = old_new[loop_head->_idx]; | |
1433 _igvn.hash_delete( clone_head ); | |
1434 loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl)); | |
1435 clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl)); | |
1436 loop_head ->set_req(LoopNode::LoopBackControl, C->top()); | |
1437 loop->_head = clone_head; // New loop header | |
1438 | |
1439 set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd); | |
1440 set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd); | |
1441 | |
1442 // Kill the clone's backedge | |
1443 Node *newcle = old_new[loop_end->_idx]; | |
1444 _igvn.hash_delete( newcle ); | |
1445 Node *one = _igvn.intcon(1); | |
1446 set_ctrl(one, C->root()); | |
1447 newcle->set_req(1, one); | |
1448 // Force clone into same loop body | |
1449 uint max = loop->_body.size(); | |
1450 for( uint k = 0; k < max; k++ ) { | |
1451 Node *old = loop->_body.at(k); | |
1452 Node *nnn = old_new[old->_idx]; | |
1453 loop->_body.push(nnn); | |
1454 if (!has_ctrl(old)) | |
1455 set_loop(nnn, loop); | |
1456 } | |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
235
diff
changeset
|
1457 |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
235
diff
changeset
|
1458 loop->record_for_igvn(); |
0 | 1459 } |
1460 | |
1461 //------------------------------do_maximally_unroll---------------------------- | |
1462 | |
1463 void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) { | |
1464 CountedLoopNode *cl = loop->_head->as_CountedLoop(); | |
3345 | 1465 assert(cl->has_exact_trip_count(), "trip count is not exact"); |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1466 assert(cl->trip_count() > 0, ""); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1467 #ifndef PRODUCT |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1468 if (TraceLoopOpts) { |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1469 tty->print("MaxUnroll %d ", cl->trip_count()); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1470 loop->dump_head(); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1471 } |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1472 #endif |
0 | 1473 |
1474 // If loop is tripping an odd number of times, peel odd iteration | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1475 if ((cl->trip_count() & 1) == 1) { |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1476 do_peeling(loop, old_new); |
0 | 1477 } |
1478 | |
1479 // Now its tripping an even number of times remaining. Double loop body. | |
1480 // Do not adjust pre-guards; they are not needed and do not exist. | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1481 if (cl->trip_count() > 0) { |
3345 | 1482 assert((cl->trip_count() & 1) == 0, "missed peeling"); |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1483 do_unroll(loop, old_new, false); |
0 | 1484 } |
1485 } | |
1486 | |
1487 //------------------------------dominates_backedge--------------------------------- | |
1488 // Returns true if ctrl is executed on every complete iteration | |
1489 bool IdealLoopTree::dominates_backedge(Node* ctrl) { | |
1490 assert(ctrl->is_CFG(), "must be control"); | |
1491 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); | |
1492 return _phase->dom_lca_internal(ctrl, backedge) == ctrl; | |
1493 } | |
1494 | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1495 //------------------------------adjust_limit----------------------------------- |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1496 // Helper function for add_constraint(). |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1497 Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl) { |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1498 // Compute "I :: (limit-offset)/scale" |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1499 Node *con = new (C, 3) SubINode(rc_limit, offset); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1500 register_new_node(con, pre_ctrl); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1501 Node *X = new (C, 3) DivINode(0, con, scale); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1502 register_new_node(X, pre_ctrl); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1503 |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1504 // Adjust loop limit |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1505 loop_limit = (stride_con > 0) |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1506 ? (Node*)(new (C, 3) MinINode(loop_limit, X)) |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1507 : (Node*)(new (C, 3) MaxINode(loop_limit, X)); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1508 register_new_node(loop_limit, pre_ctrl); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1509 return loop_limit; |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1510 } |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1511 |
0 | 1512 //------------------------------add_constraint--------------------------------- |
3345 | 1513 // Constrain the main loop iterations so the conditions: |
1514 // low_limit <= scale_con * I + offset < upper_limit | |
0 | 1515 // always holds true. That is, either increase the number of iterations in |
1516 // the pre-loop or the post-loop until the condition holds true in the main | |
1517 // loop. Stride, scale, offset and limit are all loop invariant. Further, | |
1518 // stride and scale are constants (offset and limit often are). | |
3345 | 1519 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { |
0 | 1520 // For positive stride, the pre-loop limit always uses a MAX function |
1521 // and the main loop a MIN function. For negative stride these are | |
1522 // reversed. | |
1523 | |
1524 // Also for positive stride*scale the affine function is increasing, so the | |
1525 // pre-loop must check for underflow and the post-loop for overflow. | |
1526 // Negative stride*scale reverses this; pre-loop checks for overflow and | |
1527 // post-loop for underflow. | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1528 |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1529 Node *scale = _igvn.intcon(scale_con); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1530 set_ctrl(scale, C->root()); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1531 |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1532 if ((stride_con^scale_con) >= 0) { // Use XOR to avoid overflow |
3345 | 1533 // The overflow limit: scale*I+offset < upper_limit |
1534 // For main-loop compute | |
1535 // ( if (scale > 0) /* and stride > 0 */ | |
1536 // I < (upper_limit-offset)/scale | |
1537 // else /* scale < 0 and stride < 0 */ | |
1538 // I > (upper_limit-offset)/scale | |
1539 // ) | |
1540 // | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1541 // (upper_limit-offset) may overflow or underflow. |
3345 | 1542 // But it is fine since main loop will either have |
1543 // less iterations or will be skipped in such case. | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1544 *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl); |
0 | 1545 |
3345 | 1546 // The underflow limit: low_limit <= scale*I+offset. |
1547 // For pre-loop compute | |
1548 // NOT(scale*I+offset >= low_limit) | |
1549 // scale*I+offset < low_limit | |
1550 // ( if (scale > 0) /* and stride > 0 */ | |
1551 // I < (low_limit-offset)/scale | |
1552 // else /* scale < 0 and stride < 0 */ | |
1553 // I > (low_limit-offset)/scale | |
1554 // ) | |
1555 | |
1556 if (low_limit->get_int() == -max_jint) { | |
1557 if (!RangeLimitCheck) return; | |
1558 // We need this guard when scale*pre_limit+offset >= limit | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1559 // due to underflow. So we need execute pre-loop until |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1560 // scale*I+offset >= min_int. But (min_int-offset) will |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1561 // underflow when offset > 0 and X will be > original_limit |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1562 // when stride > 0. To avoid it we replace positive offset with 0. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1563 // |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1564 // Also (min_int+1 == -max_int) is used instead of min_int here |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1565 // to avoid problem with scale == -1 (min_int/(-1) == min_int). |
3345 | 1566 Node* shift = _igvn.intcon(31); |
1567 set_ctrl(shift, C->root()); | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1568 Node* sign = new (C, 3) RShiftINode(offset, shift); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1569 register_new_node(sign, pre_ctrl); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1570 offset = new (C, 3) AndINode(offset, sign); |
3345 | 1571 register_new_node(offset, pre_ctrl); |
1572 } else { | |
1573 assert(low_limit->get_int() == 0, "wrong low limit for range check"); | |
1574 // The only problem we have here when offset == min_int | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1575 // since (0-min_int) == min_int. It may be fine for stride > 0 |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1576 // but for stride < 0 X will be < original_limit. To avoid it |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1577 // max(pre_limit, original_limit) is used in do_range_check(). |
3345 | 1578 } |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1579 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1580 *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl); |
3345 | 1581 |
1582 } else { // stride_con*scale_con < 0 | |
1583 // For negative stride*scale pre-loop checks for overflow and | |
1584 // post-loop for underflow. | |
1585 // | |
1586 // The overflow limit: scale*I+offset < upper_limit | |
1587 // For pre-loop compute | |
1588 // NOT(scale*I+offset < upper_limit) | |
1589 // scale*I+offset >= upper_limit | |
1590 // scale*I+offset+1 > upper_limit | |
1591 // ( if (scale < 0) /* and stride > 0 */ | |
1592 // I < (upper_limit-(offset+1))/scale | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1593 // else /* scale > 0 and stride < 0 */ |
3345 | 1594 // I > (upper_limit-(offset+1))/scale |
1595 // ) | |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1596 // |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1597 // (upper_limit-offset-1) may underflow or overflow. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1598 // To avoid it min(pre_limit, original_limit) is used |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1599 // in do_range_check() for stride > 0 and max() for < 0. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1600 Node *one = _igvn.intcon(1); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1601 set_ctrl(one, C->root()); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1602 |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1603 Node *plus_one = new (C, 3) AddINode(offset, one); |
3345 | 1604 register_new_node( plus_one, pre_ctrl ); |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1605 // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1606 *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl); |
3345 | 1607 |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1608 if (low_limit->get_int() == -max_jint) { |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1609 if (!RangeLimitCheck) return; |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1610 // We need this guard when scale*main_limit+offset >= limit |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1611 // due to underflow. So we need execute main-loop while |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1612 // scale*I+offset+1 > min_int. But (min_int-offset-1) will |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1613 // underflow when (offset+1) > 0 and X will be < main_limit |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1614 // when scale < 0 (and stride > 0). To avoid it we replace |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1615 // positive (offset+1) with 0. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1616 // |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1617 // Also (min_int+1 == -max_int) is used instead of min_int here |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1618 // to avoid problem with scale == -1 (min_int/(-1) == min_int). |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1619 Node* shift = _igvn.intcon(31); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1620 set_ctrl(shift, C->root()); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1621 Node* sign = new (C, 3) RShiftINode(plus_one, shift); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1622 register_new_node(sign, pre_ctrl); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1623 plus_one = new (C, 3) AndINode(plus_one, sign); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1624 register_new_node(plus_one, pre_ctrl); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1625 } else { |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1626 assert(low_limit->get_int() == 0, "wrong low limit for range check"); |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1627 // The only problem we have here when offset == max_int |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1628 // since (max_int+1) == min_int and (0-min_int) == min_int. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1629 // But it is fine since main loop will either have |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1630 // less iterations or will be skipped in such case. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1631 } |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1632 // The underflow limit: low_limit <= scale*I+offset. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1633 // For main-loop compute |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1634 // scale*I+offset+1 > low_limit |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1635 // ( if (scale < 0) /* and stride > 0 */ |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1636 // I < (low_limit-(offset+1))/scale |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1637 // else /* scale > 0 and stride < 0 */ |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1638 // I > (low_limit-(offset+1))/scale |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1639 // ) |
3345 | 1640 |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1641 *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl); |
0 | 1642 } |
1643 } | |
1644 | |
1645 | |
1646 //------------------------------is_scaled_iv--------------------------------- | |
1647 // Return true if exp is a constant times an induction var | |
1648 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) { | |
1649 if (exp == iv) { | |
1650 if (p_scale != NULL) { | |
1651 *p_scale = 1; | |
1652 } | |
1653 return true; | |
1654 } | |
1655 int opc = exp->Opcode(); | |
1656 if (opc == Op_MulI) { | |
1657 if (exp->in(1) == iv && exp->in(2)->is_Con()) { | |
1658 if (p_scale != NULL) { | |
1659 *p_scale = exp->in(2)->get_int(); | |
1660 } | |
1661 return true; | |
1662 } | |
1663 if (exp->in(2) == iv && exp->in(1)->is_Con()) { | |
1664 if (p_scale != NULL) { | |
1665 *p_scale = exp->in(1)->get_int(); | |
1666 } | |
1667 return true; | |
1668 } | |
1669 } else if (opc == Op_LShiftI) { | |
1670 if (exp->in(1) == iv && exp->in(2)->is_Con()) { | |
1671 if (p_scale != NULL) { | |
1672 *p_scale = 1 << exp->in(2)->get_int(); | |
1673 } | |
1674 return true; | |
1675 } | |
1676 } | |
1677 return false; | |
1678 } | |
1679 | |
1680 //-----------------------------is_scaled_iv_plus_offset------------------------------ | |
1681 // Return true if exp is a simple induction variable expression: k1*iv + (invar + k2) | |
1682 bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) { | |
1683 if (is_scaled_iv(exp, iv, p_scale)) { | |
1684 if (p_offset != NULL) { | |
1685 Node *zero = _igvn.intcon(0); | |
1686 set_ctrl(zero, C->root()); | |
1687 *p_offset = zero; | |
1688 } | |
1689 return true; | |
1690 } | |
1691 int opc = exp->Opcode(); | |
1692 if (opc == Op_AddI) { | |
1693 if (is_scaled_iv(exp->in(1), iv, p_scale)) { | |
1694 if (p_offset != NULL) { | |
1695 *p_offset = exp->in(2); | |
1696 } | |
1697 return true; | |
1698 } | |
1699 if (exp->in(2)->is_Con()) { | |
1700 Node* offset2 = NULL; | |
1701 if (depth < 2 && | |
1702 is_scaled_iv_plus_offset(exp->in(1), iv, p_scale, | |
1703 p_offset != NULL ? &offset2 : NULL, depth+1)) { | |
1704 if (p_offset != NULL) { | |
1705 Node *ctrl_off2 = get_ctrl(offset2); | |
1706 Node* offset = new (C, 3) AddINode(offset2, exp->in(2)); | |
1707 register_new_node(offset, ctrl_off2); | |
1708 *p_offset = offset; | |
1709 } | |
1710 return true; | |
1711 } | |
1712 } | |
1713 } else if (opc == Op_SubI) { | |
1714 if (is_scaled_iv(exp->in(1), iv, p_scale)) { | |
1715 if (p_offset != NULL) { | |
1716 Node *zero = _igvn.intcon(0); | |
1717 set_ctrl(zero, C->root()); | |
1718 Node *ctrl_off = get_ctrl(exp->in(2)); | |
1719 Node* offset = new (C, 3) SubINode(zero, exp->in(2)); | |
1720 register_new_node(offset, ctrl_off); | |
1721 *p_offset = offset; | |
1722 } | |
1723 return true; | |
1724 } | |
1725 if (is_scaled_iv(exp->in(2), iv, p_scale)) { | |
1726 if (p_offset != NULL) { | |
1727 *p_scale *= -1; | |
1728 *p_offset = exp->in(1); | |
1729 } | |
1730 return true; | |
1731 } | |
1732 } | |
1733 return false; | |
1734 } | |
1735 | |
1736 //------------------------------do_range_check--------------------------------- | |
1737 // Eliminate range-checks and other trip-counter vs loop-invariant tests. | |
1738 void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) { | |
1739 #ifndef PRODUCT | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1740 if (PrintOpto && VerifyLoopOptimizations) { |
0 | 1741 tty->print("Range Check Elimination "); |
1742 loop->dump_head(); | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1743 } else if (TraceLoopOpts) { |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1744 tty->print("RangeCheck "); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1745 loop->dump_head(); |
0 | 1746 } |
1747 #endif | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1748 assert(RangeCheckElimination, ""); |
0 | 1749 CountedLoopNode *cl = loop->_head->as_CountedLoop(); |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1750 assert(cl->is_main_loop(), ""); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1751 |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1752 // protect against stride not being a constant |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1753 if (!cl->stride_is_con()) |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1754 return; |
0 | 1755 |
1756 // Find the trip counter; we are iteration splitting based on it | |
1757 Node *trip_counter = cl->phi(); | |
1758 // Find the main loop limit; we will trim it's iterations | |
1759 // to not ever trip end tests | |
1760 Node *main_limit = cl->limit(); | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1761 |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1762 // Need to find the main-loop zero-trip guard |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1763 Node *ctrl = cl->in(LoopNode::EntryControl); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1764 assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, ""); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1765 Node *iffm = ctrl->in(0); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1766 assert(iffm->Opcode() == Op_If, ""); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1767 Node *bolzm = iffm->in(1); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1768 assert(bolzm->Opcode() == Op_Bool, ""); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1769 Node *cmpzm = bolzm->in(1); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1770 assert(cmpzm->is_Cmp(), ""); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1771 Node *opqzm = cmpzm->in(2); |
3345 | 1772 // Can not optimize a loop if zero-trip Opaque1 node is optimized |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1773 // away and then another round of loop opts attempted. |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1774 if (opqzm->Opcode() != Op_Opaque1) |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1775 return; |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1776 assert(opqzm->in(1) == main_limit, "do not understand situation"); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1777 |
0 | 1778 // Find the pre-loop limit; we will expand it's iterations to |
1779 // not ever trip low tests. | |
1780 Node *p_f = iffm->in(0); | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1781 assert(p_f->Opcode() == Op_IfFalse, ""); |
0 | 1782 CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1783 assert(pre_end->loopnode()->is_pre_loop(), ""); |
0 | 1784 Node *pre_opaq1 = pre_end->limit(); |
1785 // Occasionally it's possible for a pre-loop Opaque1 node to be | |
1786 // optimized away and then another round of loop opts attempted. | |
1787 // We can not optimize this particular loop in that case. | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1788 if (pre_opaq1->Opcode() != Op_Opaque1) |
0 | 1789 return; |
1790 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; | |
1791 Node *pre_limit = pre_opaq->in(1); | |
1792 | |
1793 // Where do we put new limit calculations | |
1794 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); | |
1795 | |
1796 // Ensure the original loop limit is available from the | |
1797 // pre-loop Opaque1 node. | |
1798 Node *orig_limit = pre_opaq->original_loop_limit(); | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
1799 if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP) |
0 | 1800 return; |
1801 | |
1802 // Must know if its a count-up or count-down loop | |
1803 | |
1804 int stride_con = cl->stride_con(); | |
1805 Node *zero = _igvn.intcon(0); | |
1806 Node *one = _igvn.intcon(1); | |
3345 | 1807 // Use symmetrical int range [-max_jint,max_jint] |
1808 Node *mini = _igvn.intcon(-max_jint); | |
0 | 1809 set_ctrl(zero, C->root()); |
1810 set_ctrl(one, C->root()); | |
3345 | 1811 set_ctrl(mini, C->root()); |
0 | 1812 |
1813 // Range checks that do not dominate the loop backedge (ie. | |
1814 // conditionally executed) can lengthen the pre loop limit beyond | |
1815 // the original loop limit. To prevent this, the pre limit is | |
1816 // (for stride > 0) MINed with the original loop limit (MAXed | |
1817 // stride < 0) when some range_check (rc) is conditionally | |
1818 // executed. | |
1819 bool conditional_rc = false; | |
1820 | |
1821 // Check loop body for tests of trip-counter plus loop-invariant vs | |
1822 // loop-invariant. | |
1823 for( uint i = 0; i < loop->_body.size(); i++ ) { | |
1824 Node *iff = loop->_body[i]; | |
1825 if( iff->Opcode() == Op_If ) { // Test? | |
1826 | |
1827 // Test is an IfNode, has 2 projections. If BOTH are in the loop | |
1828 // we need loop unswitching instead of iteration splitting. | |
1829 Node *exit = loop->is_loop_exit(iff); | |
1830 if( !exit ) continue; | |
1831 int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0; | |
1832 | |
1833 // Get boolean condition to test | |
1834 Node *i1 = iff->in(1); | |
1835 if( !i1->is_Bool() ) continue; | |
1836 BoolNode *bol = i1->as_Bool(); | |
1837 BoolTest b_test = bol->_test; | |
1838 // Flip sense of test if exit condition is flipped | |
1839 if( flip ) | |
1840 b_test = b_test.negate(); | |
1841 | |
1842 // Get compare | |
1843 Node *cmp = bol->in(1); | |
1844 | |
1845 // Look for trip_counter + offset vs limit | |
1846 Node *rc_exp = cmp->in(1); | |
1847 Node *limit = cmp->in(2); | |
1848 jint scale_con= 1; // Assume trip counter not scaled | |
1849 | |
1850 Node *limit_c = get_ctrl(limit); | |
1851 if( loop->is_member(get_loop(limit_c) ) ) { | |
1852 // Compare might have operands swapped; commute them | |
1853 b_test = b_test.commute(); | |
1854 rc_exp = cmp->in(2); | |
1855 limit = cmp->in(1); | |
1856 limit_c = get_ctrl(limit); | |
1857 if( loop->is_member(get_loop(limit_c) ) ) | |
1858 continue; // Both inputs are loop varying; cannot RCE | |
1859 } | |
1860 // Here we know 'limit' is loop invariant | |
1861 | |
1862 // 'limit' maybe pinned below the zero trip test (probably from a | |
1863 // previous round of rce), in which case, it can't be used in the | |
1864 // zero trip test expression which must occur before the zero test's if. | |
1865 if( limit_c == ctrl ) { | |
1866 continue; // Don't rce this check but continue looking for other candidates. | |
1867 } | |
1868 | |
1869 // Check for scaled induction variable plus an offset | |
1870 Node *offset = NULL; | |
1871 | |
1872 if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) { | |
1873 continue; | |
1874 } | |
1875 | |
1876 Node *offset_c = get_ctrl(offset); | |
1877 if( loop->is_member( get_loop(offset_c) ) ) | |
1878 continue; // Offset is not really loop invariant | |
1879 // Here we know 'offset' is loop invariant. | |
1880 | |
1881 // As above for the 'limit', the 'offset' maybe pinned below the | |
1882 // zero trip test. | |
1883 if( offset_c == ctrl ) { | |
1884 continue; // Don't rce this check but continue looking for other candidates. | |
1885 } | |
3345 | 1886 #ifdef ASSERT |
1887 if (TraceRangeLimitCheck) { | |
1888 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":"); | |
1889 bol->dump(2); | |
1890 } | |
1891 #endif | |
0 | 1892 // At this point we have the expression as: |
1893 // scale_con * trip_counter + offset :: limit | |
1894 // where scale_con, offset and limit are loop invariant. Trip_counter | |
1895 // monotonically increases by stride_con, a constant. Both (or either) | |
1896 // stride_con and scale_con can be negative which will flip about the | |
1897 // sense of the test. | |
1898 | |
1899 // Adjust pre and main loop limits to guard the correct iteration set | |
1900 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests | |
1901 if( b_test._test == BoolTest::lt ) { // Range checks always use lt | |
3345 | 1902 // The underflow and overflow limits: 0 <= scale*I+offset < limit |
1903 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit ); | |
0 | 1904 if (!conditional_rc) { |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1905 // (0-offset)/scale could be outside of loop iterations range. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1906 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck; |
0 | 1907 } |
1908 } else { | |
1909 #ifndef PRODUCT | |
1910 if( PrintOpto ) | |
1911 tty->print_cr("missed RCE opportunity"); | |
1912 #endif | |
1913 continue; // In release mode, ignore it | |
1914 } | |
1915 } else { // Otherwise work on normal compares | |
1916 switch( b_test._test ) { | |
3345 | 1917 case BoolTest::gt: |
1918 // Fall into GE case | |
1919 case BoolTest::ge: | |
1920 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit | |
0 | 1921 scale_con = -scale_con; |
1922 offset = new (C, 3) SubINode( zero, offset ); | |
1923 register_new_node( offset, pre_ctrl ); | |
1924 limit = new (C, 3) SubINode( zero, limit ); | |
1925 register_new_node( limit, pre_ctrl ); | |
1926 // Fall into LE case | |
3345 | 1927 case BoolTest::le: |
1928 if (b_test._test != BoolTest::gt) { | |
1929 // Convert X <= Y to X < Y+1 | |
1930 limit = new (C, 3) AddINode( limit, one ); | |
1931 register_new_node( limit, pre_ctrl ); | |
1932 } | |
0 | 1933 // Fall into LT case |
1934 case BoolTest::lt: | |
3345 | 1935 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1936 // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1937 // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT. |
3345 | 1938 add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit ); |
0 | 1939 if (!conditional_rc) { |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1940 // ((MIN_INT+1)-offset)/scale could be outside of loop iterations range. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1941 // Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1942 // still be outside of loop range. |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3367
diff
changeset
|
1943 conditional_rc = !loop->dominates_backedge(iff) || RangeLimitCheck; |
0 | 1944 } |
1945 break; | |
1946 default: | |
1947 #ifndef PRODUCT | |
1948 if( PrintOpto ) | |
1949 tty->print_cr("missed RCE opportunity"); | |
1950 #endif | |
1951 continue; // Unhandled case | |
1952 } | |
1953 } | |
1954 | |
1955 // Kill the eliminated test | |
1956 C->set_major_progress(); | |
1957 Node *kill_con = _igvn.intcon( 1-flip ); | |
1958 set_ctrl(kill_con, C->root()); | |
1959 _igvn.hash_delete(iff); | |
1960 iff->set_req(1, kill_con); | |
1961 _igvn._worklist.push(iff); | |
1962 // Find surviving projection | |
1963 assert(iff->is_If(), ""); | |
1964 ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip); | |
1965 // Find loads off the surviving projection; remove their control edge | |
1966 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { | |
1967 Node* cd = dp->fast_out(i); // Control-dependent node | |
1968 if( cd->is_Load() ) { // Loads can now float around in the loop | |
1969 _igvn.hash_delete(cd); | |
1970 // Allow the load to float around in the loop, or before it | |
1971 // but NOT before the pre-loop. | |
1972 cd->set_req(0, ctrl); // ctrl, not NULL | |
1973 _igvn._worklist.push(cd); | |
1974 --i; | |
1975 --imax; | |
1976 } | |
1977 } | |
1978 | |
1979 } // End of is IF | |
1980 | |
1981 } | |
1982 | |
1983 // Update loop limits | |
1984 if (conditional_rc) { | |
1985 pre_limit = (stride_con > 0) ? (Node*)new (C,3) MinINode(pre_limit, orig_limit) | |
1986 : (Node*)new (C,3) MaxINode(pre_limit, orig_limit); | |
1987 register_new_node(pre_limit, pre_ctrl); | |
1988 } | |
1989 _igvn.hash_delete(pre_opaq); | |
1990 pre_opaq->set_req(1, pre_limit); | |
1991 | |
1992 // Note:: we are making the main loop limit no longer precise; | |
1993 // need to round up based on stride. | |
3345 | 1994 cl->set_nonexact_trip_count(); |
1995 if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case | |
0 | 1996 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init |
1997 // Hopefully, compiler will optimize for powers of 2. | |
1998 Node *ctrl = get_ctrl(main_limit); | |
1999 Node *stride = cl->stride(); | |
2000 Node *init = cl->init_trip(); | |
2001 Node *span = new (C, 3) SubINode(main_limit,init); | |
2002 register_new_node(span,ctrl); | |
2003 Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1)); | |
2004 Node *add = new (C, 3) AddINode(span,rndup); | |
2005 register_new_node(add,ctrl); | |
2006 Node *div = new (C, 3) DivINode(0,add,stride); | |
2007 register_new_node(div,ctrl); | |
2008 Node *mul = new (C, 3) MulINode(div,stride); | |
2009 register_new_node(mul,ctrl); | |
2010 Node *newlim = new (C, 3) AddINode(mul,init); | |
2011 register_new_node(newlim,ctrl); | |
2012 main_limit = newlim; | |
2013 } | |
2014 | |
2015 Node *main_cle = cl->loopexit(); | |
2016 Node *main_bol = main_cle->in(1); | |
2017 // Hacking loop bounds; need private copies of exit test | |
2018 if( main_bol->outcnt() > 1 ) {// BoolNode shared? | |
2019 _igvn.hash_delete(main_cle); | |
2020 main_bol = main_bol->clone();// Clone a private BoolNode | |
2021 register_new_node( main_bol, main_cle->in(0) ); | |
2022 main_cle->set_req(1,main_bol); | |
2023 } | |
2024 Node *main_cmp = main_bol->in(1); | |
2025 if( main_cmp->outcnt() > 1 ) { // CmpNode shared? | |
2026 _igvn.hash_delete(main_bol); | |
2027 main_cmp = main_cmp->clone();// Clone a private CmpNode | |
2028 register_new_node( main_cmp, main_cle->in(0) ); | |
2029 main_bol->set_req(1,main_cmp); | |
2030 } | |
2031 // Hack the now-private loop bounds | |
2032 _igvn.hash_delete(main_cmp); | |
2033 main_cmp->set_req(2, main_limit); | |
2034 _igvn._worklist.push(main_cmp); | |
2035 // The OpaqueNode is unshared by design | |
2036 _igvn.hash_delete(opqzm); | |
2037 assert( opqzm->outcnt() == 1, "cannot hack shared node" ); | |
2038 opqzm->set_req(1,main_limit); | |
2039 _igvn._worklist.push(opqzm); | |
2040 } | |
2041 | |
2042 //------------------------------DCE_loop_body---------------------------------- | |
2043 // Remove simplistic dead code from loop body | |
2044 void IdealLoopTree::DCE_loop_body() { | |
2045 for( uint i = 0; i < _body.size(); i++ ) | |
2046 if( _body.at(i)->outcnt() == 0 ) | |
2047 _body.map( i--, _body.pop() ); | |
2048 } | |
2049 | |
2050 | |
2051 //------------------------------adjust_loop_exit_prob-------------------------- | |
2052 // Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage. | |
2053 // Replace with a 1-in-10 exit guess. | |
2054 void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) { | |
2055 Node *test = tail(); | |
2056 while( test != _head ) { | |
2057 uint top = test->Opcode(); | |
2058 if( top == Op_IfTrue || top == Op_IfFalse ) { | |
2059 int test_con = ((ProjNode*)test)->_con; | |
2060 assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity"); | |
2061 IfNode *iff = test->in(0)->as_If(); | |
2062 if( iff->outcnt() == 2 ) { // Ignore dead tests | |
2063 Node *bol = iff->in(1); | |
2064 if( bol && bol->req() > 1 && bol->in(1) && | |
2065 ((bol->in(1)->Opcode() == Op_StorePConditional ) || | |
420
a1980da045cc
6462850: generate biased locking code in C2 ideal graph
kvn
parents:
401
diff
changeset
|
2066 (bol->in(1)->Opcode() == Op_StoreIConditional ) || |
0 | 2067 (bol->in(1)->Opcode() == Op_StoreLConditional ) || |
2068 (bol->in(1)->Opcode() == Op_CompareAndSwapI ) || | |
2069 (bol->in(1)->Opcode() == Op_CompareAndSwapL ) || | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
39
diff
changeset
|
2070 (bol->in(1)->Opcode() == Op_CompareAndSwapP ) || |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
39
diff
changeset
|
2071 (bol->in(1)->Opcode() == Op_CompareAndSwapN ))) |
0 | 2072 return; // Allocation loops RARELY take backedge |
2073 // Find the OTHER exit path from the IF | |
2074 Node* ex = iff->proj_out(1-test_con); | |
2075 float p = iff->_prob; | |
2076 if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) { | |
2077 if( top == Op_IfTrue ) { | |
2078 if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) { | |
2079 iff->_prob = PROB_STATIC_FREQUENT; | |
2080 } | |
2081 } else { | |
2082 if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) { | |
2083 iff->_prob = PROB_STATIC_INFREQUENT; | |
2084 } | |
2085 } | |
2086 } | |
2087 } | |
2088 } | |
2089 test = phase->idom(test); | |
2090 } | |
2091 } | |
2092 | |
2093 | |
2094 //------------------------------policy_do_remove_empty_loop-------------------- | |
2095 // Micro-benchmark spamming. Policy is to always remove empty loops. | |
2096 // The 'DO' part is to replace the trip counter with the value it will | |
2097 // have on the last iteration. This will break the loop. | |
2098 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) { | |
2099 // Minimum size must be empty loop | |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
2100 if (_body.size() > EMPTY_LOOP_SIZE) |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2101 return false; |
0 | 2102 |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2103 if (!_head->is_CountedLoop()) |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2104 return false; // Dead loop |
0 | 2105 CountedLoopNode *cl = _head->as_CountedLoop(); |
3850
6987871cfb9b
7077439: Possible reference through NULL in loopPredicate.cpp:726
kvn
parents:
3845
diff
changeset
|
2106 if (!cl->is_valid_counted_loop()) |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2107 return false; // Malformed loop |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2108 if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)))) |
0 | 2109 return false; // Infinite loop |
2403 | 2110 |
0 | 2111 #ifdef ASSERT |
2112 // Ensure only one phi which is the iv. | |
2113 Node* iv = NULL; | |
2114 for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) { | |
2115 Node* n = cl->fast_out(i); | |
2116 if (n->Opcode() == Op_Phi) { | |
2117 assert(iv == NULL, "Too many phis" ); | |
2118 iv = n; | |
2119 } | |
2120 } | |
2121 assert(iv == cl->phi(), "Wrong phi" ); | |
2122 #endif | |
2403 | 2123 |
2124 // main and post loops have explicitly created zero trip guard | |
2125 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop(); | |
2126 if (needs_guard) { | |
2465 | 2127 // Skip guard if values not overlap. |
2128 const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int(); | |
2129 const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int(); | |
2130 int stride_con = cl->stride_con(); | |
2131 if (stride_con > 0) { | |
2132 needs_guard = (init_t->_hi >= limit_t->_lo); | |
2133 } else { | |
2134 needs_guard = (init_t->_lo <= limit_t->_hi); | |
2135 } | |
2136 } | |
2137 if (needs_guard) { | |
2403 | 2138 // Check for an obvious zero trip guard. |
2445 | 2139 Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl)); |
2403 | 2140 if (inctrl->Opcode() == Op_IfTrue) { |
2141 // The test should look like just the backedge of a CountedLoop | |
2142 Node* iff = inctrl->in(0); | |
2143 if (iff->is_If()) { | |
2144 Node* bol = iff->in(1); | |
2145 if (bol->is_Bool() && bol->as_Bool()->_test._test == cl->loopexit()->test_trip()) { | |
2146 Node* cmp = bol->in(1); | |
2147 if (cmp->is_Cmp() && cmp->in(1) == cl->init_trip() && cmp->in(2) == cl->limit()) { | |
2148 needs_guard = false; | |
2149 } | |
2150 } | |
2151 } | |
2152 } | |
2153 } | |
2154 | |
2155 #ifndef PRODUCT | |
2156 if (PrintOpto) { | |
2157 tty->print("Removing empty loop with%s zero trip guard", needs_guard ? "out" : ""); | |
2158 this->dump_head(); | |
2159 } else if (TraceLoopOpts) { | |
2160 tty->print("Empty with%s zero trip guard ", needs_guard ? "out" : ""); | |
2161 this->dump_head(); | |
2162 } | |
2163 #endif | |
2164 | |
2165 if (needs_guard) { | |
2166 // Peel the loop to ensure there's a zero trip guard | |
2167 Node_List old_new; | |
2168 phase->do_peeling(this, old_new); | |
2169 } | |
2170 | |
0 | 2171 // Replace the phi at loop head with the final value of the last |
2172 // iteration. Then the CountedLoopEnd will collapse (backedge never | |
2173 // taken) and all loop-invariant uses of the exit values will be correct. | |
2174 Node *phi = cl->phi(); | |
3345 | 2175 Node *exact_limit = phase->exact_limit(this); |
2176 if (exact_limit != cl->limit()) { | |
2177 // We also need to replace the original limit to collapse loop exit. | |
2178 Node* cmp = cl->loopexit()->cmp_node(); | |
2179 assert(cl->limit() == cmp->in(2), "sanity"); | |
2180 phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist | |
2181 phase->_igvn.hash_delete(cmp); | |
2182 cmp->set_req(2, exact_limit); | |
2183 phase->_igvn._worklist.push(cmp); // put cmp on worklist | |
2184 } | |
2185 // Note: the final value after increment should not overflow since | |
2186 // counted loop has limit check predicate. | |
2187 Node *final = new (phase->C, 3) SubINode( exact_limit, cl->stride() ); | |
0 | 2188 phase->register_new_node(final,cl->in(LoopNode::EntryControl)); |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
2189 phase->_igvn.replace_node(phi,final); |
0 | 2190 phase->C->set_major_progress(); |
2191 return true; | |
2192 } | |
2193 | |
2465 | 2194 //------------------------------policy_do_one_iteration_loop------------------- |
2195 // Convert one iteration loop into normal code. | |
2196 bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) { | |
2197 if (!_head->as_Loop()->is_valid_counted_loop()) | |
2198 return false; // Only for counted loop | |
2199 | |
2200 CountedLoopNode *cl = _head->as_CountedLoop(); | |
2201 if (!cl->has_exact_trip_count() || cl->trip_count() != 1) { | |
2202 return false; | |
2203 } | |
2204 | |
2205 #ifndef PRODUCT | |
2206 if(TraceLoopOpts) { | |
2207 tty->print("OneIteration "); | |
2208 this->dump_head(); | |
2209 } | |
2210 #endif | |
2211 | |
2212 Node *init_n = cl->init_trip(); | |
2213 #ifdef ASSERT | |
2214 // Loop boundaries should be constant since trip count is exact. | |
2215 assert(init_n->get_int() + cl->stride_con() >= cl->limit()->get_int(), "should be one iteration"); | |
2216 #endif | |
2217 // Replace the phi at loop head with the value of the init_trip. | |
2218 // Then the CountedLoopEnd will collapse (backedge will not be taken) | |
2219 // and all loop-invariant uses of the exit values will be correct. | |
2220 phase->_igvn.replace_node(cl->phi(), cl->init_trip()); | |
2221 phase->C->set_major_progress(); | |
2222 return true; | |
2223 } | |
0 | 2224 |
2225 //============================================================================= | |
2226 //------------------------------iteration_split_impl--------------------------- | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2227 bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { |
2465 | 2228 // Compute exact loop trip count if possible. |
2229 compute_exact_trip_count(phase); | |
2230 | |
2231 // Convert one iteration loop into normal code. | |
2232 if (policy_do_one_iteration_loop(phase)) | |
2233 return true; | |
2234 | |
0 | 2235 // Check and remove empty loops (spam micro-benchmarks) |
2465 | 2236 if (policy_do_remove_empty_loop(phase)) |
1172 | 2237 return true; // Here we removed an empty loop |
0 | 2238 |
2239 bool should_peel = policy_peeling(phase); // Should we peel? | |
2240 | |
2241 bool should_unswitch = policy_unswitching(phase); | |
2242 | |
2243 // Non-counted loops may be peeled; exactly 1 iteration is peeled. | |
2244 // This removes loop-invariant tests (usually null checks). | |
2465 | 2245 if (!_head->is_CountedLoop()) { // Non-counted loop |
0 | 2246 if (PartialPeelLoop && phase->partial_peel(this, old_new)) { |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2247 // Partial peel succeeded so terminate this round of loop opts |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2248 return false; |
0 | 2249 } |
2465 | 2250 if (should_peel) { // Should we peel? |
0 | 2251 #ifndef PRODUCT |
2252 if (PrintOpto) tty->print_cr("should_peel"); | |
2253 #endif | |
2254 phase->do_peeling(this,old_new); | |
2465 | 2255 } else if (should_unswitch) { |
0 | 2256 phase->do_unswitching(this, old_new); |
2257 } | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2258 return true; |
0 | 2259 } |
2260 CountedLoopNode *cl = _head->as_CountedLoop(); | |
2261 | |
3850
6987871cfb9b
7077439: Possible reference through NULL in loopPredicate.cpp:726
kvn
parents:
3845
diff
changeset
|
2262 if (!cl->is_valid_counted_loop()) return true; // Ignore various kinds of broken loops |
0 | 2263 |
2264 // Do nothing special to pre- and post- loops | |
2465 | 2265 if (cl->is_pre_loop() || cl->is_post_loop()) return true; |
0 | 2266 |
2267 // Compute loop trip count from profile data | |
2268 compute_profile_trip_cnt(phase); | |
2269 | |
2270 // Before attempting fancy unrolling, RCE or alignment, see if we want | |
2271 // to completely unroll this loop or do loop unswitching. | |
2465 | 2272 if (cl->is_normal_loop()) { |
789
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
2273 if (should_unswitch) { |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
2274 phase->do_unswitching(this, old_new); |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
2275 return true; |
273b2358ef1a
6837146: Should perform unswitch before maximally unroll in loop transformation
cfang
parents:
605
diff
changeset
|
2276 } |
0 | 2277 bool should_maximally_unroll = policy_maximally_unroll(phase); |
2465 | 2278 if (should_maximally_unroll) { |
0 | 2279 // Here we did some unrolling and peeling. Eventually we will |
2280 // completely unroll this loop and it will no longer be a loop. | |
2281 phase->do_maximally_unroll(this,old_new); | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2282 return true; |
0 | 2283 } |
2284 } | |
2285 | |
2453
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
2286 // Skip next optimizations if running low on nodes. Note that |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
2287 // policy_unswitching and policy_maximally_unroll have this check. |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
2288 uint nodes_left = MaxNodeLimit - phase->C->unique(); |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
2289 if ((2 * _body.size()) > nodes_left) { |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
2290 return true; |
d7a3fed1c1c9
7004547: regular loop unroll should not unroll more than max unrolling
kvn
parents:
2448
diff
changeset
|
2291 } |
0 | 2292 |
2293 // Counted loops may be peeled, may need some iterations run up | |
2294 // front for RCE, and may want to align loop refs to a cache | |
2295 // line. Thus we clone a full loop up front whose trip count is | |
2296 // at least 1 (if peeling), but may be several more. | |
2297 | |
2298 // The main loop will start cache-line aligned with at least 1 | |
2299 // iteration of the unrolled body (zero-trip test required) and | |
2300 // will have some range checks removed. | |
2301 | |
2302 // A post-loop will finish any odd iterations (leftover after | |
2303 // unrolling), plus any needed for RCE purposes. | |
2304 | |
2305 bool should_unroll = policy_unroll(phase); | |
2306 | |
2307 bool should_rce = policy_range_check(phase); | |
2308 | |
2309 bool should_align = policy_align(phase); | |
2310 | |
2311 // If not RCE'ing (iteration splitting) or Aligning, then we do not | |
2312 // need a pre-loop. We may still need to peel an initial iteration but | |
2313 // we will not be needing an unknown number of pre-iterations. | |
2314 // | |
2315 // Basically, if may_rce_align reports FALSE first time through, | |
2316 // we will not be able to later do RCE or Aligning on this loop. | |
2317 bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align; | |
2318 | |
2319 // If we have any of these conditions (RCE, alignment, unrolling) met, then | |
2320 // we switch to the pre-/main-/post-loop model. This model also covers | |
2321 // peeling. | |
2465 | 2322 if (should_rce || should_align || should_unroll) { |
2323 if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops | |
0 | 2324 phase->insert_pre_post_loops(this,old_new, !may_rce_align); |
2325 | |
2326 // Adjust the pre- and main-loop limits to let the pre and post loops run | |
2327 // with full checks, but the main-loop with no checks. Remove said | |
2328 // checks from the main body. | |
2465 | 2329 if (should_rce) |
0 | 2330 phase->do_range_check(this,old_new); |
2331 | |
2332 // Double loop body for unrolling. Adjust the minimum-trip test (will do | |
2333 // twice as many iterations as before) and the main body limit (only do | |
2334 // an even number of trips). If we are peeling, we might enable some RCE | |
2335 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if | |
2336 // peeling. | |
2465 | 2337 if (should_unroll && !should_peel) |
2338 phase->do_unroll(this,old_new, true); | |
0 | 2339 |
2340 // Adjust the pre-loop limits to align the main body | |
2341 // iterations. | |
2465 | 2342 if (should_align) |
0 | 2343 Unimplemented(); |
2344 | |
2345 } else { // Else we have an unchanged counted loop | |
2465 | 2346 if (should_peel) // Might want to peel but do nothing else |
0 | 2347 phase->do_peeling(this,old_new); |
2348 } | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2349 return true; |
0 | 2350 } |
2351 | |
2352 | |
2353 //============================================================================= | |
2354 //------------------------------iteration_split-------------------------------- | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2355 bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) { |
0 | 2356 // Recursively iteration split nested loops |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2357 if (_child && !_child->iteration_split(phase, old_new)) |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2358 return false; |
0 | 2359 |
2360 // Clean out prior deadwood | |
2361 DCE_loop_body(); | |
2362 | |
2363 | |
2364 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage. | |
2365 // Replace with a 1-in-10 exit guess. | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2366 if (_parent /*not the root loop*/ && |
0 | 2367 !_irreducible && |
2368 // Also ignore the occasional dead backedge | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2369 !tail()->is_top()) { |
0 | 2370 adjust_loop_exit_prob(phase); |
2371 } | |
2372 | |
2373 // Gate unrolling, RCE and peeling efforts. | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2374 if (!_child && // If not an inner loop, do not split |
0 | 2375 !_irreducible && |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
0
diff
changeset
|
2376 _allow_optimizations && |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2377 !tail()->is_top()) { // Also ignore the occasional dead backedge |
0 | 2378 if (!_has_call) { |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2379 if (!iteration_split_impl(phase, old_new)) { |
1172 | 2380 return false; |
2381 } | |
0 | 2382 } else if (policy_unswitching(phase)) { |
2383 phase->do_unswitching(this, old_new); | |
2384 } | |
2385 } | |
2386 | |
2387 // Minor offset re-organization to remove loop-fallout uses of | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2388 // trip counter when there was no major reshaping. |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2389 phase->reorg_offsets(this); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2390 |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
1972
diff
changeset
|
2391 if (_next && !_next->iteration_split(phase, old_new)) |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2392 return false; |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
400
diff
changeset
|
2393 return true; |
0 | 2394 } |
1172 | 2395 |
2396 | |
2445 | 2397 //============================================================================= |
1763 | 2398 // Process all the loops in the loop tree and replace any fill |
2399 // patterns with an intrisc version. | |
2400 bool PhaseIdealLoop::do_intrinsify_fill() { | |
2401 bool changed = false; | |
2402 for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { | |
2403 IdealLoopTree* lpt = iter.current(); | |
2404 changed |= intrinsify_fill(lpt); | |
2405 } | |
2406 return changed; | |
2407 } | |
2408 | |
2409 | |
2410 // Examine an inner loop looking for a a single store of an invariant | |
2411 // value in a unit stride loop, | |
2412 bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value, | |
2413 Node*& shift, Node*& con) { | |
2414 const char* msg = NULL; | |
2415 Node* msg_node = NULL; | |
2416 | |
2417 store_value = NULL; | |
2418 con = NULL; | |
2419 shift = NULL; | |
2420 | |
2421 // Process the loop looking for stores. If there are multiple | |
2422 // stores or extra control flow give at this point. | |
2423 CountedLoopNode* head = lpt->_head->as_CountedLoop(); | |
2424 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2425 Node* n = lpt->_body.at(i); | |
2426 if (n->outcnt() == 0) continue; // Ignore dead | |
2427 if (n->is_Store()) { | |
2428 if (store != NULL) { | |
2429 msg = "multiple stores"; | |
2430 break; | |
2431 } | |
2432 int opc = n->Opcode(); | |
2433 if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreCM) { | |
2434 msg = "oop fills not handled"; | |
2435 break; | |
2436 } | |
2437 Node* value = n->in(MemNode::ValueIn); | |
2438 if (!lpt->is_invariant(value)) { | |
2439 msg = "variant store value"; | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2440 } else if (!_igvn.type(n->in(MemNode::Address))->isa_aryptr()) { |
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2441 msg = "not array address"; |
1763 | 2442 } |
2443 store = n; | |
2444 store_value = value; | |
2445 } else if (n->is_If() && n != head->loopexit()) { | |
2446 msg = "extra control flow"; | |
2447 msg_node = n; | |
2448 } | |
2449 } | |
2450 | |
2451 if (store == NULL) { | |
2452 // No store in loop | |
2453 return false; | |
2454 } | |
2455 | |
2456 if (msg == NULL && head->stride_con() != 1) { | |
2457 // could handle negative strides too | |
2458 if (head->stride_con() < 0) { | |
2459 msg = "negative stride"; | |
2460 } else { | |
2461 msg = "non-unit stride"; | |
2462 } | |
2463 } | |
2464 | |
2465 if (msg == NULL && !store->in(MemNode::Address)->is_AddP()) { | |
2466 msg = "can't handle store address"; | |
2467 msg_node = store->in(MemNode::Address); | |
2468 } | |
2469 | |
1813
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2470 if (msg == NULL && |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2471 (!store->in(MemNode::Memory)->is_Phi() || |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2472 store->in(MemNode::Memory)->in(LoopNode::LoopBackControl) != store)) { |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2473 msg = "store memory isn't proper phi"; |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2474 msg_node = store->in(MemNode::Memory); |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2475 } |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2476 |
1763 | 2477 // Make sure there is an appropriate fill routine |
2478 BasicType t = store->as_Mem()->memory_type(); | |
2479 const char* fill_name; | |
2480 if (msg == NULL && | |
2481 StubRoutines::select_fill_function(t, false, fill_name) == NULL) { | |
2482 msg = "unsupported store"; | |
2483 msg_node = store; | |
2484 } | |
2485 | |
2486 if (msg != NULL) { | |
2487 #ifndef PRODUCT | |
2488 if (TraceOptimizeFill) { | |
2489 tty->print_cr("not fill intrinsic candidate: %s", msg); | |
2490 if (msg_node != NULL) msg_node->dump(); | |
2491 } | |
2492 #endif | |
2493 return false; | |
2494 } | |
2495 | |
2496 // Make sure the address expression can be handled. It should be | |
2497 // head->phi * elsize + con. head->phi might have a ConvI2L. | |
2498 Node* elements[4]; | |
2499 Node* conv = NULL; | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2500 bool found_index = false; |
1763 | 2501 int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements)); |
2502 for (int e = 0; e < count; e++) { | |
2503 Node* n = elements[e]; | |
2504 if (n->is_Con() && con == NULL) { | |
2505 con = n; | |
2506 } else if (n->Opcode() == Op_LShiftX && shift == NULL) { | |
2507 Node* value = n->in(1); | |
2508 #ifdef _LP64 | |
2509 if (value->Opcode() == Op_ConvI2L) { | |
2510 conv = value; | |
2511 value = value->in(1); | |
2512 } | |
2513 #endif | |
2514 if (value != head->phi()) { | |
2515 msg = "unhandled shift in address"; | |
2516 } else { | |
2448
8b2317d732ec
7026957: assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int()) failed
never
parents:
2445
diff
changeset
|
2517 if (type2aelembytes(store->as_Mem()->memory_type(), true) != (1 << n->in(2)->get_int())) { |
8b2317d732ec
7026957: assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int()) failed
never
parents:
2445
diff
changeset
|
2518 msg = "scale doesn't match"; |
8b2317d732ec
7026957: assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int()) failed
never
parents:
2445
diff
changeset
|
2519 } else { |
8b2317d732ec
7026957: assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int()) failed
never
parents:
2445
diff
changeset
|
2520 found_index = true; |
8b2317d732ec
7026957: assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int()) failed
never
parents:
2445
diff
changeset
|
2521 shift = n; |
8b2317d732ec
7026957: assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int()) failed
never
parents:
2445
diff
changeset
|
2522 } |
1763 | 2523 } |
2524 } else if (n->Opcode() == Op_ConvI2L && conv == NULL) { | |
2525 if (n->in(1) == head->phi()) { | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2526 found_index = true; |
1763 | 2527 conv = n; |
2528 } else { | |
2529 msg = "unhandled input to ConvI2L"; | |
2530 } | |
2531 } else if (n == head->phi()) { | |
2532 // no shift, check below for allowed cases | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2533 found_index = true; |
1763 | 2534 } else { |
2535 msg = "unhandled node in address"; | |
2536 msg_node = n; | |
2537 } | |
2538 } | |
2539 | |
2540 if (count == -1) { | |
2541 msg = "malformed address expression"; | |
2542 msg_node = store; | |
2543 } | |
2544 | |
1785
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2545 if (!found_index) { |
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2546 msg = "missing use of index"; |
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2547 } |
5e4f03302987
6982533: Crash in ~StubRoutines::jbyte_fill with AggressiveOpts enabled
never
parents:
1763
diff
changeset
|
2548 |
1763 | 2549 // byte sized items won't have a shift |
2550 if (msg == NULL && shift == NULL && t != T_BYTE && t != T_BOOLEAN) { | |
2551 msg = "can't find shift"; | |
2552 msg_node = store; | |
2553 } | |
2554 | |
2555 if (msg != NULL) { | |
2556 #ifndef PRODUCT | |
2557 if (TraceOptimizeFill) { | |
2558 tty->print_cr("not fill intrinsic: %s", msg); | |
2559 if (msg_node != NULL) msg_node->dump(); | |
2560 } | |
2561 #endif | |
2562 return false; | |
2563 } | |
2564 | |
2565 // No make sure all the other nodes in the loop can be handled | |
2566 VectorSet ok(Thread::current()->resource_area()); | |
2567 | |
2568 // store related values are ok | |
2569 ok.set(store->_idx); | |
2570 ok.set(store->in(MemNode::Memory)->_idx); | |
2571 | |
2572 // Loop structure is ok | |
2573 ok.set(head->_idx); | |
2574 ok.set(head->loopexit()->_idx); | |
2575 ok.set(head->phi()->_idx); | |
2576 ok.set(head->incr()->_idx); | |
2577 ok.set(head->loopexit()->cmp_node()->_idx); | |
2578 ok.set(head->loopexit()->in(1)->_idx); | |
2579 | |
2580 // Address elements are ok | |
2581 if (con) ok.set(con->_idx); | |
2582 if (shift) ok.set(shift->_idx); | |
2583 if (conv) ok.set(conv->_idx); | |
2584 | |
2585 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2586 Node* n = lpt->_body.at(i); | |
2587 if (n->outcnt() == 0) continue; // Ignore dead | |
2588 if (ok.test(n->_idx)) continue; | |
2589 // Backedge projection is ok | |
2590 if (n->is_IfTrue() && n->in(0) == head->loopexit()) continue; | |
2591 if (!n->is_AddP()) { | |
2592 msg = "unhandled node"; | |
2593 msg_node = n; | |
2594 break; | |
2595 } | |
2596 } | |
2597 | |
2598 // Make sure no unexpected values are used outside the loop | |
2599 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2600 Node* n = lpt->_body.at(i); | |
2601 // These values can be replaced with other nodes if they are used | |
2602 // outside the loop. | |
1813
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2603 if (n == store || n == head->loopexit() || n == head->incr() || n == store->in(MemNode::Memory)) continue; |
1763 | 2604 for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) { |
2605 Node* use = iter.get(); | |
2606 if (!lpt->_body.contains(use)) { | |
2607 msg = "node is used outside loop"; | |
2608 // lpt->_body.dump(); | |
2609 msg_node = n; | |
2610 break; | |
2611 } | |
2612 } | |
2613 } | |
2614 | |
2615 #ifdef ASSERT | |
2616 if (TraceOptimizeFill) { | |
2617 if (msg != NULL) { | |
2618 tty->print_cr("no fill intrinsic: %s", msg); | |
2619 if (msg_node != NULL) msg_node->dump(); | |
2620 } else { | |
2621 tty->print_cr("fill intrinsic for:"); | |
2622 } | |
2623 store->dump(); | |
2624 if (Verbose) { | |
2625 lpt->_body.dump(); | |
2626 } | |
2627 } | |
2628 #endif | |
2629 | |
2630 return msg == NULL; | |
2631 } | |
2632 | |
2633 | |
2634 | |
2635 bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) { | |
2636 // Only for counted inner loops | |
2637 if (!lpt->is_counted() || !lpt->is_inner()) { | |
2638 return false; | |
2639 } | |
2640 | |
2641 // Must have constant stride | |
2642 CountedLoopNode* head = lpt->_head->as_CountedLoop(); | |
3850
6987871cfb9b
7077439: Possible reference through NULL in loopPredicate.cpp:726
kvn
parents:
3845
diff
changeset
|
2643 if (!head->is_valid_counted_loop() || !head->is_normal_loop()) { |
1763 | 2644 return false; |
2645 } | |
2646 | |
2647 // Check that the body only contains a store of a loop invariant | |
2648 // value that is indexed by the loop phi. | |
2649 Node* store = NULL; | |
2650 Node* store_value = NULL; | |
2651 Node* shift = NULL; | |
2652 Node* offset = NULL; | |
2653 if (!match_fill_loop(lpt, store, store_value, shift, offset)) { | |
2654 return false; | |
2655 } | |
2656 | |
2445 | 2657 #ifndef PRODUCT |
2658 if (TraceLoopOpts) { | |
2659 tty->print("ArrayFill "); | |
2660 lpt->dump_head(); | |
2661 } | |
2662 #endif | |
2663 | |
1763 | 2664 // Now replace the whole loop body by a call to a fill routine that |
2665 // covers the same region as the loop. | |
2666 Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base); | |
2667 | |
2668 // Build an expression for the beginning of the copy region | |
2669 Node* index = head->init_trip(); | |
2670 #ifdef _LP64 | |
2671 index = new (C, 2) ConvI2LNode(index); | |
2672 _igvn.register_new_node_with_optimizer(index); | |
2673 #endif | |
2674 if (shift != NULL) { | |
2675 // byte arrays don't require a shift but others do. | |
2676 index = new (C, 3) LShiftXNode(index, shift->in(2)); | |
2677 _igvn.register_new_node_with_optimizer(index); | |
2678 } | |
2679 index = new (C, 4) AddPNode(base, base, index); | |
2680 _igvn.register_new_node_with_optimizer(index); | |
2681 Node* from = new (C, 4) AddPNode(base, index, offset); | |
2682 _igvn.register_new_node_with_optimizer(from); | |
2683 // Compute the number of elements to copy | |
2684 Node* len = new (C, 3) SubINode(head->limit(), head->init_trip()); | |
2685 _igvn.register_new_node_with_optimizer(len); | |
2686 | |
2687 BasicType t = store->as_Mem()->memory_type(); | |
2688 bool aligned = false; | |
2689 if (offset != NULL && head->init_trip()->is_Con()) { | |
2690 int element_size = type2aelembytes(t); | |
2691 aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0; | |
2692 } | |
2693 | |
2694 // Build a call to the fill routine | |
2695 const char* fill_name; | |
2696 address fill = StubRoutines::select_fill_function(t, aligned, fill_name); | |
2697 assert(fill != NULL, "what?"); | |
2698 | |
2699 // Convert float/double to int/long for fill routines | |
2700 if (t == T_FLOAT) { | |
2701 store_value = new (C, 2) MoveF2INode(store_value); | |
2702 _igvn.register_new_node_with_optimizer(store_value); | |
2703 } else if (t == T_DOUBLE) { | |
2704 store_value = new (C, 2) MoveD2LNode(store_value); | |
2705 _igvn.register_new_node_with_optimizer(store_value); | |
2706 } | |
2707 | |
2708 Node* mem_phi = store->in(MemNode::Memory); | |
2709 Node* result_ctrl; | |
2710 Node* result_mem; | |
2711 const TypeFunc* call_type = OptoRuntime::array_fill_Type(); | |
2712 int size = call_type->domain()->cnt(); | |
2713 CallLeafNode *call = new (C, size) CallLeafNoFPNode(call_type, fill, | |
2714 fill_name, TypeAryPtr::get_array_body_type(t)); | |
2715 call->init_req(TypeFunc::Parms+0, from); | |
2716 call->init_req(TypeFunc::Parms+1, store_value); | |
1844
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2717 #ifdef _LP64 |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2718 len = new (C, 2) ConvI2LNode(len); |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2719 _igvn.register_new_node_with_optimizer(len); |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2720 #endif |
1763 | 2721 call->init_req(TypeFunc::Parms+2, len); |
1844
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2722 #ifdef _LP64 |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2723 call->init_req(TypeFunc::Parms+3, C->top()); |
75588558f1bf
6980792: Crash "exception happened outside interpreter, nmethods and vtable stubs (1)"
never
parents:
1813
diff
changeset
|
2724 #endif |
1763 | 2725 call->init_req( TypeFunc::Control, head->init_control()); |
2726 call->init_req( TypeFunc::I_O , C->top() ) ; // does no i/o | |
2727 call->init_req( TypeFunc::Memory , mem_phi->in(LoopNode::EntryControl) ); | |
2728 call->init_req( TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr) ); | |
2729 call->init_req( TypeFunc::FramePtr, C->start()->proj_out(TypeFunc::FramePtr) ); | |
2730 _igvn.register_new_node_with_optimizer(call); | |
2731 result_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control); | |
2732 _igvn.register_new_node_with_optimizer(result_ctrl); | |
2733 result_mem = new (C, 1) ProjNode(call,TypeFunc::Memory); | |
2734 _igvn.register_new_node_with_optimizer(result_mem); | |
2735 | |
2736 // If this fill is tightly coupled to an allocation and overwrites | |
2737 // the whole body, allow it to take over the zeroing. | |
2738 AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this); | |
2739 if (alloc != NULL && alloc->is_AllocateArray()) { | |
2740 Node* length = alloc->as_AllocateArray()->Ideal_length(); | |
2741 if (head->limit() == length && | |
2742 head->init_trip() == _igvn.intcon(0)) { | |
2743 if (TraceOptimizeFill) { | |
2744 tty->print_cr("Eliminated zeroing in allocation"); | |
2745 } | |
2746 alloc->maybe_set_complete(&_igvn); | |
2747 } else { | |
2748 #ifdef ASSERT | |
2749 if (TraceOptimizeFill) { | |
2750 tty->print_cr("filling array but bounds don't match"); | |
2751 alloc->dump(); | |
2752 head->init_trip()->dump(); | |
2753 head->limit()->dump(); | |
2754 length->dump(); | |
2755 } | |
2756 #endif | |
2757 } | |
2758 } | |
2759 | |
2760 // Redirect the old control and memory edges that are outside the loop. | |
2761 Node* exit = head->loopexit()->proj_out(0); | |
1813
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2762 // Sometimes the memory phi of the head is used as the outgoing |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2763 // state of the loop. It's safe in this case to replace it with the |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2764 // result_mem. |
c77e8f982901
6984979: OptimizeFill misses some cases with an odd memory graph
never
parents:
1785
diff
changeset
|
2765 _igvn.replace_node(store->in(MemNode::Memory), result_mem); |
1763 | 2766 _igvn.replace_node(exit, result_ctrl); |
2767 _igvn.replace_node(store, result_mem); | |
2768 // Any uses the increment outside of the loop become the loop limit. | |
2769 _igvn.replace_node(head->incr(), head->limit()); | |
2770 | |
2771 // Disconnect the head from the loop. | |
2772 for (uint i = 0; i < lpt->_body.size(); i++) { | |
2773 Node* n = lpt->_body.at(i); | |
2774 _igvn.replace_node(n, C->top()); | |
2775 } | |
2776 | |
2777 return true; | |
2778 } |