Mercurial > hg > truffle
annotate src/share/vm/opto/loopopts.cpp @ 1994:6cd6d394f280
7001033: assert(gch->gc_cause() == GCCause::_scavenge_alot || !gch->incremental_collection_failed())
7002546: regression on SpecJbb2005 on 7b118 comparing to 7b117 on small heaps
Summary: Relaxed assertion checking related to incremental_collection_failed flag to allow for ExplicitGCInvokesConcurrent behaviour where we do not want a failing scavenge to bail to a stop-world collection. Parameterized incremental_collection_will_fail() so we can selectively use, or not use, as appropriate, the statistical prediction at specific use sites. This essentially reverts the scavenge bail-out logic to what it was prior to some recent changes that had inadvertently started using the statistical prediction which can be noisy in the presence of bursty loads. Added some associated verbose non-product debugging messages.
Reviewed-by: johnc, tonyp
author | ysr |
---|---|
date | Tue, 07 Dec 2010 21:55:53 -0800 |
parents | f95d63e2154a |
children | 9dc311b8473e |
rev | line source |
---|---|
0 | 1 /* |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1254
diff
changeset
|
2 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1254
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1254
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1254
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "memory/allocation.inline.hpp" | |
27 #include "opto/addnode.hpp" | |
28 #include "opto/connode.hpp" | |
29 #include "opto/divnode.hpp" | |
30 #include "opto/loopnode.hpp" | |
31 #include "opto/mulnode.hpp" | |
32 #include "opto/rootnode.hpp" | |
33 #include "opto/subnode.hpp" | |
0 | 34 |
35 //============================================================================= | |
36 //------------------------------split_thru_phi--------------------------------- | |
37 // Split Node 'n' through merge point if there is enough win. | |
38 Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) { | |
69 | 39 if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) { |
40 // ConvI2L may have type information on it which is unsafe to push up | |
41 // so disable this for now | |
42 return NULL; | |
43 } | |
0 | 44 int wins = 0; |
45 assert( !n->is_CFG(), "" ); | |
46 assert( region->is_Region(), "" ); | |
64
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
47 |
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
48 const Type* type = n->bottom_type(); |
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
49 const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr(); |
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
50 Node *phi; |
223 | 51 if( t_oop != NULL && t_oop->is_known_instance_field() ) { |
64
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
52 int iid = t_oop->instance_id(); |
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
53 int index = C->get_alias_index(t_oop); |
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
54 int offset = t_oop->offset(); |
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
55 phi = new (C,region->req()) PhiNode(region, type, NULL, iid, index, offset); |
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
56 } else { |
1254
4ee1c645110e
6924097: assert((_type == Type::MEMORY) == (_adr_type != 0),"adr_type for memory phis only")
kvn
parents:
866
diff
changeset
|
57 phi = PhiNode::make_blank(region, n); |
64
b8f5ba577b02
6673473: (Escape Analysis) Add the instance's field information to PhiNode
kvn
parents:
35
diff
changeset
|
58 } |
0 | 59 uint old_unique = C->unique(); |
60 for( uint i = 1; i < region->req(); i++ ) { | |
61 Node *x; | |
62 Node* the_clone = NULL; | |
63 if( region->in(i) == C->top() ) { | |
64 x = C->top(); // Dead path? Use a dead data op | |
65 } else { | |
66 x = n->clone(); // Else clone up the data op | |
67 the_clone = x; // Remember for possible deletion. | |
68 // Alter data node to use pre-phi inputs | |
69 if( n->in(0) == region ) | |
70 x->set_req( 0, region->in(i) ); | |
71 for( uint j = 1; j < n->req(); j++ ) { | |
72 Node *in = n->in(j); | |
73 if( in->is_Phi() && in->in(0) == region ) | |
74 x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone | |
75 } | |
76 } | |
77 // Check for a 'win' on some paths | |
78 const Type *t = x->Value(&_igvn); | |
79 | |
80 bool singleton = t->singleton(); | |
81 | |
82 // A TOP singleton indicates that there are no possible values incoming | |
83 // along a particular edge. In most cases, this is OK, and the Phi will | |
84 // be eliminated later in an Ideal call. However, we can't allow this to | |
85 // happen if the singleton occurs on loop entry, as the elimination of | |
86 // the PhiNode may cause the resulting node to migrate back to a previous | |
87 // loop iteration. | |
88 if( singleton && t == Type::TOP ) { | |
89 // Is_Loop() == false does not confirm the absence of a loop (e.g., an | |
90 // irreducible loop may not be indicated by an affirmative is_Loop()); | |
91 // therefore, the only top we can split thru a phi is on a backedge of | |
92 // a loop. | |
93 singleton &= region->is_Loop() && (i != LoopNode::EntryControl); | |
94 } | |
95 | |
96 if( singleton ) { | |
97 wins++; | |
98 x = ((PhaseGVN&)_igvn).makecon(t); | |
99 } else { | |
100 // We now call Identity to try to simplify the cloned node. | |
101 // Note that some Identity methods call phase->type(this). | |
102 // Make sure that the type array is big enough for | |
103 // our new node, even though we may throw the node away. | |
104 // (Note: This tweaking with igvn only works because x is a new node.) | |
105 _igvn.set_type(x, t); | |
293
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
106 // If x is a TypeNode, capture any more-precise type permanently into Node |
605 | 107 // otherwise it will be not updated during igvn->transform since |
293
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
108 // igvn->type(x) is set to x->Value() already. |
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
109 x->raise_bottom_type(t); |
0 | 110 Node *y = x->Identity(&_igvn); |
111 if( y != x ) { | |
112 wins++; | |
113 x = y; | |
114 } else { | |
115 y = _igvn.hash_find(x); | |
116 if( y ) { | |
117 wins++; | |
118 x = y; | |
119 } else { | |
120 // Else x is a new node we are keeping | |
121 // We do not need register_new_node_with_optimizer | |
122 // because set_type has already been called. | |
123 _igvn._worklist.push(x); | |
124 } | |
125 } | |
126 } | |
127 if (x != the_clone && the_clone != NULL) | |
128 _igvn.remove_dead_node(the_clone); | |
129 phi->set_req( i, x ); | |
130 } | |
131 // Too few wins? | |
132 if( wins <= policy ) { | |
133 _igvn.remove_dead_node(phi); | |
134 return NULL; | |
135 } | |
136 | |
137 // Record Phi | |
138 register_new_node( phi, region ); | |
139 | |
140 for( uint i2 = 1; i2 < phi->req(); i2++ ) { | |
141 Node *x = phi->in(i2); | |
142 // If we commoned up the cloned 'x' with another existing Node, | |
143 // the existing Node picks up a new use. We need to make the | |
144 // existing Node occur higher up so it dominates its uses. | |
145 Node *old_ctrl; | |
146 IdealLoopTree *old_loop; | |
147 | |
148 // The occasional new node | |
149 if( x->_idx >= old_unique ) { // Found a new, unplaced node? | |
150 old_ctrl = x->is_Con() ? C->root() : NULL; | |
151 old_loop = NULL; // Not in any prior loop | |
152 } else { | |
153 old_ctrl = x->is_Con() ? C->root() : get_ctrl(x); | |
154 old_loop = get_loop(old_ctrl); // Get prior loop | |
155 } | |
156 // New late point must dominate new use | |
157 Node *new_ctrl = dom_lca( old_ctrl, region->in(i2) ); | |
158 // Set new location | |
159 set_ctrl(x, new_ctrl); | |
160 IdealLoopTree *new_loop = get_loop( new_ctrl ); | |
161 // If changing loop bodies, see if we need to collect into new body | |
162 if( old_loop != new_loop ) { | |
163 if( old_loop && !old_loop->_child ) | |
164 old_loop->_body.yank(x); | |
165 if( !new_loop->_child ) | |
166 new_loop->_body.push(x); // Collect body info | |
167 } | |
168 } | |
169 | |
170 return phi; | |
171 } | |
172 | |
173 //------------------------------dominated_by------------------------------------ | |
174 // Replace the dominated test with an obvious true or false. Place it on the | |
175 // IGVN worklist for later cleanup. Move control-dependent data Nodes on the | |
176 // live path up to the dominating control. | |
177 void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff ) { | |
178 #ifndef PRODUCT | |
179 if( VerifyLoopOptimizations && PrintOpto ) tty->print_cr("dominating test"); | |
180 #endif | |
181 | |
182 | |
183 // prevdom is the dominating projection of the dominating test. | |
184 assert( iff->is_If(), "" ); | |
185 assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added"); | |
186 int pop = prevdom->Opcode(); | |
187 assert( pop == Op_IfFalse || pop == Op_IfTrue, "" ); | |
188 // 'con' is set to true or false to kill the dominated test. | |
189 Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO); | |
190 set_ctrl(con, C->root()); // Constant gets a new use | |
191 // Hack the dominated test | |
192 _igvn.hash_delete(iff); | |
193 iff->set_req(1, con); | |
194 _igvn._worklist.push(iff); | |
195 | |
196 // If I dont have a reachable TRUE and FALSE path following the IfNode then | |
197 // I can assume this path reaches an infinite loop. In this case it's not | |
198 // important to optimize the data Nodes - either the whole compilation will | |
199 // be tossed or this path (and all data Nodes) will go dead. | |
200 if( iff->outcnt() != 2 ) return; | |
201 | |
202 // Make control-dependent data Nodes on the live path (path that will remain | |
203 // once the dominated IF is removed) become control-dependent on the | |
204 // dominating projection. | |
205 Node* dp = ((IfNode*)iff)->proj_out(pop == Op_IfTrue); | |
206 IdealLoopTree *old_loop = get_loop(dp); | |
207 | |
208 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { | |
209 Node* cd = dp->fast_out(i); // Control-dependent node | |
210 if( cd->depends_only_on_test() ) { | |
211 assert( cd->in(0) == dp, "" ); | |
212 _igvn.hash_delete( cd ); | |
213 cd->set_req(0, prevdom); | |
214 set_early_ctrl( cd ); | |
215 _igvn._worklist.push(cd); | |
216 IdealLoopTree *new_loop = get_loop(get_ctrl(cd)); | |
217 if( old_loop != new_loop ) { | |
218 if( !old_loop->_child ) old_loop->_body.yank(cd); | |
219 if( !new_loop->_child ) new_loop->_body.push(cd); | |
220 } | |
221 --i; | |
222 --imax; | |
223 } | |
224 } | |
225 } | |
226 | |
227 //------------------------------has_local_phi_input---------------------------- | |
228 // Return TRUE if 'n' has Phi inputs from its local block and no other | |
229 // block-local inputs (all non-local-phi inputs come from earlier blocks) | |
230 Node *PhaseIdealLoop::has_local_phi_input( Node *n ) { | |
231 Node *n_ctrl = get_ctrl(n); | |
232 // See if some inputs come from a Phi in this block, or from before | |
233 // this block. | |
234 uint i; | |
235 for( i = 1; i < n->req(); i++ ) { | |
236 Node *phi = n->in(i); | |
237 if( phi->is_Phi() && phi->in(0) == n_ctrl ) | |
238 break; | |
239 } | |
240 if( i >= n->req() ) | |
241 return NULL; // No Phi inputs; nowhere to clone thru | |
242 | |
243 // Check for inputs created between 'n' and the Phi input. These | |
244 // must split as well; they have already been given the chance | |
245 // (courtesy of a post-order visit) and since they did not we must | |
246 // recover the 'cost' of splitting them by being very profitable | |
247 // when splitting 'n'. Since this is unlikely we simply give up. | |
248 for( i = 1; i < n->req(); i++ ) { | |
249 Node *m = n->in(i); | |
250 if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) { | |
251 // We allow the special case of AddP's with no local inputs. | |
252 // This allows us to split-up address expressions. | |
253 if (m->is_AddP() && | |
254 get_ctrl(m->in(2)) != n_ctrl && | |
255 get_ctrl(m->in(3)) != n_ctrl) { | |
256 // Move the AddP up to dominating point | |
257 set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl))); | |
258 continue; | |
259 } | |
260 return NULL; | |
261 } | |
262 } | |
263 | |
264 return n_ctrl; | |
265 } | |
266 | |
267 //------------------------------remix_address_expressions---------------------- | |
268 // Rework addressing expressions to get the most loop-invariant stuff | |
269 // moved out. We'd like to do all associative operators, but it's especially | |
270 // important (common) to do address expressions. | |
271 Node *PhaseIdealLoop::remix_address_expressions( Node *n ) { | |
272 if (!has_ctrl(n)) return NULL; | |
273 Node *n_ctrl = get_ctrl(n); | |
274 IdealLoopTree *n_loop = get_loop(n_ctrl); | |
275 | |
276 // See if 'n' mixes loop-varying and loop-invariant inputs and | |
277 // itself is loop-varying. | |
278 | |
279 // Only interested in binary ops (and AddP) | |
280 if( n->req() < 3 || n->req() > 4 ) return NULL; | |
281 | |
282 Node *n1_ctrl = get_ctrl(n->in( 1)); | |
283 Node *n2_ctrl = get_ctrl(n->in( 2)); | |
284 Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3)); | |
285 IdealLoopTree *n1_loop = get_loop( n1_ctrl ); | |
286 IdealLoopTree *n2_loop = get_loop( n2_ctrl ); | |
287 IdealLoopTree *n3_loop = get_loop( n3_ctrl ); | |
288 | |
289 // Does one of my inputs spin in a tighter loop than self? | |
290 if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) || | |
291 (n_loop->is_member( n2_loop ) && n_loop != n2_loop) || | |
292 (n_loop->is_member( n3_loop ) && n_loop != n3_loop) ) | |
293 return NULL; // Leave well enough alone | |
294 | |
295 // Is at least one of my inputs loop-invariant? | |
296 if( n1_loop == n_loop && | |
297 n2_loop == n_loop && | |
298 n3_loop == n_loop ) | |
299 return NULL; // No loop-invariant inputs | |
300 | |
301 | |
302 int n_op = n->Opcode(); | |
303 | |
304 // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2). | |
305 if( n_op == Op_LShiftI ) { | |
306 // Scale is loop invariant | |
307 Node *scale = n->in(2); | |
308 Node *scale_ctrl = get_ctrl(scale); | |
309 IdealLoopTree *scale_loop = get_loop(scale_ctrl ); | |
310 if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) ) | |
311 return NULL; | |
312 const TypeInt *scale_t = scale->bottom_type()->isa_int(); | |
313 if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 ) | |
314 return NULL; // Dont bother with byte/short masking | |
315 // Add must vary with loop (else shift would be loop-invariant) | |
316 Node *add = n->in(1); | |
317 Node *add_ctrl = get_ctrl(add); | |
318 IdealLoopTree *add_loop = get_loop(add_ctrl); | |
319 //assert( n_loop == add_loop, "" ); | |
320 if( n_loop != add_loop ) return NULL; // happens w/ evil ZKM loops | |
321 | |
322 // Convert I-V into I+ (0-V); same for V-I | |
323 if( add->Opcode() == Op_SubI && | |
324 _igvn.type( add->in(1) ) != TypeInt::ZERO ) { | |
325 Node *zero = _igvn.intcon(0); | |
326 set_ctrl(zero, C->root()); | |
327 Node *neg = new (C, 3) SubINode( _igvn.intcon(0), add->in(2) ); | |
328 register_new_node( neg, get_ctrl(add->in(2) ) ); | |
329 add = new (C, 3) AddINode( add->in(1), neg ); | |
330 register_new_node( add, add_ctrl ); | |
331 } | |
332 if( add->Opcode() != Op_AddI ) return NULL; | |
333 // See if one add input is loop invariant | |
334 Node *add_var = add->in(1); | |
335 Node *add_var_ctrl = get_ctrl(add_var); | |
336 IdealLoopTree *add_var_loop = get_loop(add_var_ctrl ); | |
337 Node *add_invar = add->in(2); | |
338 Node *add_invar_ctrl = get_ctrl(add_invar); | |
339 IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl ); | |
340 if( add_var_loop == n_loop ) { | |
341 } else if( add_invar_loop == n_loop ) { | |
342 // Swap to find the invariant part | |
343 add_invar = add_var; | |
344 add_invar_ctrl = add_var_ctrl; | |
345 add_invar_loop = add_var_loop; | |
346 add_var = add->in(2); | |
347 Node *add_var_ctrl = get_ctrl(add_var); | |
348 IdealLoopTree *add_var_loop = get_loop(add_var_ctrl ); | |
349 } else // Else neither input is loop invariant | |
350 return NULL; | |
351 if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) ) | |
352 return NULL; // No invariant part of the add? | |
353 | |
354 // Yes! Reshape address expression! | |
355 Node *inv_scale = new (C, 3) LShiftINode( add_invar, scale ); | |
850
fd50a67f97d1
6860469: remix_address_expressions sets incorrect control causing crash in split_if_with_block_post
never
parents:
834
diff
changeset
|
356 Node *inv_scale_ctrl = |
fd50a67f97d1
6860469: remix_address_expressions sets incorrect control causing crash in split_if_with_block_post
never
parents:
834
diff
changeset
|
357 dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ? |
fd50a67f97d1
6860469: remix_address_expressions sets incorrect control causing crash in split_if_with_block_post
never
parents:
834
diff
changeset
|
358 add_invar_ctrl : scale_ctrl; |
fd50a67f97d1
6860469: remix_address_expressions sets incorrect control causing crash in split_if_with_block_post
never
parents:
834
diff
changeset
|
359 register_new_node( inv_scale, inv_scale_ctrl ); |
0 | 360 Node *var_scale = new (C, 3) LShiftINode( add_var, scale ); |
361 register_new_node( var_scale, n_ctrl ); | |
362 Node *var_add = new (C, 3) AddINode( var_scale, inv_scale ); | |
363 register_new_node( var_add, n_ctrl ); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
364 _igvn.replace_node( n, var_add ); |
0 | 365 return var_add; |
366 } | |
367 | |
368 // Replace (I+V) with (V+I) | |
369 if( n_op == Op_AddI || | |
370 n_op == Op_AddL || | |
371 n_op == Op_AddF || | |
372 n_op == Op_AddD || | |
373 n_op == Op_MulI || | |
374 n_op == Op_MulL || | |
375 n_op == Op_MulF || | |
376 n_op == Op_MulD ) { | |
377 if( n2_loop == n_loop ) { | |
378 assert( n1_loop != n_loop, "" ); | |
379 n->swap_edges(1, 2); | |
380 } | |
381 } | |
382 | |
383 // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V), | |
384 // but not if I2 is a constant. | |
385 if( n_op == Op_AddP ) { | |
386 if( n2_loop == n_loop && n3_loop != n_loop ) { | |
387 if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) { | |
388 Node *n22_ctrl = get_ctrl(n->in(2)->in(2)); | |
389 Node *n23_ctrl = get_ctrl(n->in(2)->in(3)); | |
390 IdealLoopTree *n22loop = get_loop( n22_ctrl ); | |
391 IdealLoopTree *n23_loop = get_loop( n23_ctrl ); | |
392 if( n22loop != n_loop && n22loop->is_member(n_loop) && | |
393 n23_loop == n_loop ) { | |
394 Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) ); | |
395 // Stuff new AddP in the loop preheader | |
396 register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) ); | |
397 Node *add2 = new (C, 4) AddPNode( n->in(1), add1, n->in(2)->in(3) ); | |
398 register_new_node( add2, n_ctrl ); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
399 _igvn.replace_node( n, add2 ); |
0 | 400 return add2; |
401 } | |
402 } | |
403 } | |
404 | |
405 // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V) | |
406 if( n2_loop != n_loop && n3_loop == n_loop ) { | |
407 if( n->in(3)->Opcode() == Op_AddI ) { | |
408 Node *V = n->in(3)->in(1); | |
409 Node *I = n->in(3)->in(2); | |
410 if( is_member(n_loop,get_ctrl(V)) ) { | |
411 } else { | |
412 Node *tmp = V; V = I; I = tmp; | |
413 } | |
414 if( !is_member(n_loop,get_ctrl(I)) ) { | |
415 Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2), I ); | |
416 // Stuff new AddP in the loop preheader | |
417 register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) ); | |
418 Node *add2 = new (C, 4) AddPNode( n->in(1), add1, V ); | |
419 register_new_node( add2, n_ctrl ); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
420 _igvn.replace_node( n, add2 ); |
0 | 421 return add2; |
422 } | |
423 } | |
424 } | |
425 } | |
426 | |
427 return NULL; | |
428 } | |
429 | |
430 //------------------------------conditional_move------------------------------- | |
431 // Attempt to replace a Phi with a conditional move. We have some pretty | |
432 // strict profitability requirements. All Phis at the merge point must | |
433 // be converted, so we can remove the control flow. We need to limit the | |
434 // number of c-moves to a small handful. All code that was in the side-arms | |
435 // of the CFG diamond is now speculatively executed. This code has to be | |
436 // "cheap enough". We are pretty much limited to CFG diamonds that merge | |
437 // 1 or 2 items with a total of 1 or 2 ops executed speculatively. | |
438 Node *PhaseIdealLoop::conditional_move( Node *region ) { | |
439 | |
440 assert( region->is_Region(), "sanity check" ); | |
441 if( region->req() != 3 ) return NULL; | |
442 | |
443 // Check for CFG diamond | |
444 Node *lp = region->in(1); | |
445 Node *rp = region->in(2); | |
446 if( !lp || !rp ) return NULL; | |
447 Node *lp_c = lp->in(0); | |
448 if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL; | |
449 IfNode *iff = lp_c->as_If(); | |
450 | |
451 // Check for highly predictable branch. No point in CMOV'ing if | |
452 // we are going to predict accurately all the time. | |
453 // %%% This hides patterns produced by utility methods like Math.min. | |
454 if( iff->_prob < PROB_UNLIKELY_MAG(3) || | |
455 iff->_prob > PROB_LIKELY_MAG(3) ) | |
456 return NULL; | |
457 | |
458 // Check for ops pinned in an arm of the diamond. | |
459 // Can't remove the control flow in this case | |
460 if( lp->outcnt() > 1 ) return NULL; | |
461 if( rp->outcnt() > 1 ) return NULL; | |
462 | |
463 // Check profitability | |
464 int cost = 0; | |
35
e2ae28d2ce91
6667588: Don't generate duplicated CMP for float/double values
kvn
parents:
0
diff
changeset
|
465 int phis = 0; |
0 | 466 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
467 Node *out = region->fast_out(i); | |
468 if( !out->is_Phi() ) continue; // Ignore other control edges, etc | |
35
e2ae28d2ce91
6667588: Don't generate duplicated CMP for float/double values
kvn
parents:
0
diff
changeset
|
469 phis++; |
0 | 470 PhiNode* phi = out->as_Phi(); |
471 switch (phi->type()->basic_type()) { | |
472 case T_LONG: | |
473 cost++; // Probably encodes as 2 CMOV's | |
474 case T_INT: // These all CMOV fine | |
475 case T_FLOAT: | |
476 case T_DOUBLE: | |
477 case T_ADDRESS: // (RawPtr) | |
478 cost++; | |
479 break; | |
293
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
480 case T_NARROWOOP: // Fall through |
0 | 481 case T_OBJECT: { // Base oops are OK, but not derived oops |
293
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
482 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); |
0 | 483 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a |
484 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus | |
485 // CMOVE'ing a derived pointer requires we also CMOVE the base. If we | |
486 // have a Phi for the base here that we convert to a CMOVE all is well | |
487 // and good. But if the base is dead, we'll not make a CMOVE. Later | |
488 // the allocator will have to produce a base by creating a CMOVE of the | |
489 // relevant bases. This puts the allocator in the business of | |
490 // manufacturing expensive instructions, generally a bad plan. | |
491 // Just Say No to Conditionally-Moved Derived Pointers. | |
492 if( tp && tp->offset() != 0 ) | |
493 return NULL; | |
494 cost++; | |
495 break; | |
496 } | |
497 default: | |
498 return NULL; // In particular, can't do memory or I/O | |
499 } | |
500 // Add in cost any speculative ops | |
501 for( uint j = 1; j < region->req(); j++ ) { | |
502 Node *proj = region->in(j); | |
503 Node *inp = phi->in(j); | |
504 if (get_ctrl(inp) == proj) { // Found local op | |
505 cost++; | |
506 // Check for a chain of dependent ops; these will all become | |
507 // speculative in a CMOV. | |
508 for( uint k = 1; k < inp->req(); k++ ) | |
509 if (get_ctrl(inp->in(k)) == proj) | |
510 return NULL; // Too much speculative goo | |
511 } | |
512 } | |
293
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
513 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. |
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
514 // This will likely Split-If, a higher-payoff operation. |
0 | 515 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { |
516 Node* use = phi->fast_out(k); | |
293
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
517 if( use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP() ) |
0 | 518 return NULL; |
519 } | |
520 } | |
521 if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo | |
35
e2ae28d2ce91
6667588: Don't generate duplicated CMP for float/double values
kvn
parents:
0
diff
changeset
|
522 Node* bol = iff->in(1); |
e2ae28d2ce91
6667588: Don't generate duplicated CMP for float/double values
kvn
parents:
0
diff
changeset
|
523 assert( bol->Opcode() == Op_Bool, "" ); |
e2ae28d2ce91
6667588: Don't generate duplicated CMP for float/double values
kvn
parents:
0
diff
changeset
|
524 int cmp_op = bol->in(1)->Opcode(); |
e2ae28d2ce91
6667588: Don't generate duplicated CMP for float/double values
kvn
parents:
0
diff
changeset
|
525 // It is expensive to generate flags from a float compare. |
e2ae28d2ce91
6667588: Don't generate duplicated CMP for float/double values
kvn
parents:
0
diff
changeset
|
526 // Avoid duplicated float compare. |
e2ae28d2ce91
6667588: Don't generate duplicated CMP for float/double values
kvn
parents:
0
diff
changeset
|
527 if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; |
0 | 528 |
529 // -------------- | |
530 // Now replace all Phis with CMOV's | |
531 Node *cmov_ctrl = iff->in(0); | |
532 uint flip = (lp->Opcode() == Op_IfTrue); | |
533 while( 1 ) { | |
534 PhiNode* phi = NULL; | |
535 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { | |
536 Node *out = region->fast_out(i); | |
537 if (out->is_Phi()) { | |
538 phi = out->as_Phi(); | |
539 break; | |
540 } | |
541 } | |
542 if (phi == NULL) break; | |
543 #ifndef PRODUCT | |
544 if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV"); | |
545 #endif | |
546 // Move speculative ops | |
547 for( uint j = 1; j < region->req(); j++ ) { | |
548 Node *proj = region->in(j); | |
549 Node *inp = phi->in(j); | |
550 if (get_ctrl(inp) == proj) { // Found local op | |
551 #ifndef PRODUCT | |
552 if( PrintOpto && VerifyLoopOptimizations ) { | |
553 tty->print(" speculate: "); | |
554 inp->dump(); | |
555 } | |
556 #endif | |
557 set_ctrl(inp, cmov_ctrl); | |
558 } | |
559 } | |
560 Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) ); | |
561 register_new_node( cmov, cmov_ctrl ); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
562 _igvn.replace_node( phi, cmov ); |
0 | 563 #ifndef PRODUCT |
564 if( VerifyLoopOptimizations ) verify(); | |
565 #endif | |
566 } | |
567 | |
568 // The useless CFG diamond will fold up later; see the optimization in | |
569 // RegionNode::Ideal. | |
570 _igvn._worklist.push(region); | |
571 | |
572 return iff->in(1); | |
573 } | |
574 | |
575 //------------------------------split_if_with_blocks_pre----------------------- | |
576 // Do the real work in a non-recursive function. Data nodes want to be | |
577 // cloned in the pre-order so they can feed each other nicely. | |
578 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) { | |
579 // Cloning these guys is unlikely to win | |
580 int n_op = n->Opcode(); | |
581 if( n_op == Op_MergeMem ) return n; | |
582 if( n->is_Proj() ) return n; | |
583 // Do not clone-up CmpFXXX variations, as these are always | |
584 // followed by a CmpI | |
585 if( n->is_Cmp() ) return n; | |
586 // Attempt to use a conditional move instead of a phi/branch | |
587 if( ConditionalMoveLimit > 0 && n_op == Op_Region ) { | |
588 Node *cmov = conditional_move( n ); | |
589 if( cmov ) return cmov; | |
590 } | |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
251
diff
changeset
|
591 if( n->is_CFG() || n->is_LoadStore() ) |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
251
diff
changeset
|
592 return n; |
0 | 593 if( n_op == Op_Opaque1 || // Opaque nodes cannot be mod'd |
594 n_op == Op_Opaque2 ) { | |
595 if( !C->major_progress() ) // If chance of no more loop opts... | |
596 _igvn._worklist.push(n); // maybe we'll remove them | |
597 return n; | |
598 } | |
599 | |
600 if( n->is_Con() ) return n; // No cloning for Con nodes | |
601 | |
602 Node *n_ctrl = get_ctrl(n); | |
603 if( !n_ctrl ) return n; // Dead node | |
604 | |
605 // Attempt to remix address expressions for loop invariants | |
606 Node *m = remix_address_expressions( n ); | |
607 if( m ) return m; | |
608 | |
609 // Determine if the Node has inputs from some local Phi. | |
610 // Returns the block to clone thru. | |
611 Node *n_blk = has_local_phi_input( n ); | |
612 if( !n_blk ) return n; | |
613 // Do not clone the trip counter through on a CountedLoop | |
614 // (messes up the canonical shape). | |
615 if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n; | |
616 | |
617 // Check for having no control input; not pinned. Allow | |
618 // dominating control. | |
619 if( n->in(0) ) { | |
620 Node *dom = idom(n_blk); | |
621 if( dom_lca( n->in(0), dom ) != n->in(0) ) | |
622 return n; | |
623 } | |
624 // Policy: when is it profitable. You must get more wins than | |
625 // policy before it is considered profitable. Policy is usually 0, | |
626 // so 1 win is considered profitable. Big merges will require big | |
627 // cloning, so get a larger policy. | |
628 int policy = n_blk->req() >> 2; | |
629 | |
630 // If the loop is a candidate for range check elimination, | |
631 // delay splitting through it's phi until a later loop optimization | |
632 if (n_blk->is_CountedLoop()) { | |
633 IdealLoopTree *lp = get_loop(n_blk); | |
634 if (lp && lp->_rce_candidate) { | |
635 return n; | |
636 } | |
637 } | |
638 | |
639 // Use same limit as split_if_with_blocks_post | |
640 if( C->unique() > 35000 ) return n; // Method too big | |
641 | |
642 // Split 'n' through the merge point if it is profitable | |
643 Node *phi = split_thru_phi( n, n_blk, policy ); | |
644 if( !phi ) return n; | |
645 | |
646 // Found a Phi to split thru! | |
647 // Replace 'n' with the new phi | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
648 _igvn.replace_node( n, phi ); |
0 | 649 // Moved a load around the loop, 'en-registering' something. |
650 if( n_blk->Opcode() == Op_Loop && n->is_Load() && | |
651 !phi->in(LoopNode::LoopBackControl)->is_Load() ) | |
652 C->set_major_progress(); | |
653 | |
654 return phi; | |
655 } | |
656 | |
657 static bool merge_point_too_heavy(Compile* C, Node* region) { | |
658 // Bail out if the region and its phis have too many users. | |
659 int weight = 0; | |
660 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { | |
661 weight += region->fast_out(i)->outcnt(); | |
662 } | |
663 int nodes_left = MaxNodeLimit - C->unique(); | |
664 if (weight * 8 > nodes_left) { | |
665 #ifndef PRODUCT | |
666 if (PrintOpto) | |
667 tty->print_cr("*** Split-if bails out: %d nodes, region weight %d", C->unique(), weight); | |
668 #endif | |
669 return true; | |
670 } else { | |
671 return false; | |
672 } | |
673 } | |
674 | |
675 static bool merge_point_safe(Node* region) { | |
676 // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode | |
677 // having a PhiNode input. This sidesteps the dangerous case where the split | |
678 // ConvI2LNode may become TOP if the input Value() does not | |
679 // overlap the ConvI2L range, leaving a node which may not dominate its | |
680 // uses. | |
681 // A better fix for this problem can be found in the BugTraq entry, but | |
682 // expediency for Mantis demands this hack. | |
834
0f2d888530e7
6855164: SIGSEGV during compilation of method involving loop over CharSequence.
cfang
parents:
605
diff
changeset
|
683 // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop |
0f2d888530e7
6855164: SIGSEGV during compilation of method involving loop over CharSequence.
cfang
parents:
605
diff
changeset
|
684 // split_if_with_blocks from splitting a block because we could not move around |
0f2d888530e7
6855164: SIGSEGV during compilation of method involving loop over CharSequence.
cfang
parents:
605
diff
changeset
|
685 // the FastLockNode. |
0 | 686 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
687 Node* n = region->fast_out(i); | |
688 if (n->is_Phi()) { | |
689 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
690 Node* m = n->fast_out(j); | |
834
0f2d888530e7
6855164: SIGSEGV during compilation of method involving loop over CharSequence.
cfang
parents:
605
diff
changeset
|
691 if (m->is_FastLock()) |
0 | 692 return false; |
834
0f2d888530e7
6855164: SIGSEGV during compilation of method involving loop over CharSequence.
cfang
parents:
605
diff
changeset
|
693 #ifdef _LP64 |
0f2d888530e7
6855164: SIGSEGV during compilation of method involving loop over CharSequence.
cfang
parents:
605
diff
changeset
|
694 if (m->Opcode() == Op_ConvI2L) |
0f2d888530e7
6855164: SIGSEGV during compilation of method involving loop over CharSequence.
cfang
parents:
605
diff
changeset
|
695 return false; |
0f2d888530e7
6855164: SIGSEGV during compilation of method involving loop over CharSequence.
cfang
parents:
605
diff
changeset
|
696 #endif |
0 | 697 } |
698 } | |
699 } | |
700 return true; | |
701 } | |
702 | |
703 | |
704 //------------------------------place_near_use--------------------------------- | |
705 // Place some computation next to use but not inside inner loops. | |
706 // For inner loop uses move it to the preheader area. | |
707 Node *PhaseIdealLoop::place_near_use( Node *useblock ) const { | |
708 IdealLoopTree *u_loop = get_loop( useblock ); | |
709 return (u_loop->_irreducible || u_loop->_child) | |
710 ? useblock | |
711 : u_loop->_head->in(LoopNode::EntryControl); | |
712 } | |
713 | |
714 | |
715 //------------------------------split_if_with_blocks_post---------------------- | |
716 // Do the real work in a non-recursive function. CFG hackery wants to be | |
717 // in the post-order, so it can dirty the I-DOM info and not use the dirtied | |
718 // info. | |
719 void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) { | |
720 | |
721 // Cloning Cmp through Phi's involves the split-if transform. | |
722 // FastLock is not used by an If | |
723 if( n->is_Cmp() && !n->is_FastLock() ) { | |
724 if( C->unique() > 35000 ) return; // Method too big | |
725 | |
726 // Do not do 'split-if' if irreducible loops are present. | |
727 if( _has_irreducible_loops ) | |
728 return; | |
729 | |
730 Node *n_ctrl = get_ctrl(n); | |
731 // Determine if the Node has inputs from some local Phi. | |
732 // Returns the block to clone thru. | |
733 Node *n_blk = has_local_phi_input( n ); | |
734 if( n_blk != n_ctrl ) return; | |
735 | |
736 if( merge_point_too_heavy(C, n_ctrl) ) | |
737 return; | |
738 | |
739 if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare? | |
740 Node *bol = n->unique_out(); | |
741 assert( bol->is_Bool(), "expect a bool here" ); | |
742 if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare? | |
743 Node *iff = bol->unique_out(); | |
744 | |
745 // Check some safety conditions | |
746 if( iff->is_If() ) { // Classic split-if? | |
747 if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if | |
748 } else if (iff->is_CMove()) { // Trying to split-up a CMOVE | |
749 if( get_ctrl(iff->in(2)) == n_ctrl || | |
750 get_ctrl(iff->in(3)) == n_ctrl ) | |
751 return; // Inputs not yet split-up | |
752 if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) { | |
753 return; // Loop-invar test gates loop-varying CMOVE | |
754 } | |
755 } else { | |
756 return; // some other kind of node, such as an Allocate | |
757 } | |
758 | |
759 // Do not do 'split-if' if some paths are dead. First do dead code | |
760 // elimination and then see if its still profitable. | |
761 for( uint i = 1; i < n_ctrl->req(); i++ ) | |
762 if( n_ctrl->in(i) == C->top() ) | |
763 return; | |
764 | |
765 // When is split-if profitable? Every 'win' on means some control flow | |
766 // goes dead, so it's almost always a win. | |
767 int policy = 0; | |
768 // If trying to do a 'Split-If' at the loop head, it is only | |
769 // profitable if the cmp folds up on BOTH paths. Otherwise we | |
770 // risk peeling a loop forever. | |
771 | |
772 // CNC - Disabled for now. Requires careful handling of loop | |
773 // body selection for the cloned code. Also, make sure we check | |
774 // for any input path not being in the same loop as n_ctrl. For | |
775 // irreducible loops we cannot check for 'n_ctrl->is_Loop()' | |
776 // because the alternative loop entry points won't be converted | |
777 // into LoopNodes. | |
778 IdealLoopTree *n_loop = get_loop(n_ctrl); | |
779 for( uint j = 1; j < n_ctrl->req(); j++ ) | |
780 if( get_loop(n_ctrl->in(j)) != n_loop ) | |
781 return; | |
782 | |
783 // Check for safety of the merge point. | |
784 if( !merge_point_safe(n_ctrl) ) { | |
785 return; | |
786 } | |
787 | |
788 // Split compare 'n' through the merge point if it is profitable | |
789 Node *phi = split_thru_phi( n, n_ctrl, policy ); | |
790 if( !phi ) return; | |
791 | |
792 // Found a Phi to split thru! | |
793 // Replace 'n' with the new phi | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
794 _igvn.replace_node( n, phi ); |
0 | 795 |
796 // Now split the bool up thru the phi | |
797 Node *bolphi = split_thru_phi( bol, n_ctrl, -1 ); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
798 _igvn.replace_node( bol, bolphi ); |
0 | 799 assert( iff->in(1) == bolphi, "" ); |
800 if( bolphi->Value(&_igvn)->singleton() ) | |
801 return; | |
802 | |
803 // Conditional-move? Must split up now | |
804 if( !iff->is_If() ) { | |
805 Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 ); | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
806 _igvn.replace_node( iff, cmovphi ); |
0 | 807 return; |
808 } | |
809 | |
810 // Now split the IF | |
811 do_split_if( iff ); | |
812 return; | |
813 } | |
814 | |
815 // Check for an IF ready to split; one that has its | |
816 // condition codes input coming from a Phi at the block start. | |
817 int n_op = n->Opcode(); | |
818 | |
819 // Check for an IF being dominated by another IF same test | |
820 if( n_op == Op_If ) { | |
821 Node *bol = n->in(1); | |
822 uint max = bol->outcnt(); | |
823 // Check for same test used more than once? | |
824 if( n_op == Op_If && max > 1 && bol->is_Bool() ) { | |
825 // Search up IDOMs to see if this IF is dominated. | |
826 Node *cutoff = get_ctrl(bol); | |
827 | |
828 // Now search up IDOMs till cutoff, looking for a dominating test | |
829 Node *prevdom = n; | |
830 Node *dom = idom(prevdom); | |
831 while( dom != cutoff ) { | |
832 if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) { | |
833 // Replace the dominated test with an obvious true or false. | |
834 // Place it on the IGVN worklist for later cleanup. | |
835 C->set_major_progress(); | |
836 dominated_by( prevdom, n ); | |
837 #ifndef PRODUCT | |
838 if( VerifyLoopOptimizations ) verify(); | |
839 #endif | |
840 return; | |
841 } | |
842 prevdom = dom; | |
843 dom = idom(prevdom); | |
844 } | |
845 } | |
846 } | |
847 | |
848 // See if a shared loop-varying computation has no loop-varying uses. | |
849 // Happens if something is only used for JVM state in uncommon trap exits, | |
850 // like various versions of induction variable+offset. Clone the | |
851 // computation per usage to allow it to sink out of the loop. | |
852 if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about) | |
853 Node *n_ctrl = get_ctrl(n); | |
854 IdealLoopTree *n_loop = get_loop(n_ctrl); | |
855 if( n_loop != _ltree_root ) { | |
856 DUIterator_Fast imax, i = n->fast_outs(imax); | |
857 for (; i < imax; i++) { | |
858 Node* u = n->fast_out(i); | |
859 if( !has_ctrl(u) ) break; // Found control user | |
860 IdealLoopTree *u_loop = get_loop(get_ctrl(u)); | |
861 if( u_loop == n_loop ) break; // Found loop-varying use | |
862 if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop | |
863 if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003 | |
864 } | |
865 bool did_break = (i < imax); // Did we break out of the previous loop? | |
866 if (!did_break && n->outcnt() > 1) { // All uses in outer loops! | |
867 Node *late_load_ctrl; | |
868 if (n->is_Load()) { | |
869 // If n is a load, get and save the result from get_late_ctrl(), | |
870 // to be later used in calculating the control for n's clones. | |
871 clear_dom_lca_tags(); | |
872 late_load_ctrl = get_late_ctrl(n, n_ctrl); | |
873 } | |
874 // If n is a load, and the late control is the same as the current | |
875 // control, then the cloning of n is a pointless exercise, because | |
876 // GVN will ensure that we end up where we started. | |
877 if (!n->is_Load() || late_load_ctrl != n_ctrl) { | |
878 for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) { | |
879 Node *u = n->last_out(j); // Clone private computation per use | |
880 _igvn.hash_delete(u); | |
881 _igvn._worklist.push(u); | |
882 Node *x = n->clone(); // Clone computation | |
883 Node *x_ctrl = NULL; | |
884 if( u->is_Phi() ) { | |
885 // Replace all uses of normal nodes. Replace Phi uses | |
605 | 886 // individually, so the separate Nodes can sink down |
0 | 887 // different paths. |
888 uint k = 1; | |
889 while( u->in(k) != n ) k++; | |
890 u->set_req( k, x ); | |
891 // x goes next to Phi input path | |
892 x_ctrl = u->in(0)->in(k); | |
893 --j; | |
894 } else { // Normal use | |
895 // Replace all uses | |
896 for( uint k = 0; k < u->req(); k++ ) { | |
897 if( u->in(k) == n ) { | |
898 u->set_req( k, x ); | |
899 --j; | |
900 } | |
901 } | |
902 x_ctrl = get_ctrl(u); | |
903 } | |
904 | |
905 // Find control for 'x' next to use but not inside inner loops. | |
906 // For inner loop uses get the preheader area. | |
907 x_ctrl = place_near_use(x_ctrl); | |
908 | |
909 if (n->is_Load()) { | |
910 // For loads, add a control edge to a CFG node outside of the loop | |
911 // to force them to not combine and return back inside the loop | |
912 // during GVN optimization (4641526). | |
913 // | |
914 // Because we are setting the actual control input, factor in | |
915 // the result from get_late_ctrl() so we respect any | |
916 // anti-dependences. (6233005). | |
917 x_ctrl = dom_lca(late_load_ctrl, x_ctrl); | |
918 | |
919 // Don't allow the control input to be a CFG splitting node. | |
920 // Such nodes should only have ProjNodes as outs, e.g. IfNode | |
921 // should only have IfTrueNode and IfFalseNode (4985384). | |
922 x_ctrl = find_non_split_ctrl(x_ctrl); | |
923 assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone"); | |
924 | |
925 x->set_req(0, x_ctrl); | |
926 } | |
927 register_new_node(x, x_ctrl); | |
928 | |
929 // Some institutional knowledge is needed here: 'x' is | |
930 // yanked because if the optimizer runs GVN on it all the | |
931 // cloned x's will common up and undo this optimization and | |
932 // be forced back in the loop. This is annoying because it | |
933 // makes +VerifyOpto report false-positives on progress. I | |
934 // tried setting control edges on the x's to force them to | |
935 // not combine, but the matching gets worried when it tries | |
936 // to fold a StoreP and an AddP together (as part of an | |
937 // address expression) and the AddP and StoreP have | |
938 // different controls. | |
318
60bc5071073f
6738933: assert with base pointers must match with compressed oops enabled
never
parents:
293
diff
changeset
|
939 if( !x->is_Load() && !x->is_DecodeN() ) _igvn._worklist.yank(x); |
0 | 940 } |
941 _igvn.remove_dead_node(n); | |
942 } | |
943 } | |
944 } | |
945 } | |
946 | |
947 // Check for Opaque2's who's loop has disappeared - who's input is in the | |
948 // same loop nest as their output. Remove 'em, they are no longer useful. | |
949 if( n_op == Op_Opaque2 && | |
950 n->in(1) != NULL && | |
951 get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) { | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
952 _igvn.replace_node( n, n->in(1) ); |
0 | 953 } |
954 } | |
955 | |
956 //------------------------------split_if_with_blocks--------------------------- | |
957 // Check for aggressive application of 'split-if' optimization, | |
958 // using basic block level info. | |
959 void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) { | |
960 Node *n = C->root(); | |
961 visited.set(n->_idx); // first, mark node as visited | |
962 // Do pre-visit work for root | |
963 n = split_if_with_blocks_pre( n ); | |
964 uint cnt = n->outcnt(); | |
965 uint i = 0; | |
966 while (true) { | |
967 // Visit all children | |
968 if (i < cnt) { | |
969 Node* use = n->raw_out(i); | |
970 ++i; | |
971 if (use->outcnt() != 0 && !visited.test_set(use->_idx)) { | |
972 // Now do pre-visit work for this use | |
973 use = split_if_with_blocks_pre( use ); | |
974 nstack.push(n, i); // Save parent and next use's index. | |
975 n = use; // Process all children of current use. | |
976 cnt = use->outcnt(); | |
977 i = 0; | |
978 } | |
979 } | |
980 else { | |
981 // All of n's children have been processed, complete post-processing. | |
982 if (cnt != 0 && !n->is_Con()) { | |
983 assert(has_node(n), "no dead nodes"); | |
984 split_if_with_blocks_post( n ); | |
985 } | |
986 if (nstack.is_empty()) { | |
987 // Finished all nodes on stack. | |
988 break; | |
989 } | |
990 // Get saved parent node and next use's index. Visit the rest of uses. | |
991 n = nstack.node(); | |
992 cnt = n->outcnt(); | |
993 i = nstack.index(); | |
994 nstack.pop(); | |
995 } | |
996 } | |
997 } | |
998 | |
999 | |
1000 //============================================================================= | |
1001 // | |
1002 // C L O N E A L O O P B O D Y | |
1003 // | |
1004 | |
1005 //------------------------------clone_iff-------------------------------------- | |
1006 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps. | |
1007 // "Nearly" because all Nodes have been cloned from the original in the loop, | |
1008 // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs | |
1009 // through the Phi recursively, and return a Bool. | |
1010 BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) { | |
1011 | |
1012 // Convert this Phi into a Phi merging Bools | |
1013 uint i; | |
1014 for( i = 1; i < phi->req(); i++ ) { | |
1015 Node *b = phi->in(i); | |
1016 if( b->is_Phi() ) { | |
1017 _igvn.hash_delete(phi); | |
1018 _igvn._worklist.push(phi); | |
1019 phi->set_req(i, clone_iff( b->as_Phi(), loop )); | |
1020 } else { | |
1021 assert( b->is_Bool(), "" ); | |
1022 } | |
1023 } | |
1024 | |
1025 Node *sample_bool = phi->in(1); | |
1026 Node *sample_cmp = sample_bool->in(1); | |
1027 | |
1028 // Make Phis to merge the Cmp's inputs. | |
1029 int size = phi->in(0)->req(); | |
1030 PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP ); | |
1031 PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP ); | |
1032 for( i = 1; i < phi->req(); i++ ) { | |
1033 Node *n1 = phi->in(i)->in(1)->in(1); | |
1034 Node *n2 = phi->in(i)->in(1)->in(2); | |
1035 phi1->set_req( i, n1 ); | |
1036 phi2->set_req( i, n2 ); | |
1037 phi1->set_type( phi1->type()->meet(n1->bottom_type()) ); | |
1038 phi2->set_type( phi2->type()->meet(n2->bottom_type()) ); | |
1039 } | |
1040 // See if these Phis have been made before. | |
1041 // Register with optimizer | |
1042 Node *hit1 = _igvn.hash_find_insert(phi1); | |
1043 if( hit1 ) { // Hit, toss just made Phi | |
1044 _igvn.remove_dead_node(phi1); // Remove new phi | |
1045 assert( hit1->is_Phi(), "" ); | |
1046 phi1 = (PhiNode*)hit1; // Use existing phi | |
1047 } else { // Miss | |
1048 _igvn.register_new_node_with_optimizer(phi1); | |
1049 } | |
1050 Node *hit2 = _igvn.hash_find_insert(phi2); | |
1051 if( hit2 ) { // Hit, toss just made Phi | |
1052 _igvn.remove_dead_node(phi2); // Remove new phi | |
1053 assert( hit2->is_Phi(), "" ); | |
1054 phi2 = (PhiNode*)hit2; // Use existing phi | |
1055 } else { // Miss | |
1056 _igvn.register_new_node_with_optimizer(phi2); | |
1057 } | |
1058 // Register Phis with loop/block info | |
1059 set_ctrl(phi1, phi->in(0)); | |
1060 set_ctrl(phi2, phi->in(0)); | |
1061 // Make a new Cmp | |
1062 Node *cmp = sample_cmp->clone(); | |
1063 cmp->set_req( 1, phi1 ); | |
1064 cmp->set_req( 2, phi2 ); | |
1065 _igvn.register_new_node_with_optimizer(cmp); | |
1066 set_ctrl(cmp, phi->in(0)); | |
1067 | |
1068 // Make a new Bool | |
1069 Node *b = sample_bool->clone(); | |
1070 b->set_req(1,cmp); | |
1071 _igvn.register_new_node_with_optimizer(b); | |
1072 set_ctrl(b, phi->in(0)); | |
1073 | |
1074 assert( b->is_Bool(), "" ); | |
1075 return (BoolNode*)b; | |
1076 } | |
1077 | |
1078 //------------------------------clone_bool------------------------------------- | |
1079 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps. | |
1080 // "Nearly" because all Nodes have been cloned from the original in the loop, | |
1081 // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs | |
1082 // through the Phi recursively, and return a Bool. | |
1083 CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) { | |
1084 uint i; | |
1085 // Convert this Phi into a Phi merging Bools | |
1086 for( i = 1; i < phi->req(); i++ ) { | |
1087 Node *b = phi->in(i); | |
1088 if( b->is_Phi() ) { | |
1089 _igvn.hash_delete(phi); | |
1090 _igvn._worklist.push(phi); | |
1091 phi->set_req(i, clone_bool( b->as_Phi(), loop )); | |
1092 } else { | |
1093 assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" ); | |
1094 } | |
1095 } | |
1096 | |
1097 Node *sample_cmp = phi->in(1); | |
1098 | |
1099 // Make Phis to merge the Cmp's inputs. | |
1100 int size = phi->in(0)->req(); | |
1101 PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP ); | |
1102 PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP ); | |
1103 for( uint j = 1; j < phi->req(); j++ ) { | |
1104 Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP | |
1105 Node *n1, *n2; | |
1106 if( cmp_top->is_Cmp() ) { | |
1107 n1 = cmp_top->in(1); | |
1108 n2 = cmp_top->in(2); | |
1109 } else { | |
1110 n1 = n2 = cmp_top; | |
1111 } | |
1112 phi1->set_req( j, n1 ); | |
1113 phi2->set_req( j, n2 ); | |
1114 phi1->set_type( phi1->type()->meet(n1->bottom_type()) ); | |
1115 phi2->set_type( phi2->type()->meet(n2->bottom_type()) ); | |
1116 } | |
1117 | |
1118 // See if these Phis have been made before. | |
1119 // Register with optimizer | |
1120 Node *hit1 = _igvn.hash_find_insert(phi1); | |
1121 if( hit1 ) { // Hit, toss just made Phi | |
1122 _igvn.remove_dead_node(phi1); // Remove new phi | |
1123 assert( hit1->is_Phi(), "" ); | |
1124 phi1 = (PhiNode*)hit1; // Use existing phi | |
1125 } else { // Miss | |
1126 _igvn.register_new_node_with_optimizer(phi1); | |
1127 } | |
1128 Node *hit2 = _igvn.hash_find_insert(phi2); | |
1129 if( hit2 ) { // Hit, toss just made Phi | |
1130 _igvn.remove_dead_node(phi2); // Remove new phi | |
1131 assert( hit2->is_Phi(), "" ); | |
1132 phi2 = (PhiNode*)hit2; // Use existing phi | |
1133 } else { // Miss | |
1134 _igvn.register_new_node_with_optimizer(phi2); | |
1135 } | |
1136 // Register Phis with loop/block info | |
1137 set_ctrl(phi1, phi->in(0)); | |
1138 set_ctrl(phi2, phi->in(0)); | |
1139 // Make a new Cmp | |
1140 Node *cmp = sample_cmp->clone(); | |
1141 cmp->set_req( 1, phi1 ); | |
1142 cmp->set_req( 2, phi2 ); | |
1143 _igvn.register_new_node_with_optimizer(cmp); | |
1144 set_ctrl(cmp, phi->in(0)); | |
1145 | |
1146 assert( cmp->is_Cmp(), "" ); | |
1147 return (CmpNode*)cmp; | |
1148 } | |
1149 | |
1150 //------------------------------sink_use--------------------------------------- | |
1151 // If 'use' was in the loop-exit block, it now needs to be sunk | |
1152 // below the post-loop merge point. | |
1153 void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) { | |
1154 if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) { | |
1155 set_ctrl(use, post_loop); | |
1156 for (DUIterator j = use->outs(); use->has_out(j); j++) | |
1157 sink_use(use->out(j), post_loop); | |
1158 } | |
1159 } | |
1160 | |
1161 //------------------------------clone_loop------------------------------------- | |
1162 // | |
1163 // C L O N E A L O O P B O D Y | |
1164 // | |
1165 // This is the basic building block of the loop optimizations. It clones an | |
1166 // entire loop body. It makes an old_new loop body mapping; with this mapping | |
1167 // you can find the new-loop equivalent to an old-loop node. All new-loop | |
1168 // nodes are exactly equal to their old-loop counterparts, all edges are the | |
1169 // same. All exits from the old-loop now have a RegionNode that merges the | |
1170 // equivalent new-loop path. This is true even for the normal "loop-exit" | |
1171 // condition. All uses of loop-invariant old-loop values now come from (one | |
1172 // or more) Phis that merge their new-loop equivalents. | |
1173 // | |
1174 // This operation leaves the graph in an illegal state: there are two valid | |
1175 // control edges coming from the loop pre-header to both loop bodies. I'll | |
1176 // definitely have to hack the graph after running this transform. | |
1177 // | |
1178 // From this building block I will further edit edges to perform loop peeling | |
1179 // or loop unrolling or iteration splitting (Range-Check-Elimination), etc. | |
1180 // | |
1181 // Parameter side_by_size_idom: | |
1182 // When side_by_size_idom is NULL, the dominator tree is constructed for | |
1183 // the clone loop to dominate the original. Used in construction of | |
1184 // pre-main-post loop sequence. | |
1185 // When nonnull, the clone and original are side-by-side, both are | |
1186 // dominated by the side_by_side_idom node. Used in construction of | |
1187 // unswitched loops. | |
1188 void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd, | |
1189 Node* side_by_side_idom) { | |
1190 | |
1191 // Step 1: Clone the loop body. Make the old->new mapping. | |
1192 uint i; | |
1193 for( i = 0; i < loop->_body.size(); i++ ) { | |
1194 Node *old = loop->_body.at(i); | |
1195 Node *nnn = old->clone(); | |
1196 old_new.map( old->_idx, nnn ); | |
1197 _igvn.register_new_node_with_optimizer(nnn); | |
1198 } | |
1199 | |
1200 | |
1201 // Step 2: Fix the edges in the new body. If the old input is outside the | |
1202 // loop use it. If the old input is INside the loop, use the corresponding | |
1203 // new node instead. | |
1204 for( i = 0; i < loop->_body.size(); i++ ) { | |
1205 Node *old = loop->_body.at(i); | |
1206 Node *nnn = old_new[old->_idx]; | |
1207 // Fix CFG/Loop controlling the new node | |
1208 if (has_ctrl(old)) { | |
1209 set_ctrl(nnn, old_new[get_ctrl(old)->_idx]); | |
1210 } else { | |
1211 set_loop(nnn, loop->_parent); | |
1212 if (old->outcnt() > 0) { | |
1213 set_idom( nnn, old_new[idom(old)->_idx], dd ); | |
1214 } | |
1215 } | |
1216 // Correct edges to the new node | |
1217 for( uint j = 0; j < nnn->req(); j++ ) { | |
1218 Node *n = nnn->in(j); | |
1219 if( n ) { | |
1220 IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n ); | |
1221 if( loop->is_member( old_in_loop ) ) | |
1222 nnn->set_req(j, old_new[n->_idx]); | |
1223 } | |
1224 } | |
1225 _igvn.hash_find_insert(nnn); | |
1226 } | |
1227 Node *newhead = old_new[loop->_head->_idx]; | |
1228 set_idom(newhead, newhead->in(LoopNode::EntryControl), dd); | |
1229 | |
1230 | |
1231 // Step 3: Now fix control uses. Loop varying control uses have already | |
1232 // been fixed up (as part of all input edges in Step 2). Loop invariant | |
1233 // control uses must be either an IfFalse or an IfTrue. Make a merge | |
1234 // point to merge the old and new IfFalse/IfTrue nodes; make the use | |
1235 // refer to this. | |
1236 ResourceArea *area = Thread::current()->resource_area(); | |
1237 Node_List worklist(area); | |
1238 uint new_counter = C->unique(); | |
1239 for( i = 0; i < loop->_body.size(); i++ ) { | |
1240 Node* old = loop->_body.at(i); | |
1241 if( !old->is_CFG() ) continue; | |
1242 Node* nnn = old_new[old->_idx]; | |
1243 | |
1244 // Copy uses to a worklist, so I can munge the def-use info | |
1245 // with impunity. | |
1246 for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++) | |
1247 worklist.push(old->fast_out(j)); | |
1248 | |
1249 while( worklist.size() ) { // Visit all uses | |
1250 Node *use = worklist.pop(); | |
1251 if (!has_node(use)) continue; // Ignore dead nodes | |
1252 IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use ); | |
1253 if( !loop->is_member( use_loop ) && use->is_CFG() ) { | |
1254 // Both OLD and USE are CFG nodes here. | |
1255 assert( use->is_Proj(), "" ); | |
1256 | |
1257 // Clone the loop exit control projection | |
1258 Node *newuse = use->clone(); | |
1259 newuse->set_req(0,nnn); | |
1260 _igvn.register_new_node_with_optimizer(newuse); | |
1261 set_loop(newuse, use_loop); | |
1262 set_idom(newuse, nnn, dom_depth(nnn) + 1 ); | |
1263 | |
1264 // We need a Region to merge the exit from the peeled body and the | |
1265 // exit from the old loop body. | |
1266 RegionNode *r = new (C, 3) RegionNode(3); | |
1267 // Map the old use to the new merge point | |
1268 old_new.map( use->_idx, r ); | |
1269 uint dd_r = MIN2(dom_depth(newuse),dom_depth(use)); | |
1270 assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" ); | |
1271 | |
1272 // The original user of 'use' uses 'r' instead. | |
1273 for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) { | |
1274 Node* useuse = use->last_out(l); | |
1275 _igvn.hash_delete(useuse); | |
1276 _igvn._worklist.push(useuse); | |
1277 uint uses_found = 0; | |
1278 if( useuse->in(0) == use ) { | |
1279 useuse->set_req(0, r); | |
1280 uses_found++; | |
1281 if( useuse->is_CFG() ) { | |
1282 assert( dom_depth(useuse) > dd_r, "" ); | |
1283 set_idom(useuse, r, dom_depth(useuse)); | |
1284 } | |
1285 } | |
1286 for( uint k = 1; k < useuse->req(); k++ ) { | |
1287 if( useuse->in(k) == use ) { | |
1288 useuse->set_req(k, r); | |
1289 uses_found++; | |
1290 } | |
1291 } | |
1292 l -= uses_found; // we deleted 1 or more copies of this edge | |
1293 } | |
1294 | |
1295 // Now finish up 'r' | |
1296 r->set_req( 1, newuse ); | |
1297 r->set_req( 2, use ); | |
1298 _igvn.register_new_node_with_optimizer(r); | |
1299 set_loop(r, use_loop); | |
1300 set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r); | |
1301 } // End of if a loop-exit test | |
1302 } | |
1303 } | |
1304 | |
1305 // Step 4: If loop-invariant use is not control, it must be dominated by a | |
1306 // loop exit IfFalse/IfTrue. Find "proper" loop exit. Make a Region | |
1307 // there if needed. Make a Phi there merging old and new used values. | |
1308 Node_List *split_if_set = NULL; | |
1309 Node_List *split_bool_set = NULL; | |
1310 Node_List *split_cex_set = NULL; | |
1311 for( i = 0; i < loop->_body.size(); i++ ) { | |
1312 Node* old = loop->_body.at(i); | |
1313 Node* nnn = old_new[old->_idx]; | |
1314 // Copy uses to a worklist, so I can munge the def-use info | |
1315 // with impunity. | |
1316 for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++) | |
1317 worklist.push(old->fast_out(j)); | |
1318 | |
1319 while( worklist.size() ) { | |
1320 Node *use = worklist.pop(); | |
1321 if (!has_node(use)) continue; // Ignore dead nodes | |
1322 if (use->in(0) == C->top()) continue; | |
1323 IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use ); | |
1324 // Check for data-use outside of loop - at least one of OLD or USE | |
1325 // must not be a CFG node. | |
1326 if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) { | |
1327 | |
1328 // If the Data use is an IF, that means we have an IF outside of the | |
1329 // loop that is switching on a condition that is set inside of the | |
1330 // loop. Happens if people set a loop-exit flag; then test the flag | |
1331 // in the loop to break the loop, then test is again outside of the | |
1332 // loop to determine which way the loop exited. | |
1333 if( use->is_If() || use->is_CMove() ) { | |
1334 // Since this code is highly unlikely, we lazily build the worklist | |
1335 // of such Nodes to go split. | |
1336 if( !split_if_set ) | |
1337 split_if_set = new Node_List(area); | |
1338 split_if_set->push(use); | |
1339 } | |
1340 if( use->is_Bool() ) { | |
1341 if( !split_bool_set ) | |
1342 split_bool_set = new Node_List(area); | |
1343 split_bool_set->push(use); | |
1344 } | |
1345 if( use->Opcode() == Op_CreateEx ) { | |
1346 if( !split_cex_set ) | |
1347 split_cex_set = new Node_List(area); | |
1348 split_cex_set->push(use); | |
1349 } | |
1350 | |
1351 | |
1352 // Get "block" use is in | |
1353 uint idx = 0; | |
1354 while( use->in(idx) != old ) idx++; | |
1355 Node *prev = use->is_CFG() ? use : get_ctrl(use); | |
1356 assert( !loop->is_member( get_loop( prev ) ), "" ); | |
1357 Node *cfg = prev->_idx >= new_counter | |
1358 ? prev->in(2) | |
1359 : idom(prev); | |
1360 if( use->is_Phi() ) // Phi use is in prior block | |
1361 cfg = prev->in(idx); // NOT in block of Phi itself | |
1362 if (cfg->is_top()) { // Use is dead? | |
1363 _igvn.hash_delete(use); | |
1364 _igvn._worklist.push(use); | |
1365 use->set_req(idx, C->top()); | |
1366 continue; | |
1367 } | |
1368 | |
1369 while( !loop->is_member( get_loop( cfg ) ) ) { | |
1370 prev = cfg; | |
1371 cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg); | |
1372 } | |
1373 // If the use occurs after merging several exits from the loop, then | |
1374 // old value must have dominated all those exits. Since the same old | |
1375 // value was used on all those exits we did not need a Phi at this | |
1376 // merge point. NOW we do need a Phi here. Each loop exit value | |
1377 // is now merged with the peeled body exit; each exit gets its own | |
1378 // private Phi and those Phis need to be merged here. | |
1379 Node *phi; | |
1380 if( prev->is_Region() ) { | |
1381 if( idx == 0 ) { // Updating control edge? | |
1382 phi = prev; // Just use existing control | |
1383 } else { // Else need a new Phi | |
1384 phi = PhiNode::make( prev, old ); | |
1385 // Now recursively fix up the new uses of old! | |
1386 for( uint i = 1; i < prev->req(); i++ ) { | |
1387 worklist.push(phi); // Onto worklist once for each 'old' input | |
1388 } | |
1389 } | |
1390 } else { | |
1391 // Get new RegionNode merging old and new loop exits | |
1392 prev = old_new[prev->_idx]; | |
1393 assert( prev, "just made this in step 7" ); | |
1394 if( idx == 0 ) { // Updating control edge? | |
1395 phi = prev; // Just use existing control | |
1396 } else { // Else need a new Phi | |
1397 // Make a new Phi merging data values properly | |
1398 phi = PhiNode::make( prev, old ); | |
1399 phi->set_req( 1, nnn ); | |
1400 } | |
1401 } | |
1402 // If inserting a new Phi, check for prior hits | |
1403 if( idx != 0 ) { | |
1404 Node *hit = _igvn.hash_find_insert(phi); | |
1405 if( hit == NULL ) { | |
1406 _igvn.register_new_node_with_optimizer(phi); // Register new phi | |
1407 } else { // or | |
1408 // Remove the new phi from the graph and use the hit | |
1409 _igvn.remove_dead_node(phi); | |
1410 phi = hit; // Use existing phi | |
1411 } | |
1412 set_ctrl(phi, prev); | |
1413 } | |
1414 // Make 'use' use the Phi instead of the old loop body exit value | |
1415 _igvn.hash_delete(use); | |
1416 _igvn._worklist.push(use); | |
1417 use->set_req(idx, phi); | |
1418 if( use->_idx >= new_counter ) { // If updating new phis | |
1419 // Not needed for correctness, but prevents a weak assert | |
1420 // in AddPNode from tripping (when we end up with different | |
1421 // base & derived Phis that will become the same after | |
1422 // IGVN does CSE). | |
1423 Node *hit = _igvn.hash_find_insert(use); | |
1424 if( hit ) // Go ahead and re-hash for hits. | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
1425 _igvn.replace_node( use, hit ); |
0 | 1426 } |
1427 | |
1428 // If 'use' was in the loop-exit block, it now needs to be sunk | |
1429 // below the post-loop merge point. | |
1430 sink_use( use, prev ); | |
1431 } | |
1432 } | |
1433 } | |
1434 | |
1435 // Check for IFs that need splitting/cloning. Happens if an IF outside of | |
1436 // the loop uses a condition set in the loop. The original IF probably | |
1437 // takes control from one or more OLD Regions (which in turn get from NEW | |
1438 // Regions). In any case, there will be a set of Phis for each merge point | |
1439 // from the IF up to where the original BOOL def exists the loop. | |
1440 if( split_if_set ) { | |
1441 while( split_if_set->size() ) { | |
1442 Node *iff = split_if_set->pop(); | |
1443 if( iff->in(1)->is_Phi() ) { | |
1444 BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop ); | |
1445 _igvn.hash_delete(iff); | |
1446 _igvn._worklist.push(iff); | |
1447 iff->set_req(1, b); | |
1448 } | |
1449 } | |
1450 } | |
1451 if( split_bool_set ) { | |
1452 while( split_bool_set->size() ) { | |
1453 Node *b = split_bool_set->pop(); | |
1454 Node *phi = b->in(1); | |
1455 assert( phi->is_Phi(), "" ); | |
1456 CmpNode *cmp = clone_bool( (PhiNode*)phi, loop ); | |
1457 _igvn.hash_delete(b); | |
1458 _igvn._worklist.push(b); | |
1459 b->set_req(1, cmp); | |
1460 } | |
1461 } | |
1462 if( split_cex_set ) { | |
1463 while( split_cex_set->size() ) { | |
1464 Node *b = split_cex_set->pop(); | |
1465 assert( b->in(0)->is_Region(), "" ); | |
1466 assert( b->in(1)->is_Phi(), "" ); | |
1467 assert( b->in(0)->in(0) == b->in(1)->in(0), "" ); | |
1468 split_up( b, b->in(0), NULL ); | |
1469 } | |
1470 } | |
1471 | |
1472 } | |
1473 | |
1474 | |
1475 //---------------------- stride_of_possible_iv ------------------------------------- | |
1476 // Looks for an iff/bool/comp with one operand of the compare | |
1477 // being a cycle involving an add and a phi, | |
1478 // with an optional truncation (left-shift followed by a right-shift) | |
1479 // of the add. Returns zero if not an iv. | |
1480 int PhaseIdealLoop::stride_of_possible_iv(Node* iff) { | |
1481 Node* trunc1 = NULL; | |
1482 Node* trunc2 = NULL; | |
1483 const TypeInt* ttype = NULL; | |
1484 if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) { | |
1485 return 0; | |
1486 } | |
1487 BoolNode* bl = iff->in(1)->as_Bool(); | |
1488 Node* cmp = bl->in(1); | |
1489 if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) { | |
1490 return 0; | |
1491 } | |
1492 // Must have an invariant operand | |
1493 if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) { | |
1494 return 0; | |
1495 } | |
1496 Node* add2 = NULL; | |
1497 Node* cmp1 = cmp->in(1); | |
1498 if (cmp1->is_Phi()) { | |
1499 // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) ))) | |
1500 Node* phi = cmp1; | |
1501 for (uint i = 1; i < phi->req(); i++) { | |
1502 Node* in = phi->in(i); | |
1503 Node* add = CountedLoopNode::match_incr_with_optional_truncation(in, | |
1504 &trunc1, &trunc2, &ttype); | |
1505 if (add && add->in(1) == phi) { | |
1506 add2 = add->in(2); | |
1507 break; | |
1508 } | |
1509 } | |
1510 } else { | |
1511 // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) ))) | |
1512 Node* addtrunc = cmp1; | |
1513 Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc, | |
1514 &trunc1, &trunc2, &ttype); | |
1515 if (add && add->in(1)->is_Phi()) { | |
1516 Node* phi = add->in(1); | |
1517 for (uint i = 1; i < phi->req(); i++) { | |
1518 if (phi->in(i) == addtrunc) { | |
1519 add2 = add->in(2); | |
1520 break; | |
1521 } | |
1522 } | |
1523 } | |
1524 } | |
1525 if (add2 != NULL) { | |
1526 const TypeInt* add2t = _igvn.type(add2)->is_int(); | |
1527 if (add2t->is_con()) { | |
1528 return add2t->get_con(); | |
1529 } | |
1530 } | |
1531 return 0; | |
1532 } | |
1533 | |
1534 | |
1535 //---------------------- stay_in_loop ------------------------------------- | |
1536 // Return the (unique) control output node that's in the loop (if it exists.) | |
1537 Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) { | |
1538 Node* unique = NULL; | |
1539 if (!n) return NULL; | |
1540 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |
1541 Node* use = n->fast_out(i); | |
1542 if (!has_ctrl(use) && loop->is_member(get_loop(use))) { | |
1543 if (unique != NULL) { | |
1544 return NULL; | |
1545 } | |
1546 unique = use; | |
1547 } | |
1548 } | |
1549 return unique; | |
1550 } | |
1551 | |
1552 //------------------------------ register_node ------------------------------------- | |
1553 // Utility to register node "n" with PhaseIdealLoop | |
1554 void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) { | |
1555 _igvn.register_new_node_with_optimizer(n); | |
1556 loop->_body.push(n); | |
1557 if (n->is_CFG()) { | |
1558 set_loop(n, loop); | |
1559 set_idom(n, pred, ddepth); | |
1560 } else { | |
1561 set_ctrl(n, pred); | |
1562 } | |
1563 } | |
1564 | |
1565 //------------------------------ proj_clone ------------------------------------- | |
1566 // Utility to create an if-projection | |
1567 ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) { | |
1568 ProjNode* c = p->clone()->as_Proj(); | |
1569 c->set_req(0, iff); | |
1570 return c; | |
1571 } | |
1572 | |
1573 //------------------------------ short_circuit_if ------------------------------------- | |
1574 // Force the iff control output to be the live_proj | |
1575 Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) { | |
1576 int proj_con = live_proj->_con; | |
1577 assert(proj_con == 0 || proj_con == 1, "false or true projection"); | |
1578 Node *con = _igvn.intcon(proj_con); | |
1579 set_ctrl(con, C->root()); | |
1580 if (iff) { | |
1581 iff->set_req(1, con); | |
1582 } | |
1583 return con; | |
1584 } | |
1585 | |
1586 //------------------------------ insert_if_before_proj ------------------------------------- | |
1587 // Insert a new if before an if projection (* - new node) | |
1588 // | |
1589 // before | |
1590 // if(test) | |
1591 // / \ | |
1592 // v v | |
1593 // other-proj proj (arg) | |
1594 // | |
1595 // after | |
1596 // if(test) | |
1597 // / \ | |
1598 // / v | |
1599 // | * proj-clone | |
1600 // v | | |
1601 // other-proj v | |
1602 // * new_if(relop(cmp[IU](left,right))) | |
1603 // / \ | |
1604 // v v | |
1605 // * new-proj proj | |
1606 // (returned) | |
1607 // | |
1608 ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) { | |
1609 IfNode* iff = proj->in(0)->as_If(); | |
1610 IdealLoopTree *loop = get_loop(proj); | |
1611 ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj(); | |
1612 int ddepth = dom_depth(proj); | |
1613 | |
1614 _igvn.hash_delete(iff); | |
1615 _igvn._worklist.push(iff); | |
1616 _igvn.hash_delete(proj); | |
1617 _igvn._worklist.push(proj); | |
1618 | |
1619 proj->set_req(0, NULL); // temporary disconnect | |
1620 ProjNode* proj2 = proj_clone(proj, iff); | |
1621 register_node(proj2, loop, iff, ddepth); | |
1622 | |
1623 Node* cmp = Signed ? (Node*) new (C,3)CmpINode(left, right) : (Node*) new (C,3)CmpUNode(left, right); | |
1624 register_node(cmp, loop, proj2, ddepth); | |
1625 | |
1626 BoolNode* bol = new (C,2)BoolNode(cmp, relop); | |
1627 register_node(bol, loop, proj2, ddepth); | |
1628 | |
1629 IfNode* new_if = new (C,2)IfNode(proj2, bol, iff->_prob, iff->_fcnt); | |
1630 register_node(new_if, loop, proj2, ddepth); | |
1631 | |
1632 proj->set_req(0, new_if); // reattach | |
1633 set_idom(proj, new_if, ddepth); | |
1634 | |
1635 ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj(); | |
1636 register_node(new_exit, get_loop(other_proj), new_if, ddepth); | |
1637 | |
1638 return new_exit; | |
1639 } | |
1640 | |
1641 //------------------------------ insert_region_before_proj ------------------------------------- | |
1642 // Insert a region before an if projection (* - new node) | |
1643 // | |
1644 // before | |
1645 // if(test) | |
1646 // / | | |
1647 // v | | |
1648 // proj v | |
1649 // other-proj | |
1650 // | |
1651 // after | |
1652 // if(test) | |
1653 // / | | |
1654 // v | | |
1655 // * proj-clone v | |
1656 // | other-proj | |
1657 // v | |
1658 // * new-region | |
1659 // | | |
1660 // v | |
1661 // * dum_if | |
1662 // / \ | |
1663 // v \ | |
1664 // * dum-proj v | |
1665 // proj | |
1666 // | |
1667 RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) { | |
1668 IfNode* iff = proj->in(0)->as_If(); | |
1669 IdealLoopTree *loop = get_loop(proj); | |
1670 ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj(); | |
1671 int ddepth = dom_depth(proj); | |
1672 | |
1673 _igvn.hash_delete(iff); | |
1674 _igvn._worklist.push(iff); | |
1675 _igvn.hash_delete(proj); | |
1676 _igvn._worklist.push(proj); | |
1677 | |
1678 proj->set_req(0, NULL); // temporary disconnect | |
1679 ProjNode* proj2 = proj_clone(proj, iff); | |
1680 register_node(proj2, loop, iff, ddepth); | |
1681 | |
1682 RegionNode* reg = new (C,2)RegionNode(2); | |
1683 reg->set_req(1, proj2); | |
1684 register_node(reg, loop, iff, ddepth); | |
1685 | |
1686 IfNode* dum_if = new (C,2)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt); | |
1687 register_node(dum_if, loop, reg, ddepth); | |
1688 | |
1689 proj->set_req(0, dum_if); // reattach | |
1690 set_idom(proj, dum_if, ddepth); | |
1691 | |
1692 ProjNode* dum_proj = proj_clone(other_proj, dum_if); | |
1693 register_node(dum_proj, loop, dum_if, ddepth); | |
1694 | |
1695 return reg; | |
1696 } | |
1697 | |
1698 //------------------------------ insert_cmpi_loop_exit ------------------------------------- | |
1699 // Clone a signed compare loop exit from an unsigned compare and | |
1700 // insert it before the unsigned cmp on the stay-in-loop path. | |
1701 // All new nodes inserted in the dominator tree between the original | |
1702 // if and it's projections. The original if test is replaced with | |
1703 // a constant to force the stay-in-loop path. | |
1704 // | |
1705 // This is done to make sure that the original if and it's projections | |
1706 // still dominate the same set of control nodes, that the ctrl() relation | |
1707 // from data nodes to them is preserved, and that their loop nesting is | |
1708 // preserved. | |
1709 // | |
1710 // before | |
1711 // if(i <u limit) unsigned compare loop exit | |
1712 // / | | |
1713 // v v | |
1714 // exit-proj stay-in-loop-proj | |
1715 // | |
1716 // after | |
1717 // if(stay-in-loop-const) original if | |
1718 // / | | |
1719 // / v | |
1720 // / if(i < limit) new signed test | |
1721 // / / | | |
1722 // / / v | |
1723 // / / if(i <u limit) new cloned unsigned test | |
1724 // / / / | | |
1725 // v v v | | |
1726 // region | | |
1727 // | | | |
1728 // dum-if | | |
1729 // / | | | |
1730 // ether | | | |
1731 // v v | |
1732 // exit-proj stay-in-loop-proj | |
1733 // | |
1734 IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) { | |
1735 const bool Signed = true; | |
1736 const bool Unsigned = false; | |
1737 | |
1738 BoolNode* bol = if_cmpu->in(1)->as_Bool(); | |
1739 if (bol->_test._test != BoolTest::lt) return NULL; | |
1740 CmpNode* cmpu = bol->in(1)->as_Cmp(); | |
1741 if (cmpu->Opcode() != Op_CmpU) return NULL; | |
1742 int stride = stride_of_possible_iv(if_cmpu); | |
1743 if (stride == 0) return NULL; | |
1744 | |
1745 ProjNode* lp_continue = stay_in_loop(if_cmpu, loop)->as_Proj(); | |
1746 ProjNode* lp_exit = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj(); | |
1747 | |
1748 Node* limit = NULL; | |
1749 if (stride > 0) { | |
1750 limit = cmpu->in(2); | |
1751 } else { | |
1752 limit = _igvn.makecon(TypeInt::ZERO); | |
1753 set_ctrl(limit, C->root()); | |
1754 } | |
1755 // Create a new region on the exit path | |
1756 RegionNode* reg = insert_region_before_proj(lp_exit); | |
1757 | |
1758 // Clone the if-cmpu-true-false using a signed compare | |
1759 BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge; | |
1760 ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue); | |
1761 reg->add_req(cmpi_exit); | |
1762 | |
1763 // Clone the if-cmpu-true-false | |
1764 BoolTest::mask rel_u = bol->_test._test; | |
1765 ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue); | |
1766 reg->add_req(cmpu_exit); | |
1767 | |
1768 // Force original if to stay in loop. | |
1769 short_circuit_if(if_cmpu, lp_continue); | |
1770 | |
1771 return cmpi_exit->in(0)->as_If(); | |
1772 } | |
1773 | |
1774 //------------------------------ remove_cmpi_loop_exit ------------------------------------- | |
1775 // Remove a previously inserted signed compare loop exit. | |
1776 void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) { | |
1777 Node* lp_proj = stay_in_loop(if_cmp, loop); | |
1778 assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI && | |
1779 stay_in_loop(lp_proj, loop)->is_If() && | |
1780 stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu"); | |
1781 Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO); | |
1782 set_ctrl(con, C->root()); | |
1783 if_cmp->set_req(1, con); | |
1784 } | |
1785 | |
1786 //------------------------------ scheduled_nodelist ------------------------------------- | |
1787 // Create a post order schedule of nodes that are in the | |
1788 // "member" set. The list is returned in "sched". | |
1789 // The first node in "sched" is the loop head, followed by | |
1790 // nodes which have no inputs in the "member" set, and then | |
1791 // followed by the nodes that have an immediate input dependence | |
1792 // on a node in "sched". | |
1793 void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) { | |
1794 | |
1795 assert(member.test(loop->_head->_idx), "loop head must be in member set"); | |
1796 Arena *a = Thread::current()->resource_area(); | |
1797 VectorSet visited(a); | |
1798 Node_Stack nstack(a, loop->_body.size()); | |
1799 | |
1800 Node* n = loop->_head; // top of stack is cached in "n" | |
1801 uint idx = 0; | |
1802 visited.set(n->_idx); | |
1803 | |
1804 // Initially push all with no inputs from within member set | |
1805 for(uint i = 0; i < loop->_body.size(); i++ ) { | |
1806 Node *elt = loop->_body.at(i); | |
1807 if (member.test(elt->_idx)) { | |
1808 bool found = false; | |
1809 for (uint j = 0; j < elt->req(); j++) { | |
1810 Node* def = elt->in(j); | |
1811 if (def && member.test(def->_idx) && def != elt) { | |
1812 found = true; | |
1813 break; | |
1814 } | |
1815 } | |
1816 if (!found && elt != loop->_head) { | |
1817 nstack.push(n, idx); | |
1818 n = elt; | |
1819 assert(!visited.test(n->_idx), "not seen yet"); | |
1820 visited.set(n->_idx); | |
1821 } | |
1822 } | |
1823 } | |
1824 | |
1825 // traverse out's that are in the member set | |
1826 while (true) { | |
1827 if (idx < n->outcnt()) { | |
1828 Node* use = n->raw_out(idx); | |
1829 idx++; | |
1830 if (!visited.test_set(use->_idx)) { | |
1831 if (member.test(use->_idx)) { | |
1832 nstack.push(n, idx); | |
1833 n = use; | |
1834 idx = 0; | |
1835 } | |
1836 } | |
1837 } else { | |
1838 // All outputs processed | |
1839 sched.push(n); | |
1840 if (nstack.is_empty()) break; | |
1841 n = nstack.node(); | |
1842 idx = nstack.index(); | |
1843 nstack.pop(); | |
1844 } | |
1845 } | |
1846 } | |
1847 | |
1848 | |
1849 //------------------------------ has_use_in_set ------------------------------------- | |
1850 // Has a use in the vector set | |
1851 bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) { | |
1852 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
1853 Node* use = n->fast_out(j); | |
1854 if (vset.test(use->_idx)) { | |
1855 return true; | |
1856 } | |
1857 } | |
1858 return false; | |
1859 } | |
1860 | |
1861 | |
1862 //------------------------------ has_use_internal_to_set ------------------------------------- | |
1863 // Has use internal to the vector set (ie. not in a phi at the loop head) | |
1864 bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) { | |
1865 Node* head = loop->_head; | |
1866 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
1867 Node* use = n->fast_out(j); | |
1868 if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) { | |
1869 return true; | |
1870 } | |
1871 } | |
1872 return false; | |
1873 } | |
1874 | |
1875 | |
1876 //------------------------------ clone_for_use_outside_loop ------------------------------------- | |
1877 // clone "n" for uses that are outside of loop | |
1878 void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) { | |
1879 | |
1880 assert(worklist.size() == 0, "should be empty"); | |
1881 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
1882 Node* use = n->fast_out(j); | |
1883 if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) { | |
1884 worklist.push(use); | |
1885 } | |
1886 } | |
1887 while( worklist.size() ) { | |
1888 Node *use = worklist.pop(); | |
1889 if (!has_node(use) || use->in(0) == C->top()) continue; | |
1890 uint j; | |
1891 for (j = 0; j < use->req(); j++) { | |
1892 if (use->in(j) == n) break; | |
1893 } | |
1894 assert(j < use->req(), "must be there"); | |
1895 | |
1896 // clone "n" and insert it between the inputs of "n" and the use outside the loop | |
1897 Node* n_clone = n->clone(); | |
1898 _igvn.hash_delete(use); | |
1899 use->set_req(j, n_clone); | |
1900 _igvn._worklist.push(use); | |
251 | 1901 Node* use_c; |
0 | 1902 if (!use->is_Phi()) { |
251 | 1903 use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0); |
0 | 1904 } else { |
1905 // Use in a phi is considered a use in the associated predecessor block | |
251 | 1906 use_c = use->in(0)->in(j); |
0 | 1907 } |
251 | 1908 set_ctrl(n_clone, use_c); |
1909 assert(!loop->is_member(get_loop(use_c)), "should be outside loop"); | |
1910 get_loop(use_c)->_body.push(n_clone); | |
0 | 1911 _igvn.register_new_node_with_optimizer(n_clone); |
1912 #if !defined(PRODUCT) | |
1913 if (TracePartialPeeling) { | |
1914 tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx); | |
1915 } | |
1916 #endif | |
1917 } | |
1918 } | |
1919 | |
1920 | |
1921 //------------------------------ clone_for_special_use_inside_loop ------------------------------------- | |
1922 // clone "n" for special uses that are in the not_peeled region. | |
1923 // If these def-uses occur in separate blocks, the code generator | |
1924 // marks the method as not compilable. For example, if a "BoolNode" | |
1925 // is in a different basic block than the "IfNode" that uses it, then | |
1926 // the compilation is aborted in the code generator. | |
1927 void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n, | |
1928 VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) { | |
1929 if (n->is_Phi() || n->is_Load()) { | |
1930 return; | |
1931 } | |
1932 assert(worklist.size() == 0, "should be empty"); | |
1933 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
1934 Node* use = n->fast_out(j); | |
1935 if ( not_peel.test(use->_idx) && | |
1936 (use->is_If() || use->is_CMove() || use->is_Bool()) && | |
1937 use->in(1) == n) { | |
1938 worklist.push(use); | |
1939 } | |
1940 } | |
1941 if (worklist.size() > 0) { | |
1942 // clone "n" and insert it between inputs of "n" and the use | |
1943 Node* n_clone = n->clone(); | |
1944 loop->_body.push(n_clone); | |
1945 _igvn.register_new_node_with_optimizer(n_clone); | |
1946 set_ctrl(n_clone, get_ctrl(n)); | |
1947 sink_list.push(n_clone); | |
1948 not_peel <<= n_clone->_idx; // add n_clone to not_peel set. | |
1949 #if !defined(PRODUCT) | |
1950 if (TracePartialPeeling) { | |
1951 tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx); | |
1952 } | |
1953 #endif | |
1954 while( worklist.size() ) { | |
1955 Node *use = worklist.pop(); | |
1956 _igvn.hash_delete(use); | |
1957 _igvn._worklist.push(use); | |
1958 for (uint j = 1; j < use->req(); j++) { | |
1959 if (use->in(j) == n) { | |
1960 use->set_req(j, n_clone); | |
1961 } | |
1962 } | |
1963 } | |
1964 } | |
1965 } | |
1966 | |
1967 | |
1968 //------------------------------ insert_phi_for_loop ------------------------------------- | |
1969 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist | |
1970 void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) { | |
1971 Node *phi = PhiNode::make(lp, back_edge_val); | |
1972 phi->set_req(LoopNode::EntryControl, lp_entry_val); | |
1973 // Use existing phi if it already exists | |
1974 Node *hit = _igvn.hash_find_insert(phi); | |
1975 if( hit == NULL ) { | |
1976 _igvn.register_new_node_with_optimizer(phi); | |
1977 set_ctrl(phi, lp); | |
1978 } else { | |
1979 // Remove the new phi from the graph and use the hit | |
1980 _igvn.remove_dead_node(phi); | |
1981 phi = hit; | |
1982 } | |
1983 _igvn.hash_delete(use); | |
1984 _igvn._worklist.push(use); | |
1985 use->set_req(idx, phi); | |
1986 } | |
1987 | |
1988 #ifdef ASSERT | |
1989 //------------------------------ is_valid_loop_partition ------------------------------------- | |
1990 // Validate the loop partition sets: peel and not_peel | |
1991 bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list, | |
1992 VectorSet& not_peel ) { | |
1993 uint i; | |
1994 // Check that peel_list entries are in the peel set | |
1995 for (i = 0; i < peel_list.size(); i++) { | |
1996 if (!peel.test(peel_list.at(i)->_idx)) { | |
1997 return false; | |
1998 } | |
1999 } | |
2000 // Check at loop members are in one of peel set or not_peel set | |
2001 for (i = 0; i < loop->_body.size(); i++ ) { | |
2002 Node *def = loop->_body.at(i); | |
2003 uint di = def->_idx; | |
2004 // Check that peel set elements are in peel_list | |
2005 if (peel.test(di)) { | |
2006 if (not_peel.test(di)) { | |
2007 return false; | |
2008 } | |
2009 // Must be in peel_list also | |
2010 bool found = false; | |
2011 for (uint j = 0; j < peel_list.size(); j++) { | |
2012 if (peel_list.at(j)->_idx == di) { | |
2013 found = true; | |
2014 break; | |
2015 } | |
2016 } | |
2017 if (!found) { | |
2018 return false; | |
2019 } | |
2020 } else if (not_peel.test(di)) { | |
2021 if (peel.test(di)) { | |
2022 return false; | |
2023 } | |
2024 } else { | |
2025 return false; | |
2026 } | |
2027 } | |
2028 return true; | |
2029 } | |
2030 | |
2031 //------------------------------ is_valid_clone_loop_exit_use ------------------------------------- | |
2032 // Ensure a use outside of loop is of the right form | |
2033 bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) { | |
2034 Node *use_c = has_ctrl(use) ? get_ctrl(use) : use; | |
2035 return (use->is_Phi() && | |
2036 use_c->is_Region() && use_c->req() == 3 && | |
2037 (use_c->in(exit_idx)->Opcode() == Op_IfTrue || | |
2038 use_c->in(exit_idx)->Opcode() == Op_IfFalse || | |
2039 use_c->in(exit_idx)->Opcode() == Op_JumpProj) && | |
2040 loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) ); | |
2041 } | |
2042 | |
2043 //------------------------------ is_valid_clone_loop_form ------------------------------------- | |
2044 // Ensure that all uses outside of loop are of the right form | |
2045 bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list, | |
2046 uint orig_exit_idx, uint clone_exit_idx) { | |
2047 uint len = peel_list.size(); | |
2048 for (uint i = 0; i < len; i++) { | |
2049 Node *def = peel_list.at(i); | |
2050 | |
2051 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |
2052 Node *use = def->fast_out(j); | |
2053 Node *use_c = has_ctrl(use) ? get_ctrl(use) : use; | |
2054 if (!loop->is_member(get_loop(use_c))) { | |
2055 // use is not in the loop, check for correct structure | |
2056 if (use->in(0) == def) { | |
2057 // Okay | |
2058 } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) { | |
2059 return false; | |
2060 } | |
2061 } | |
2062 } | |
2063 } | |
2064 return true; | |
2065 } | |
2066 #endif | |
2067 | |
2068 //------------------------------ partial_peel ------------------------------------- | |
2069 // Partially peel (aka loop rotation) the top portion of a loop (called | |
2070 // the peel section below) by cloning it and placing one copy just before | |
2071 // the new loop head and the other copy at the bottom of the new loop. | |
2072 // | |
2073 // before after where it came from | |
2074 // | |
2075 // stmt1 stmt1 | |
2076 // loop: stmt2 clone | |
2077 // stmt2 if condA goto exitA clone | |
2078 // if condA goto exitA new_loop: new | |
2079 // stmt3 stmt3 clone | |
2080 // if !condB goto loop if condB goto exitB clone | |
2081 // exitB: stmt2 orig | |
2082 // stmt4 if !condA goto new_loop orig | |
2083 // exitA: goto exitA | |
2084 // exitB: | |
2085 // stmt4 | |
2086 // exitA: | |
2087 // | |
2088 // Step 1: find the cut point: an exit test on probable | |
2089 // induction variable. | |
2090 // Step 2: schedule (with cloning) operations in the peel | |
2091 // section that can be executed after the cut into | |
2092 // the section that is not peeled. This may need | |
2093 // to clone operations into exit blocks. For | |
2094 // instance, a reference to A[i] in the not-peel | |
2095 // section and a reference to B[i] in an exit block | |
2096 // may cause a left-shift of i by 2 to be placed | |
2097 // in the peel block. This step will clone the left | |
2098 // shift into the exit block and sink the left shift | |
2099 // from the peel to the not-peel section. | |
2100 // Step 3: clone the loop, retarget the control, and insert | |
2101 // phis for values that are live across the new loop | |
2102 // head. This is very dependent on the graph structure | |
2103 // from clone_loop. It creates region nodes for | |
2104 // exit control and associated phi nodes for values | |
2105 // flow out of the loop through that exit. The region | |
2106 // node is dominated by the clone's control projection. | |
2107 // So the clone's peel section is placed before the | |
2108 // new loop head, and the clone's not-peel section is | |
2109 // forms the top part of the new loop. The original | |
2110 // peel section forms the tail of the new loop. | |
2111 // Step 4: update the dominator tree and recompute the | |
2112 // dominator depth. | |
2113 // | |
2114 // orig | |
2115 // | |
2116 // stmt1 | |
2117 // | | |
2118 // v | |
2119 // loop<----+ | |
2120 // | | | |
2121 // stmt2 | | |
2122 // | | | |
2123 // v | | |
2124 // ifA | | |
2125 // / | | | |
2126 // v v | | |
2127 // false true ^ <-- last_peel | |
2128 // / | | | |
2129 // / ===|==cut | | |
2130 // / stmt3 | <-- first_not_peel | |
2131 // / | | | |
2132 // | v | | |
2133 // v ifB | | |
2134 // exitA: / \ | | |
2135 // / \ | | |
2136 // v v | | |
2137 // false true | | |
2138 // / \ | | |
2139 // / ----+ | |
2140 // | | |
2141 // v | |
2142 // exitB: | |
2143 // stmt4 | |
2144 // | |
2145 // | |
2146 // after clone loop | |
2147 // | |
2148 // stmt1 | |
2149 // / \ | |
2150 // clone / \ orig | |
2151 // / \ | |
2152 // / \ | |
2153 // v v | |
2154 // +---->loop loop<----+ | |
2155 // | | | | | |
2156 // | stmt2 stmt2 | | |
2157 // | | | | | |
2158 // | v v | | |
2159 // | ifA ifA | | |
2160 // | | \ / | | | |
2161 // | v v v v | | |
2162 // ^ true false false true ^ <-- last_peel | |
2163 // | | ^ \ / | | | |
2164 // | cut==|== \ \ / ===|==cut | | |
2165 // | stmt3 \ \ / stmt3 | <-- first_not_peel | |
2166 // | | dom | | | | | |
2167 // | v \ 1v v2 v | | |
2168 // | ifB regionA ifB | | |
2169 // | / \ | / \ | | |
2170 // | / \ v / \ | | |
2171 // | v v exitA: v v | | |
2172 // | true false false true | | |
2173 // | / ^ \ / \ | | |
2174 // +---- \ \ / ----+ | |
2175 // dom \ / | |
2176 // \ 1v v2 | |
2177 // regionB | |
2178 // | | |
2179 // v | |
2180 // exitB: | |
2181 // stmt4 | |
2182 // | |
2183 // | |
2184 // after partial peel | |
2185 // | |
2186 // stmt1 | |
2187 // / | |
2188 // clone / orig | |
2189 // / TOP | |
2190 // / \ | |
2191 // v v | |
2192 // TOP->region region----+ | |
2193 // | | | | |
2194 // stmt2 stmt2 | | |
2195 // | | | | |
2196 // v v | | |
2197 // ifA ifA | | |
2198 // | \ / | | | |
2199 // v v v v | | |
2200 // true false false true | <-- last_peel | |
2201 // | ^ \ / +------|---+ | |
2202 // +->newloop \ \ / === ==cut | | | |
2203 // | stmt3 \ \ / TOP | | | |
2204 // | | dom | | stmt3 | | <-- first_not_peel | |
2205 // | v \ 1v v2 v | | | |
2206 // | ifB regionA ifB ^ v | |
2207 // | / \ | / \ | | | |
2208 // | / \ v / \ | | | |
2209 // | v v exitA: v v | | | |
2210 // | true false false true | | | |
2211 // | / ^ \ / \ | | | |
2212 // | | \ \ / v | | | |
2213 // | | dom \ / TOP | | | |
2214 // | | \ 1v v2 | | | |
2215 // ^ v regionB | | | |
2216 // | | | | | | |
2217 // | | v ^ v | |
2218 // | | exitB: | | | |
2219 // | | stmt4 | | | |
2220 // | +------------>-----------------+ | | |
2221 // | | | |
2222 // +-----------------<---------------------+ | |
2223 // | |
2224 // | |
2225 // final graph | |
2226 // | |
2227 // stmt1 | |
2228 // | | |
2229 // v | |
2230 // ........> ifA clone | |
2231 // : / | | |
2232 // dom / | | |
2233 // : v v | |
2234 // : false true | |
2235 // : | | | |
2236 // : | stmt2 clone | |
2237 // : | | | |
2238 // : | v | |
2239 // : | newloop<-----+ | |
2240 // : | | | | |
2241 // : | stmt3 clone | | |
2242 // : | | | | |
2243 // : | v | | |
2244 // : | ifB | | |
2245 // : | / \ | | |
2246 // : | v v | | |
2247 // : | false true | | |
2248 // : | | | | | |
2249 // : | v stmt2 | | |
2250 // : | exitB: | | | |
2251 // : | stmt4 v | | |
2252 // : | ifA orig | | |
2253 // : | / \ | | |
2254 // : | / \ | | |
2255 // : | v v | | |
2256 // : | false true | | |
2257 // : | / \ | | |
2258 // : v v -----+ | |
2259 // RegionA | |
2260 // | | |
2261 // v | |
2262 // exitA | |
2263 // | |
2264 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) { | |
2265 | |
108 | 2266 if (!loop->_head->is_Loop()) { |
2267 return false; } | |
2268 | |
0 | 2269 LoopNode *head = loop->_head->as_Loop(); |
2270 | |
2271 if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) { | |
2272 return false; | |
2273 } | |
2274 | |
2275 // Check for complex exit control | |
2276 for(uint ii = 0; ii < loop->_body.size(); ii++ ) { | |
2277 Node *n = loop->_body.at(ii); | |
2278 int opc = n->Opcode(); | |
2279 if (n->is_Call() || | |
2280 opc == Op_Catch || | |
2281 opc == Op_CatchProj || | |
2282 opc == Op_Jump || | |
2283 opc == Op_JumpProj) { | |
2284 #if !defined(PRODUCT) | |
2285 if (TracePartialPeeling) { | |
2286 tty->print_cr("\nExit control too complex: lp: %d", head->_idx); | |
2287 } | |
2288 #endif | |
2289 return false; | |
2290 } | |
2291 } | |
2292 | |
2293 int dd = dom_depth(head); | |
2294 | |
2295 // Step 1: find cut point | |
2296 | |
2297 // Walk up dominators to loop head looking for first loop exit | |
2298 // which is executed on every path thru loop. | |
2299 IfNode *peel_if = NULL; | |
2300 IfNode *peel_if_cmpu = NULL; | |
2301 | |
2302 Node *iff = loop->tail(); | |
2303 while( iff != head ) { | |
2304 if( iff->is_If() ) { | |
2305 Node *ctrl = get_ctrl(iff->in(1)); | |
2306 if (ctrl->is_top()) return false; // Dead test on live IF. | |
2307 // If loop-varying exit-test, check for induction variable | |
2308 if( loop->is_member(get_loop(ctrl)) && | |
2309 loop->is_loop_exit(iff) && | |
2310 is_possible_iv_test(iff)) { | |
2311 Node* cmp = iff->in(1)->in(1); | |
2312 if (cmp->Opcode() == Op_CmpI) { | |
2313 peel_if = iff->as_If(); | |
2314 } else { | |
2315 assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU"); | |
2316 peel_if_cmpu = iff->as_If(); | |
2317 } | |
2318 } | |
2319 } | |
2320 iff = idom(iff); | |
2321 } | |
2322 // Prefer signed compare over unsigned compare. | |
2323 IfNode* new_peel_if = NULL; | |
2324 if (peel_if == NULL) { | |
2325 if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) { | |
2326 return false; // No peel point found | |
2327 } | |
2328 new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop); | |
2329 if (new_peel_if == NULL) { | |
2330 return false; // No peel point found | |
2331 } | |
2332 peel_if = new_peel_if; | |
2333 } | |
2334 Node* last_peel = stay_in_loop(peel_if, loop); | |
2335 Node* first_not_peeled = stay_in_loop(last_peel, loop); | |
2336 if (first_not_peeled == NULL || first_not_peeled == head) { | |
2337 return false; | |
2338 } | |
2339 | |
2340 #if !defined(PRODUCT) | |
2341 if (TracePartialPeeling) { | |
2342 tty->print_cr("before partial peel one iteration"); | |
2343 Node_List wl; | |
2344 Node* t = head->in(2); | |
2345 while (true) { | |
2346 wl.push(t); | |
2347 if (t == head) break; | |
2348 t = idom(t); | |
2349 } | |
2350 while (wl.size() > 0) { | |
2351 Node* tt = wl.pop(); | |
2352 tt->dump(); | |
2353 if (tt == last_peel) tty->print_cr("-- cut --"); | |
2354 } | |
2355 } | |
2356 #endif | |
2357 ResourceArea *area = Thread::current()->resource_area(); | |
2358 VectorSet peel(area); | |
2359 VectorSet not_peel(area); | |
2360 Node_List peel_list(area); | |
2361 Node_List worklist(area); | |
2362 Node_List sink_list(area); | |
2363 | |
2364 // Set of cfg nodes to peel are those that are executable from | |
2365 // the head through last_peel. | |
2366 assert(worklist.size() == 0, "should be empty"); | |
2367 worklist.push(head); | |
2368 peel.set(head->_idx); | |
2369 while (worklist.size() > 0) { | |
2370 Node *n = worklist.pop(); | |
2371 if (n != last_peel) { | |
2372 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
2373 Node* use = n->fast_out(j); | |
2374 if (use->is_CFG() && | |
2375 loop->is_member(get_loop(use)) && | |
2376 !peel.test_set(use->_idx)) { | |
2377 worklist.push(use); | |
2378 } | |
2379 } | |
2380 } | |
2381 } | |
2382 | |
2383 // Set of non-cfg nodes to peel are those that are control | |
2384 // dependent on the cfg nodes. | |
2385 uint i; | |
2386 for(i = 0; i < loop->_body.size(); i++ ) { | |
2387 Node *n = loop->_body.at(i); | |
2388 Node *n_c = has_ctrl(n) ? get_ctrl(n) : n; | |
2389 if (peel.test(n_c->_idx)) { | |
2390 peel.set(n->_idx); | |
2391 } else { | |
2392 not_peel.set(n->_idx); | |
2393 } | |
2394 } | |
2395 | |
2396 // Step 2: move operations from the peeled section down into the | |
2397 // not-peeled section | |
2398 | |
2399 // Get a post order schedule of nodes in the peel region | |
2400 // Result in right-most operand. | |
2401 scheduled_nodelist(loop, peel, peel_list ); | |
2402 | |
2403 assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition"); | |
2404 | |
2405 // For future check for too many new phis | |
2406 uint old_phi_cnt = 0; | |
2407 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { | |
2408 Node* use = head->fast_out(j); | |
2409 if (use->is_Phi()) old_phi_cnt++; | |
2410 } | |
2411 | |
2412 #if !defined(PRODUCT) | |
2413 if (TracePartialPeeling) { | |
2414 tty->print_cr("\npeeled list"); | |
2415 } | |
2416 #endif | |
2417 | |
2418 // Evacuate nodes in peel region into the not_peeled region if possible | |
2419 uint new_phi_cnt = 0; | |
2420 for (i = 0; i < peel_list.size();) { | |
2421 Node* n = peel_list.at(i); | |
2422 #if !defined(PRODUCT) | |
2423 if (TracePartialPeeling) n->dump(); | |
2424 #endif | |
2425 bool incr = true; | |
2426 if ( !n->is_CFG() ) { | |
2427 | |
2428 if ( has_use_in_set(n, not_peel) ) { | |
2429 | |
2430 // If not used internal to the peeled region, | |
2431 // move "n" from peeled to not_peeled region. | |
2432 | |
2433 if ( !has_use_internal_to_set(n, peel, loop) ) { | |
2434 | |
2435 // if not pinned and not a load (which maybe anti-dependent on a store) | |
2436 // and not a CMove (Matcher expects only bool->cmove). | |
2437 if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) { | |
2438 clone_for_use_outside_loop( loop, n, worklist ); | |
2439 | |
2440 sink_list.push(n); | |
2441 peel >>= n->_idx; // delete n from peel set. | |
2442 not_peel <<= n->_idx; // add n to not_peel set. | |
2443 peel_list.remove(i); | |
2444 incr = false; | |
2445 #if !defined(PRODUCT) | |
2446 if (TracePartialPeeling) { | |
2447 tty->print_cr("sink to not_peeled region: %d newbb: %d", | |
2448 n->_idx, get_ctrl(n)->_idx); | |
2449 } | |
2450 #endif | |
2451 } | |
2452 } else { | |
2453 // Otherwise check for special def-use cases that span | |
2454 // the peel/not_peel boundary such as bool->if | |
2455 clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist ); | |
2456 new_phi_cnt++; | |
2457 } | |
2458 } | |
2459 } | |
2460 if (incr) i++; | |
2461 } | |
2462 | |
2463 if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) { | |
2464 #if !defined(PRODUCT) | |
2465 if (TracePartialPeeling) { | |
2466 tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c", | |
2467 new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F'); | |
2468 } | |
2469 #endif | |
2470 if (new_peel_if != NULL) { | |
2471 remove_cmpi_loop_exit(new_peel_if, loop); | |
2472 } | |
2473 // Inhibit more partial peeling on this loop | |
2474 assert(!head->is_partial_peel_loop(), "not partial peeled"); | |
2475 head->mark_partial_peel_failed(); | |
2476 return false; | |
2477 } | |
2478 | |
2479 // Step 3: clone loop, retarget control, and insert new phis | |
2480 | |
2481 // Create new loop head for new phis and to hang | |
2482 // the nodes being moved (sinked) from the peel region. | |
2483 LoopNode* new_head = new (C, 3) LoopNode(last_peel, last_peel); | |
2484 _igvn.register_new_node_with_optimizer(new_head); | |
2485 assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled"); | |
2486 first_not_peeled->set_req(0, new_head); | |
2487 set_loop(new_head, loop); | |
2488 loop->_body.push(new_head); | |
2489 not_peel.set(new_head->_idx); | |
2490 set_idom(new_head, last_peel, dom_depth(first_not_peeled)); | |
2491 set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled)); | |
2492 | |
2493 while (sink_list.size() > 0) { | |
2494 Node* n = sink_list.pop(); | |
2495 set_ctrl(n, new_head); | |
2496 } | |
2497 | |
2498 assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition"); | |
2499 | |
2500 clone_loop( loop, old_new, dd ); | |
2501 | |
2502 const uint clone_exit_idx = 1; | |
2503 const uint orig_exit_idx = 2; | |
2504 assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop"); | |
2505 | |
2506 Node* head_clone = old_new[head->_idx]; | |
2507 LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop(); | |
2508 Node* orig_tail_clone = head_clone->in(2); | |
2509 | |
2510 // Add phi if "def" node is in peel set and "use" is not | |
2511 | |
2512 for(i = 0; i < peel_list.size(); i++ ) { | |
2513 Node *def = peel_list.at(i); | |
2514 if (!def->is_CFG()) { | |
2515 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { | |
2516 Node *use = def->fast_out(j); | |
2517 if (has_node(use) && use->in(0) != C->top() && | |
2518 (!peel.test(use->_idx) || | |
2519 (use->is_Phi() && use->in(0) == head)) ) { | |
2520 worklist.push(use); | |
2521 } | |
2522 } | |
2523 while( worklist.size() ) { | |
2524 Node *use = worklist.pop(); | |
2525 for (uint j = 1; j < use->req(); j++) { | |
2526 Node* n = use->in(j); | |
2527 if (n == def) { | |
2528 | |
2529 // "def" is in peel set, "use" is not in peel set | |
2530 // or "use" is in the entry boundary (a phi) of the peel set | |
2531 | |
2532 Node* use_c = has_ctrl(use) ? get_ctrl(use) : use; | |
2533 | |
2534 if ( loop->is_member(get_loop( use_c )) ) { | |
2535 // use is in loop | |
2536 if (old_new[use->_idx] != NULL) { // null for dead code | |
2537 Node* use_clone = old_new[use->_idx]; | |
2538 _igvn.hash_delete(use); | |
2539 use->set_req(j, C->top()); | |
2540 _igvn._worklist.push(use); | |
2541 insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone ); | |
2542 } | |
2543 } else { | |
2544 assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format"); | |
2545 // use is not in the loop, check if the live range includes the cut | |
2546 Node* lp_if = use_c->in(orig_exit_idx)->in(0); | |
2547 if (not_peel.test(lp_if->_idx)) { | |
2548 assert(j == orig_exit_idx, "use from original loop"); | |
2549 insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone ); | |
2550 } | |
2551 } | |
2552 } | |
2553 } | |
2554 } | |
2555 } | |
2556 } | |
2557 | |
2558 // Step 3b: retarget control | |
2559 | |
2560 // Redirect control to the new loop head if a cloned node in | |
2561 // the not_peeled region has control that points into the peeled region. | |
2562 // This necessary because the cloned peeled region will be outside | |
2563 // the loop. | |
2564 // from to | |
2565 // cloned-peeled <---+ | |
2566 // new_head_clone: | <--+ | |
2567 // cloned-not_peeled in(0) in(0) | |
2568 // orig-peeled | |
2569 | |
2570 for(i = 0; i < loop->_body.size(); i++ ) { | |
2571 Node *n = loop->_body.at(i); | |
2572 if (!n->is_CFG() && n->in(0) != NULL && | |
2573 not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) { | |
2574 Node* n_clone = old_new[n->_idx]; | |
2575 _igvn.hash_delete(n_clone); | |
2576 n_clone->set_req(0, new_head_clone); | |
2577 _igvn._worklist.push(n_clone); | |
2578 } | |
2579 } | |
2580 | |
2581 // Backedge of the surviving new_head (the clone) is original last_peel | |
2582 _igvn.hash_delete(new_head_clone); | |
2583 new_head_clone->set_req(LoopNode::LoopBackControl, last_peel); | |
2584 _igvn._worklist.push(new_head_clone); | |
2585 | |
2586 // Cut first node in original not_peel set | |
2587 _igvn.hash_delete(new_head); | |
2588 new_head->set_req(LoopNode::EntryControl, C->top()); | |
2589 new_head->set_req(LoopNode::LoopBackControl, C->top()); | |
2590 _igvn._worklist.push(new_head); | |
2591 | |
2592 // Copy head_clone back-branch info to original head | |
2593 // and remove original head's loop entry and | |
2594 // clone head's back-branch | |
2595 _igvn.hash_delete(head); | |
2596 _igvn.hash_delete(head_clone); | |
2597 head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl)); | |
2598 head->set_req(LoopNode::LoopBackControl, C->top()); | |
2599 head_clone->set_req(LoopNode::LoopBackControl, C->top()); | |
2600 _igvn._worklist.push(head); | |
2601 _igvn._worklist.push(head_clone); | |
2602 | |
2603 // Similarly modify the phis | |
2604 for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) { | |
2605 Node* use = head->fast_out(k); | |
2606 if (use->is_Phi() && use->outcnt() > 0) { | |
2607 Node* use_clone = old_new[use->_idx]; | |
2608 _igvn.hash_delete(use); | |
2609 _igvn.hash_delete(use_clone); | |
2610 use->set_req(LoopNode::EntryControl, use_clone->in(LoopNode::LoopBackControl)); | |
2611 use->set_req(LoopNode::LoopBackControl, C->top()); | |
2612 use_clone->set_req(LoopNode::LoopBackControl, C->top()); | |
2613 _igvn._worklist.push(use); | |
2614 _igvn._worklist.push(use_clone); | |
2615 } | |
2616 } | |
2617 | |
2618 // Step 4: update dominator tree and dominator depth | |
2619 | |
2620 set_idom(head, orig_tail_clone, dd); | |
2621 recompute_dom_depth(); | |
2622 | |
2623 // Inhibit more partial peeling on this loop | |
2624 new_head_clone->set_partial_peel_loop(); | |
2625 C->set_major_progress(); | |
2626 | |
2627 #if !defined(PRODUCT) | |
2628 if (TracePartialPeeling) { | |
2629 tty->print_cr("\nafter partial peel one iteration"); | |
2630 Node_List wl(area); | |
2631 Node* t = last_peel; | |
2632 while (true) { | |
2633 wl.push(t); | |
2634 if (t == head_clone) break; | |
2635 t = idom(t); | |
2636 } | |
2637 while (wl.size() > 0) { | |
2638 Node* tt = wl.pop(); | |
2639 if (tt == head) tty->print_cr("orig head"); | |
2640 else if (tt == new_head_clone) tty->print_cr("new head"); | |
2641 else if (tt == head_clone) tty->print_cr("clone head"); | |
2642 tt->dump(); | |
2643 } | |
2644 } | |
2645 #endif | |
2646 return true; | |
2647 } | |
2648 | |
2649 //------------------------------reorg_offsets---------------------------------- | |
2650 // Reorganize offset computations to lower register pressure. Mostly | |
2651 // prevent loop-fallout uses of the pre-incremented trip counter (which are | |
2652 // then alive with the post-incremented trip counter forcing an extra | |
2653 // register move) | |
2654 void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) { | |
2655 | |
2656 CountedLoopNode *cl = loop->_head->as_CountedLoop(); | |
2657 CountedLoopEndNode *cle = cl->loopexit(); | |
2658 if( !cle ) return; // The occasional dead loop | |
2659 // Find loop exit control | |
2660 Node *exit = cle->proj_out(false); | |
2661 assert( exit->Opcode() == Op_IfFalse, "" ); | |
2662 | |
2663 // Check for the special case of folks using the pre-incremented | |
2664 // trip-counter on the fall-out path (forces the pre-incremented | |
2665 // and post-incremented trip counter to be live at the same time). | |
2666 // Fix this by adjusting to use the post-increment trip counter. | |
2667 Node *phi = cl->phi(); | |
2668 if( !phi ) return; // Dead infinite loop | |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
318
diff
changeset
|
2669 |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
318
diff
changeset
|
2670 // Shape messed up, probably by iteration_split_impl |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
318
diff
changeset
|
2671 if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return; |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
318
diff
changeset
|
2672 |
0 | 2673 bool progress = true; |
2674 while (progress) { | |
2675 progress = false; | |
2676 for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) { | |
2677 Node* use = phi->fast_out(i); // User of trip-counter | |
2678 if (!has_ctrl(use)) continue; | |
2679 Node *u_ctrl = get_ctrl(use); | |
2680 if( use->is_Phi() ) { | |
2681 u_ctrl = NULL; | |
2682 for( uint j = 1; j < use->req(); j++ ) | |
2683 if( use->in(j) == phi ) | |
2684 u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) ); | |
2685 } | |
2686 IdealLoopTree *u_loop = get_loop(u_ctrl); | |
2687 // Look for loop-invariant use | |
2688 if( u_loop == loop ) continue; | |
2689 if( loop->is_member( u_loop ) ) continue; | |
2690 // Check that use is live out the bottom. Assuming the trip-counter | |
2691 // update is right at the bottom, uses of of the loop middle are ok. | |
2692 if( dom_lca( exit, u_ctrl ) != exit ) continue; | |
2693 // protect against stride not being a constant | |
2694 if( !cle->stride_is_con() ) continue; | |
2695 // Hit! Refactor use to use the post-incremented tripcounter. | |
2696 // Compute a post-increment tripcounter. | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
164
diff
changeset
|
2697 Node *opaq = new (C, 2) Opaque2Node( C, cle->incr() ); |
0 | 2698 register_new_node( opaq, u_ctrl ); |
2699 Node *neg_stride = _igvn.intcon(-cle->stride_con()); | |
2700 set_ctrl(neg_stride, C->root()); | |
2701 Node *post = new (C, 3) AddINode( opaq, neg_stride); | |
2702 register_new_node( post, u_ctrl ); | |
2703 _igvn.hash_delete(use); | |
2704 _igvn._worklist.push(use); | |
2705 for( uint j = 1; j < use->req(); j++ ) | |
2706 if( use->in(j) == phi ) | |
2707 use->set_req(j, post); | |
2708 // Since DU info changed, rerun loop | |
2709 progress = true; | |
2710 break; | |
2711 } | |
2712 } | |
2713 | |
2714 } |