Mercurial > hg > truffle
annotate src/share/vm/opto/loopnode.hpp @ 1145:e018e6884bd8
6631166: CMS: better heuristics when combatting fragmentation
Summary: Autonomic per-worker free block cache sizing, tunable coalition policies, fixes to per-size block statistics, retuned gain and bandwidth of some feedback loop filters to allow quicker reactivity to abrupt changes in ambient demand, and other heuristics to reduce fragmentation of the CMS old gen. Also tightened some assertions, including those related to locking.
Reviewed-by: jmasa
author | ysr |
---|---|
date | Wed, 23 Dec 2009 09:23:54 -0800 |
parents | 046932b72aa2 |
children | b2b6a9bf6238 |
rev | line source |
---|---|
0 | 1 /* |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
2 * Copyright 1998-2009 Sun Microsystems, Inc. All Rights Reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 class CmpNode; | |
26 class CountedLoopEndNode; | |
27 class CountedLoopNode; | |
28 class IdealLoopTree; | |
29 class LoopNode; | |
30 class Node; | |
31 class PhaseIdealLoop; | |
32 class VectorSet; | |
33 struct small_cache; | |
34 | |
35 // | |
36 // I D E A L I Z E D L O O P S | |
37 // | |
38 // Idealized loops are the set of loops I perform more interesting | |
39 // transformations on, beyond simple hoisting. | |
40 | |
41 //------------------------------LoopNode--------------------------------------- | |
42 // Simple loop header. Fall in path on left, loop-back path on right. | |
43 class LoopNode : public RegionNode { | |
44 // Size is bigger to hold the flags. However, the flags do not change | |
45 // the semantics so it does not appear in the hash & cmp functions. | |
46 virtual uint size_of() const { return sizeof(*this); } | |
47 protected: | |
48 short _loop_flags; | |
49 // Names for flag bitfields | |
50 enum { pre_post_main=0, inner_loop=8, partial_peel_loop=16, partial_peel_failed=32 }; | |
51 char _unswitch_count; | |
52 enum { _unswitch_max=3 }; | |
53 | |
54 public: | |
55 // Names for edge indices | |
56 enum { Self=0, EntryControl, LoopBackControl }; | |
57 | |
58 int is_inner_loop() const { return _loop_flags & inner_loop; } | |
59 void set_inner_loop() { _loop_flags |= inner_loop; } | |
60 | |
61 int is_partial_peel_loop() const { return _loop_flags & partial_peel_loop; } | |
62 void set_partial_peel_loop() { _loop_flags |= partial_peel_loop; } | |
63 int partial_peel_has_failed() const { return _loop_flags & partial_peel_failed; } | |
64 void mark_partial_peel_failed() { _loop_flags |= partial_peel_failed; } | |
65 | |
66 int unswitch_max() { return _unswitch_max; } | |
67 int unswitch_count() { return _unswitch_count; } | |
68 void set_unswitch_count(int val) { | |
69 assert (val <= unswitch_max(), "too many unswitches"); | |
70 _unswitch_count = val; | |
71 } | |
72 | |
73 LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) { | |
74 init_class_id(Class_Loop); | |
75 init_req(EntryControl, entry); | |
76 init_req(LoopBackControl, backedge); | |
77 } | |
78 | |
79 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); | |
80 virtual int Opcode() const; | |
81 bool can_be_counted_loop(PhaseTransform* phase) const { | |
82 return req() == 3 && in(0) != NULL && | |
83 in(1) != NULL && phase->type(in(1)) != Type::TOP && | |
84 in(2) != NULL && phase->type(in(2)) != Type::TOP; | |
85 } | |
86 #ifndef PRODUCT | |
87 virtual void dump_spec(outputStream *st) const; | |
88 #endif | |
89 }; | |
90 | |
91 //------------------------------Counted Loops---------------------------------- | |
92 // Counted loops are all trip-counted loops, with exactly 1 trip-counter exit | |
93 // path (and maybe some other exit paths). The trip-counter exit is always | |
94 // last in the loop. The trip-counter does not have to stride by a constant, | |
95 // but it does have to stride by a loop-invariant amount; the exit value is | |
96 // also loop invariant. | |
97 | |
98 // CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The | |
99 // CountedLoopNode has the incoming loop control and the loop-back-control | |
100 // which is always the IfTrue before the matching CountedLoopEndNode. The | |
101 // CountedLoopEndNode has an incoming control (possibly not the | |
102 // CountedLoopNode if there is control flow in the loop), the post-increment | |
103 // trip-counter value, and the limit. The trip-counter value is always of | |
104 // the form (Op old-trip-counter stride). The old-trip-counter is produced | |
105 // by a Phi connected to the CountedLoopNode. The stride is loop invariant. | |
106 // The Op is any commutable opcode, including Add, Mul, Xor. The | |
107 // CountedLoopEndNode also takes in the loop-invariant limit value. | |
108 | |
109 // From a CountedLoopNode I can reach the matching CountedLoopEndNode via the | |
110 // loop-back control. From CountedLoopEndNodes I can reach CountedLoopNodes | |
111 // via the old-trip-counter from the Op node. | |
112 | |
113 //------------------------------CountedLoopNode-------------------------------- | |
114 // CountedLoopNodes head simple counted loops. CountedLoopNodes have as | |
115 // inputs the incoming loop-start control and the loop-back control, so they | |
116 // act like RegionNodes. They also take in the initial trip counter, the | |
117 // loop-invariant stride and the loop-invariant limit value. CountedLoopNodes | |
118 // produce a loop-body control and the trip counter value. Since | |
119 // CountedLoopNodes behave like RegionNodes I still have a standard CFG model. | |
120 | |
121 class CountedLoopNode : public LoopNode { | |
122 // Size is bigger to hold _main_idx. However, _main_idx does not change | |
123 // the semantics so it does not appear in the hash & cmp functions. | |
124 virtual uint size_of() const { return sizeof(*this); } | |
125 | |
126 // For Pre- and Post-loops during debugging ONLY, this holds the index of | |
127 // the Main CountedLoop. Used to assert that we understand the graph shape. | |
128 node_idx_t _main_idx; | |
129 | |
130 // Known trip count calculated by policy_maximally_unroll | |
131 int _trip_count; | |
132 | |
133 // Expected trip count from profile data | |
134 float _profile_trip_cnt; | |
135 | |
136 // Log2 of original loop bodies in unrolled loop | |
137 int _unrolled_count_log2; | |
138 | |
139 // Node count prior to last unrolling - used to decide if | |
140 // unroll,optimize,unroll,optimize,... is making progress | |
141 int _node_count_before_unroll; | |
142 | |
143 public: | |
144 CountedLoopNode( Node *entry, Node *backedge ) | |
145 : LoopNode(entry, backedge), _trip_count(max_jint), | |
146 _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0), | |
147 _node_count_before_unroll(0) { | |
148 init_class_id(Class_CountedLoop); | |
149 // Initialize _trip_count to the largest possible value. | |
150 // Will be reset (lower) if the loop's trip count is known. | |
151 } | |
152 | |
153 virtual int Opcode() const; | |
154 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); | |
155 | |
156 Node *init_control() const { return in(EntryControl); } | |
157 Node *back_control() const { return in(LoopBackControl); } | |
158 CountedLoopEndNode *loopexit() const; | |
159 Node *init_trip() const; | |
160 Node *stride() const; | |
161 int stride_con() const; | |
162 bool stride_is_con() const; | |
163 Node *limit() const; | |
164 Node *incr() const; | |
165 Node *phi() const; | |
166 | |
167 // Match increment with optional truncation | |
168 static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type); | |
169 | |
170 // A 'main' loop has a pre-loop and a post-loop. The 'main' loop | |
171 // can run short a few iterations and may start a few iterations in. | |
172 // It will be RCE'd and unrolled and aligned. | |
173 | |
174 // A following 'post' loop will run any remaining iterations. Used | |
175 // during Range Check Elimination, the 'post' loop will do any final | |
176 // iterations with full checks. Also used by Loop Unrolling, where | |
177 // the 'post' loop will do any epilog iterations needed. Basically, | |
178 // a 'post' loop can not profitably be further unrolled or RCE'd. | |
179 | |
180 // A preceding 'pre' loop will run at least 1 iteration (to do peeling), | |
181 // it may do under-flow checks for RCE and may do alignment iterations | |
182 // so the following main loop 'knows' that it is striding down cache | |
183 // lines. | |
184 | |
185 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or | |
186 // Aligned, may be missing it's pre-loop. | |
187 enum { Normal=0, Pre=1, Main=2, Post=3, PrePostFlagsMask=3, Main_Has_No_Pre_Loop=4 }; | |
188 int is_normal_loop() const { return (_loop_flags&PrePostFlagsMask) == Normal; } | |
189 int is_pre_loop () const { return (_loop_flags&PrePostFlagsMask) == Pre; } | |
190 int is_main_loop () const { return (_loop_flags&PrePostFlagsMask) == Main; } | |
191 int is_post_loop () const { return (_loop_flags&PrePostFlagsMask) == Post; } | |
192 int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; } | |
193 void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; } | |
194 | |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
196
diff
changeset
|
195 int main_idx() const { return _main_idx; } |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
196
diff
changeset
|
196 |
0 | 197 |
198 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } | |
199 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; } | |
200 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; } | |
201 void set_normal_loop( ) { _loop_flags &= ~PrePostFlagsMask; } | |
202 | |
203 void set_trip_count(int tc) { _trip_count = tc; } | |
204 int trip_count() { return _trip_count; } | |
205 | |
206 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; } | |
207 float profile_trip_cnt() { return _profile_trip_cnt; } | |
208 | |
209 void double_unrolled_count() { _unrolled_count_log2++; } | |
210 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); } | |
211 | |
212 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; } | |
213 int node_count_before_unroll() { return _node_count_before_unroll; } | |
214 | |
215 #ifndef PRODUCT | |
216 virtual void dump_spec(outputStream *st) const; | |
217 #endif | |
218 }; | |
219 | |
220 //------------------------------CountedLoopEndNode----------------------------- | |
221 // CountedLoopEndNodes end simple trip counted loops. They act much like | |
222 // IfNodes. | |
223 class CountedLoopEndNode : public IfNode { | |
224 public: | |
225 enum { TestControl, TestValue }; | |
226 | |
227 CountedLoopEndNode( Node *control, Node *test, float prob, float cnt ) | |
228 : IfNode( control, test, prob, cnt) { | |
229 init_class_id(Class_CountedLoopEnd); | |
230 } | |
231 virtual int Opcode() const; | |
232 | |
233 Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; } | |
234 Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
235 Node *limit() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; } | |
236 Node *stride() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; } | |
237 Node *phi() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
238 Node *init_trip() const { Node *tmp = phi (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
239 int stride_con() const; | |
240 bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); } | |
241 BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; } | |
242 CountedLoopNode *loopnode() const { | |
243 Node *ln = phi()->in(0); | |
244 assert( ln->Opcode() == Op_CountedLoop, "malformed loop" ); | |
245 return (CountedLoopNode*)ln; } | |
246 | |
247 #ifndef PRODUCT | |
248 virtual void dump_spec(outputStream *st) const; | |
249 #endif | |
250 }; | |
251 | |
252 | |
253 inline CountedLoopEndNode *CountedLoopNode::loopexit() const { | |
254 Node *bc = back_control(); | |
255 if( bc == NULL ) return NULL; | |
256 Node *le = bc->in(0); | |
257 if( le->Opcode() != Op_CountedLoopEnd ) | |
258 return NULL; | |
259 return (CountedLoopEndNode*)le; | |
260 } | |
261 inline Node *CountedLoopNode::init_trip() const { return loopexit() ? loopexit()->init_trip() : NULL; } | |
262 inline Node *CountedLoopNode::stride() const { return loopexit() ? loopexit()->stride() : NULL; } | |
263 inline int CountedLoopNode::stride_con() const { return loopexit() ? loopexit()->stride_con() : 0; } | |
264 inline bool CountedLoopNode::stride_is_con() const { return loopexit() && loopexit()->stride_is_con(); } | |
265 inline Node *CountedLoopNode::limit() const { return loopexit() ? loopexit()->limit() : NULL; } | |
266 inline Node *CountedLoopNode::incr() const { return loopexit() ? loopexit()->incr() : NULL; } | |
267 inline Node *CountedLoopNode::phi() const { return loopexit() ? loopexit()->phi() : NULL; } | |
268 | |
269 | |
270 // -----------------------------IdealLoopTree---------------------------------- | |
271 class IdealLoopTree : public ResourceObj { | |
272 public: | |
273 IdealLoopTree *_parent; // Parent in loop tree | |
274 IdealLoopTree *_next; // Next sibling in loop tree | |
275 IdealLoopTree *_child; // First child in loop tree | |
276 | |
277 // The head-tail backedge defines the loop. | |
278 // If tail is NULL then this loop has multiple backedges as part of the | |
279 // same loop. During cleanup I'll peel off the multiple backedges; merge | |
280 // them at the loop bottom and flow 1 real backedge into the loop. | |
281 Node *_head; // Head of loop | |
282 Node *_tail; // Tail of loop | |
283 inline Node *tail(); // Handle lazy update of _tail field | |
284 PhaseIdealLoop* _phase; | |
285 | |
286 Node_List _body; // Loop body for inner loops | |
287 | |
288 uint8 _nest; // Nesting depth | |
289 uint8 _irreducible:1, // True if irreducible | |
290 _has_call:1, // True if has call safepoint | |
291 _has_sfpt:1, // True if has non-call safepoint | |
292 _rce_candidate:1; // True if candidate for range check elimination | |
293 | |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
294 Node_List* _required_safept; // A inner loop cannot delete these safepts; |
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
295 bool _allow_optimizations; // Allow loop optimizations |
0 | 296 |
297 IdealLoopTree( PhaseIdealLoop* phase, Node *head, Node *tail ) | |
298 : _parent(0), _next(0), _child(0), | |
299 _head(head), _tail(tail), | |
300 _phase(phase), | |
301 _required_safept(NULL), | |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
302 _allow_optimizations(true), |
0 | 303 _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0) |
304 { } | |
305 | |
306 // Is 'l' a member of 'this'? | |
307 int is_member( const IdealLoopTree *l ) const; // Test for nested membership | |
308 | |
309 // Set loop nesting depth. Accumulate has_call bits. | |
310 int set_nest( uint depth ); | |
311 | |
312 // Split out multiple fall-in edges from the loop header. Move them to a | |
313 // private RegionNode before the loop. This becomes the loop landing pad. | |
314 void split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt ); | |
315 | |
316 // Split out the outermost loop from this shared header. | |
317 void split_outer_loop( PhaseIdealLoop *phase ); | |
318 | |
319 // Merge all the backedges from the shared header into a private Region. | |
320 // Feed that region as the one backedge to this loop. | |
321 void merge_many_backedges( PhaseIdealLoop *phase ); | |
322 | |
323 // Split shared headers and insert loop landing pads. | |
324 // Insert a LoopNode to replace the RegionNode. | |
325 // Returns TRUE if loop tree is structurally changed. | |
326 bool beautify_loops( PhaseIdealLoop *phase ); | |
327 | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
328 // Perform iteration-splitting on inner loops. Split iterations to |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
329 // avoid range checks or one-shot null checks. Returns false if the |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
330 // current round of loop opts should stop. |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
331 bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); |
0 | 332 |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
333 // Driver for various flavors of iteration splitting. Returns false |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
334 // if the current round of loop opts should stop. |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
335 bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ); |
0 | 336 |
337 // Given dominators, try to find loops with calls that must always be | |
338 // executed (call dominates loop tail). These loops do not need non-call | |
339 // safepoints (ncsfpt). | |
340 void check_safepts(VectorSet &visited, Node_List &stack); | |
341 | |
342 // Allpaths backwards scan from loop tail, terminating each path at first safepoint | |
343 // encountered. | |
344 void allpaths_check_safepts(VectorSet &visited, Node_List &stack); | |
345 | |
346 // Convert to counted loops where possible | |
347 void counted_loop( PhaseIdealLoop *phase ); | |
348 | |
349 // Check for Node being a loop-breaking test | |
350 Node *is_loop_exit(Node *iff) const; | |
351 | |
352 // Returns true if ctrl is executed on every complete iteration | |
353 bool dominates_backedge(Node* ctrl); | |
354 | |
355 // Remove simplistic dead code from loop body | |
356 void DCE_loop_body(); | |
357 | |
358 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage. | |
359 // Replace with a 1-in-10 exit guess. | |
360 void adjust_loop_exit_prob( PhaseIdealLoop *phase ); | |
361 | |
362 // Return TRUE or FALSE if the loop should never be RCE'd or aligned. | |
363 // Useful for unrolling loops with NO array accesses. | |
364 bool policy_peel_only( PhaseIdealLoop *phase ) const; | |
365 | |
366 // Return TRUE or FALSE if the loop should be unswitched -- clone | |
367 // loop with an invariant test | |
368 bool policy_unswitching( PhaseIdealLoop *phase ) const; | |
369 | |
370 // Micro-benchmark spamming. Remove empty loops. | |
371 bool policy_do_remove_empty_loop( PhaseIdealLoop *phase ); | |
372 | |
373 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can | |
374 // make some loop-invariant test (usually a null-check) happen before the | |
375 // loop. | |
376 bool policy_peeling( PhaseIdealLoop *phase ) const; | |
377 | |
378 // Return TRUE or FALSE if the loop should be maximally unrolled. Stash any | |
379 // known trip count in the counted loop node. | |
380 bool policy_maximally_unroll( PhaseIdealLoop *phase ) const; | |
381 | |
382 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if | |
383 // the loop is a CountedLoop and the body is small enough. | |
384 bool policy_unroll( PhaseIdealLoop *phase ) const; | |
385 | |
386 // Return TRUE or FALSE if the loop should be range-check-eliminated. | |
387 // Gather a list of IF tests that are dominated by iteration splitting; | |
388 // also gather the end of the first split and the start of the 2nd split. | |
389 bool policy_range_check( PhaseIdealLoop *phase ) const; | |
390 | |
391 // Return TRUE or FALSE if the loop should be cache-line aligned. | |
392 // Gather the expression that does the alignment. Note that only | |
605 | 393 // one array base can be aligned in a loop (unless the VM guarantees |
0 | 394 // mutual alignment). Note that if we vectorize short memory ops |
395 // into longer memory ops, we may want to increase alignment. | |
396 bool policy_align( PhaseIdealLoop *phase ) const; | |
397 | |
398 // Compute loop trip count from profile data | |
399 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); | |
400 | |
401 // Reassociate invariant expressions. | |
402 void reassociate_invariants(PhaseIdealLoop *phase); | |
403 // Reassociate invariant add and subtract expressions. | |
404 Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase); | |
405 // Return nonzero index of invariant operand if invariant and variant | |
605 | 406 // are combined with an Add or Sub. Helper for reassociate_invariants. |
0 | 407 int is_invariant_addition(Node* n, PhaseIdealLoop *phase); |
408 | |
409 // Return true if n is invariant | |
410 bool is_invariant(Node* n) const; | |
411 | |
412 // Put loop body on igvn work list | |
413 void record_for_igvn(); | |
414 | |
415 bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); } | |
416 bool is_inner() { return is_loop() && _child == NULL; } | |
417 bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); } | |
418 | |
419 #ifndef PRODUCT | |
420 void dump_head( ) const; // Dump loop head only | |
421 void dump() const; // Dump this loop recursively | |
422 void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const; | |
423 #endif | |
424 | |
425 }; | |
426 | |
427 // -----------------------------PhaseIdealLoop--------------------------------- | |
428 // Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a | |
429 // loop tree. Drives the loop-based transformations on the ideal graph. | |
430 class PhaseIdealLoop : public PhaseTransform { | |
431 friend class IdealLoopTree; | |
432 friend class SuperWord; | |
433 // Pre-computed def-use info | |
434 PhaseIterGVN &_igvn; | |
435 | |
436 // Head of loop tree | |
437 IdealLoopTree *_ltree_root; | |
438 | |
439 // Array of pre-order numbers, plus post-visited bit. | |
440 // ZERO for not pre-visited. EVEN for pre-visited but not post-visited. | |
441 // ODD for post-visited. Other bits are the pre-order number. | |
442 uint *_preorders; | |
443 uint _max_preorder; | |
444 | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
445 const PhaseIdealLoop* _verify_me; |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
446 bool _verify_only; |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
447 |
0 | 448 // Allocate _preorders[] array |
449 void allocate_preorders() { | |
450 _max_preorder = C->unique()+8; | |
451 _preorders = NEW_RESOURCE_ARRAY(uint, _max_preorder); | |
452 memset(_preorders, 0, sizeof(uint) * _max_preorder); | |
453 } | |
454 | |
455 // Allocate _preorders[] array | |
456 void reallocate_preorders() { | |
457 if ( _max_preorder < C->unique() ) { | |
458 _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, C->unique()); | |
459 _max_preorder = C->unique(); | |
460 } | |
461 memset(_preorders, 0, sizeof(uint) * _max_preorder); | |
462 } | |
463 | |
464 // Check to grow _preorders[] array for the case when build_loop_tree_impl() | |
465 // adds new nodes. | |
466 void check_grow_preorders( ) { | |
467 if ( _max_preorder < C->unique() ) { | |
468 uint newsize = _max_preorder<<1; // double size of array | |
469 _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, newsize); | |
470 memset(&_preorders[_max_preorder],0,sizeof(uint)*(newsize-_max_preorder)); | |
471 _max_preorder = newsize; | |
472 } | |
473 } | |
474 // Check for pre-visited. Zero for NOT visited; non-zero for visited. | |
475 int is_visited( Node *n ) const { return _preorders[n->_idx]; } | |
476 // Pre-order numbers are written to the Nodes array as low-bit-set values. | |
477 void set_preorder_visited( Node *n, int pre_order ) { | |
478 assert( !is_visited( n ), "already set" ); | |
479 _preorders[n->_idx] = (pre_order<<1); | |
480 }; | |
481 // Return pre-order number. | |
482 int get_preorder( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]>>1; } | |
483 | |
484 // Check for being post-visited. | |
485 // Should be previsited already (checked with assert(is_visited(n))). | |
486 int is_postvisited( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]&1; } | |
487 | |
488 // Mark as post visited | |
489 void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; } | |
490 | |
491 // Set/get control node out. Set lower bit to distinguish from IdealLoopTree | |
492 // Returns true if "n" is a data node, false if it's a control node. | |
493 bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; } | |
494 | |
495 // clear out dead code after build_loop_late | |
496 Node_List _deadlist; | |
497 | |
498 // Support for faster execution of get_late_ctrl()/dom_lca() | |
499 // when a node has many uses and dominator depth is deep. | |
500 Node_Array _dom_lca_tags; | |
501 void init_dom_lca_tags(); | |
502 void clear_dom_lca_tags(); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
503 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
504 // Helper for debugging bad dominance relationships |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
505 bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
506 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
507 Node* compute_lca_of_uses(Node* n, Node* early, bool verify = false); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
508 |
0 | 509 // Inline wrapper for frequent cases: |
510 // 1) only one use | |
511 // 2) a use is the same as the current LCA passed as 'n1' | |
512 Node *dom_lca_for_get_late_ctrl( Node *lca, Node *n, Node *tag ) { | |
513 assert( n->is_CFG(), "" ); | |
514 // Fast-path NULL lca | |
515 if( lca != NULL && lca != n ) { | |
516 assert( lca->is_CFG(), "" ); | |
517 // find LCA of all uses | |
518 n = dom_lca_for_get_late_ctrl_internal( lca, n, tag ); | |
519 } | |
520 return find_non_split_ctrl(n); | |
521 } | |
522 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
523 |
0 | 524 // true if CFG node d dominates CFG node n |
525 bool is_dominator(Node *d, Node *n); | |
526 | |
527 // Helper function for directing control inputs away from CFG split | |
528 // points. | |
529 Node *find_non_split_ctrl( Node *ctrl ) const { | |
530 if (ctrl != NULL) { | |
531 if (ctrl->is_MultiBranch()) { | |
532 ctrl = ctrl->in(0); | |
533 } | |
534 assert(ctrl->is_CFG(), "CFG"); | |
535 } | |
536 return ctrl; | |
537 } | |
538 | |
539 public: | |
540 bool has_node( Node* n ) const { return _nodes[n->_idx] != NULL; } | |
541 // check if transform created new nodes that need _ctrl recorded | |
542 Node *get_late_ctrl( Node *n, Node *early ); | |
543 Node *get_early_ctrl( Node *n ); | |
544 void set_early_ctrl( Node *n ); | |
545 void set_subtree_ctrl( Node *root ); | |
546 void set_ctrl( Node *n, Node *ctrl ) { | |
547 assert( !has_node(n) || has_ctrl(n), "" ); | |
548 assert( ctrl->in(0), "cannot set dead control node" ); | |
549 assert( ctrl == find_non_split_ctrl(ctrl), "must set legal crtl" ); | |
550 _nodes.map( n->_idx, (Node*)((intptr_t)ctrl + 1) ); | |
551 } | |
552 // Set control and update loop membership | |
553 void set_ctrl_and_loop(Node* n, Node* ctrl) { | |
554 IdealLoopTree* old_loop = get_loop(get_ctrl(n)); | |
555 IdealLoopTree* new_loop = get_loop(ctrl); | |
556 if (old_loop != new_loop) { | |
557 if (old_loop->_child == NULL) old_loop->_body.yank(n); | |
558 if (new_loop->_child == NULL) new_loop->_body.push(n); | |
559 } | |
560 set_ctrl(n, ctrl); | |
561 } | |
562 // Control nodes can be replaced or subsumed. During this pass they | |
563 // get their replacement Node in slot 1. Instead of updating the block | |
564 // location of all Nodes in the subsumed block, we lazily do it. As we | |
565 // pull such a subsumed block out of the array, we write back the final | |
566 // correct block. | |
567 Node *get_ctrl( Node *i ) { | |
568 assert(has_node(i), ""); | |
569 Node *n = get_ctrl_no_update(i); | |
570 _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) ); | |
571 assert(has_node(i) && has_ctrl(i), ""); | |
572 assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); | |
573 return n; | |
574 } | |
575 | |
576 private: | |
577 Node *get_ctrl_no_update( Node *i ) const { | |
578 assert( has_ctrl(i), "" ); | |
579 Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1); | |
580 if (!n->in(0)) { | |
581 // Skip dead CFG nodes | |
582 do { | |
583 n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1); | |
584 } while (!n->in(0)); | |
585 n = find_non_split_ctrl(n); | |
586 } | |
587 return n; | |
588 } | |
589 | |
590 // Check for loop being set | |
591 // "n" must be a control node. Returns true if "n" is known to be in a loop. | |
592 bool has_loop( Node *n ) const { | |
593 assert(!has_node(n) || !has_ctrl(n), ""); | |
594 return has_node(n); | |
595 } | |
596 // Set loop | |
597 void set_loop( Node *n, IdealLoopTree *loop ) { | |
598 _nodes.map(n->_idx, (Node*)loop); | |
599 } | |
600 // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace | |
601 // the 'old_node' with 'new_node'. Kill old-node. Add a reference | |
602 // from old_node to new_node to support the lazy update. Reference | |
603 // replaces loop reference, since that is not neede for dead node. | |
604 public: | |
605 void lazy_update( Node *old_node, Node *new_node ) { | |
606 assert( old_node != new_node, "no cycles please" ); | |
607 //old_node->set_req( 1, new_node /*NO DU INFO*/ ); | |
608 // Nodes always have DU info now, so re-use the side array slot | |
609 // for this node to provide the forwarding pointer. | |
610 _nodes.map( old_node->_idx, (Node*)((intptr_t)new_node + 1) ); | |
611 } | |
612 void lazy_replace( Node *old_node, Node *new_node ) { | |
613 _igvn.hash_delete(old_node); | |
614 _igvn.subsume_node( old_node, new_node ); | |
615 lazy_update( old_node, new_node ); | |
616 } | |
617 void lazy_replace_proj( Node *old_node, Node *new_node ) { | |
618 assert( old_node->req() == 1, "use this for Projs" ); | |
619 _igvn.hash_delete(old_node); // Must hash-delete before hacking edges | |
620 old_node->add_req( NULL ); | |
621 lazy_replace( old_node, new_node ); | |
622 } | |
623 | |
624 private: | |
625 | |
626 // Place 'n' in some loop nest, where 'n' is a CFG node | |
627 void build_loop_tree(); | |
628 int build_loop_tree_impl( Node *n, int pre_order ); | |
629 // Insert loop into the existing loop tree. 'innermost' is a leaf of the | |
630 // loop tree, not the root. | |
631 IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost ); | |
632 | |
633 // Place Data nodes in some loop nest | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
634 void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
635 void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
636 void build_loop_late_post ( Node* n ); |
0 | 637 |
638 // Array of immediate dominance info for each CFG node indexed by node idx | |
639 private: | |
640 uint _idom_size; | |
641 Node **_idom; // Array of immediate dominators | |
642 uint *_dom_depth; // Used for fast LCA test | |
643 GrowableArray<uint>* _dom_stk; // For recomputation of dom depth | |
644 | |
645 Node* idom_no_update(Node* d) const { | |
646 assert(d->_idx < _idom_size, "oob"); | |
647 Node* n = _idom[d->_idx]; | |
648 assert(n != NULL,"Bad immediate dominator info."); | |
649 while (n->in(0) == NULL) { // Skip dead CFG nodes | |
650 //n = n->in(1); | |
651 n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1); | |
652 assert(n != NULL,"Bad immediate dominator info."); | |
653 } | |
654 return n; | |
655 } | |
656 Node *idom(Node* d) const { | |
657 uint didx = d->_idx; | |
658 Node *n = idom_no_update(d); | |
659 _idom[didx] = n; // Lazily remove dead CFG nodes from table. | |
660 return n; | |
661 } | |
662 uint dom_depth(Node* d) const { | |
663 assert(d->_idx < _idom_size, ""); | |
664 return _dom_depth[d->_idx]; | |
665 } | |
666 void set_idom(Node* d, Node* n, uint dom_depth); | |
667 // Locally compute IDOM using dom_lca call | |
668 Node *compute_idom( Node *region ) const; | |
669 // Recompute dom_depth | |
670 void recompute_dom_depth(); | |
671 | |
672 // Is safept not required by an outer loop? | |
673 bool is_deleteable_safept(Node* sfpt); | |
674 | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
675 // Perform verification that the graph is valid. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
676 PhaseIdealLoop( PhaseIterGVN &igvn) : |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
677 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
678 _igvn(igvn), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
679 _dom_lca_tags(C->comp_arena()), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
680 _verify_me(NULL), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
681 _verify_only(true) { |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
682 build_and_optimize(false); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
683 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
684 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
685 // build the loop tree and perform any requested optimizations |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
686 void build_and_optimize(bool do_split_if); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
687 |
0 | 688 public: |
689 // Dominators for the sea of nodes | |
690 void Dominators(); | |
691 Node *dom_lca( Node *n1, Node *n2 ) const { | |
692 return find_non_split_ctrl(dom_lca_internal(n1, n2)); | |
693 } | |
694 Node *dom_lca_internal( Node *n1, Node *n2 ) const; | |
695 | |
696 // Compute the Ideal Node to Loop mapping | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
697 PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) : |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
698 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
699 _igvn(igvn), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
700 _dom_lca_tags(C->comp_arena()), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
701 _verify_me(NULL), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
702 _verify_only(false) { |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
703 build_and_optimize(do_split_ifs); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
704 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
705 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
706 // Verify that verify_me made the same decisions as a fresh run. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
707 PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) : |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
708 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
709 _igvn(igvn), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
710 _dom_lca_tags(C->comp_arena()), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
711 _verify_me(verify_me), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
712 _verify_only(false) { |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
713 build_and_optimize(false); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
714 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
715 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
716 // Build and verify the loop tree without modifying the graph. This |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
717 // is useful to verify that all inputs properly dominate their uses. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
718 static void verify(PhaseIterGVN& igvn) { |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
719 #ifdef ASSERT |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
720 PhaseIdealLoop v(igvn); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
721 #endif |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
722 } |
0 | 723 |
724 // True if the method has at least 1 irreducible loop | |
725 bool _has_irreducible_loops; | |
726 | |
727 // Per-Node transform | |
728 virtual Node *transform( Node *a_node ) { return 0; } | |
729 | |
730 Node *is_counted_loop( Node *x, IdealLoopTree *loop ); | |
731 | |
732 // Return a post-walked LoopNode | |
733 IdealLoopTree *get_loop( Node *n ) const { | |
734 // Dead nodes have no loop, so return the top level loop instead | |
735 if (!has_node(n)) return _ltree_root; | |
736 assert(!has_ctrl(n), ""); | |
737 return (IdealLoopTree*)_nodes[n->_idx]; | |
738 } | |
739 | |
740 // Is 'n' a (nested) member of 'loop'? | |
741 int is_member( const IdealLoopTree *loop, Node *n ) const { | |
742 return loop->is_member(get_loop(n)); } | |
743 | |
744 // This is the basic building block of the loop optimizations. It clones an | |
745 // entire loop body. It makes an old_new loop body mapping; with this | |
746 // mapping you can find the new-loop equivalent to an old-loop node. All | |
747 // new-loop nodes are exactly equal to their old-loop counterparts, all | |
748 // edges are the same. All exits from the old-loop now have a RegionNode | |
749 // that merges the equivalent new-loop path. This is true even for the | |
750 // normal "loop-exit" condition. All uses of loop-invariant old-loop values | |
751 // now come from (one or more) Phis that merge their new-loop equivalents. | |
752 // Parameter side_by_side_idom: | |
753 // When side_by_size_idom is NULL, the dominator tree is constructed for | |
754 // the clone loop to dominate the original. Used in construction of | |
755 // pre-main-post loop sequence. | |
756 // When nonnull, the clone and original are side-by-side, both are | |
757 // dominated by the passed in side_by_side_idom node. Used in | |
758 // construction of unswitched loops. | |
759 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth, | |
760 Node* side_by_side_idom = NULL); | |
761 | |
762 // If we got the effect of peeling, either by actually peeling or by | |
763 // making a pre-loop which must execute at least once, we can remove | |
764 // all loop-invariant dominated tests in the main body. | |
765 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ); | |
766 | |
767 // Generate code to do a loop peel for the given loop (and body). | |
768 // old_new is a temp array. | |
769 void do_peeling( IdealLoopTree *loop, Node_List &old_new ); | |
770 | |
771 // Add pre and post loops around the given loop. These loops are used | |
772 // during RCE, unrolling and aligning loops. | |
773 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ); | |
774 // If Node n lives in the back_ctrl block, we clone a private version of n | |
775 // in preheader_ctrl block and return that, otherwise return n. | |
776 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n ); | |
777 | |
778 // Take steps to maximally unroll the loop. Peel any odd iterations, then | |
779 // unroll to do double iterations. The next round of major loop transforms | |
780 // will repeat till the doubled loop body does all remaining iterations in 1 | |
781 // pass. | |
782 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ); | |
783 | |
784 // Unroll the loop body one step - make each trip do 2 iterations. | |
785 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ); | |
786 | |
787 // Return true if exp is a constant times an induction var | |
788 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale); | |
789 | |
790 // Return true if exp is a scaled induction var plus (or minus) constant | |
791 bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); | |
792 | |
793 // Eliminate range-checks and other trip-counter vs loop-invariant tests. | |
794 void do_range_check( IdealLoopTree *loop, Node_List &old_new ); | |
795 | |
796 // Create a slow version of the loop by cloning the loop | |
797 // and inserting an if to select fast-slow versions. | |
798 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop, | |
799 Node_List &old_new); | |
800 | |
801 // Clone loop with an invariant test (that does not exit) and | |
802 // insert a clone of the test that selects which version to | |
803 // execute. | |
804 void do_unswitching (IdealLoopTree *loop, Node_List &old_new); | |
805 | |
806 // Find candidate "if" for unswitching | |
807 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const; | |
808 | |
809 // Range Check Elimination uses this function! | |
810 // Constrain the main loop iterations so the affine function: | |
811 // scale_con * I + offset < limit | |
812 // always holds true. That is, either increase the number of iterations in | |
813 // the pre-loop or the post-loop until the condition holds true in the main | |
814 // loop. Scale_con, offset and limit are all loop invariant. | |
815 void add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ); | |
816 | |
817 // Partially peel loop up through last_peel node. | |
818 bool partial_peel( IdealLoopTree *loop, Node_List &old_new ); | |
819 | |
820 // Create a scheduled list of nodes control dependent on ctrl set. | |
821 void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched ); | |
822 // Has a use in the vector set | |
823 bool has_use_in_set( Node* n, VectorSet& vset ); | |
824 // Has use internal to the vector set (ie. not in a phi at the loop head) | |
825 bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ); | |
826 // clone "n" for uses that are outside of loop | |
827 void clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ); | |
828 // clone "n" for special uses that are in the not_peeled region | |
829 void clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n, | |
830 VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ); | |
831 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist | |
832 void insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ); | |
833 #ifdef ASSERT | |
834 // Validate the loop partition sets: peel and not_peel | |
835 bool is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list, VectorSet& not_peel ); | |
836 // Ensure that uses outside of loop are of the right form | |
837 bool is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list, | |
838 uint orig_exit_idx, uint clone_exit_idx); | |
839 bool is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx); | |
840 #endif | |
841 | |
842 // Returns nonzero constant stride if-node is a possible iv test (otherwise returns zero.) | |
843 int stride_of_possible_iv( Node* iff ); | |
844 bool is_possible_iv_test( Node* iff ) { return stride_of_possible_iv(iff) != 0; } | |
845 // Return the (unique) control output node that's in the loop (if it exists.) | |
846 Node* stay_in_loop( Node* n, IdealLoopTree *loop); | |
847 // Insert a signed compare loop exit cloned from an unsigned compare. | |
848 IfNode* insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop); | |
849 void remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop); | |
850 // Utility to register node "n" with PhaseIdealLoop | |
851 void register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth); | |
852 // Utility to create an if-projection | |
853 ProjNode* proj_clone(ProjNode* p, IfNode* iff); | |
854 // Force the iff control output to be the live_proj | |
855 Node* short_circuit_if(IfNode* iff, ProjNode* live_proj); | |
856 // Insert a region before an if projection | |
857 RegionNode* insert_region_before_proj(ProjNode* proj); | |
858 // Insert a new if before an if projection | |
859 ProjNode* insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj); | |
860 | |
861 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps. | |
862 // "Nearly" because all Nodes have been cloned from the original in the loop, | |
863 // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs | |
864 // through the Phi recursively, and return a Bool. | |
865 BoolNode *clone_iff( PhiNode *phi, IdealLoopTree *loop ); | |
866 CmpNode *clone_bool( PhiNode *phi, IdealLoopTree *loop ); | |
867 | |
868 | |
869 // Rework addressing expressions to get the most loop-invariant stuff | |
870 // moved out. We'd like to do all associative operators, but it's especially | |
871 // important (common) to do address expressions. | |
872 Node *remix_address_expressions( Node *n ); | |
873 | |
874 // Attempt to use a conditional move instead of a phi/branch | |
875 Node *conditional_move( Node *n ); | |
876 | |
877 // Reorganize offset computations to lower register pressure. | |
878 // Mostly prevent loop-fallout uses of the pre-incremented trip counter | |
879 // (which are then alive with the post-incremented trip counter | |
880 // forcing an extra register move) | |
881 void reorg_offsets( IdealLoopTree *loop ); | |
882 | |
883 // Check for aggressive application of 'split-if' optimization, | |
884 // using basic block level info. | |
885 void split_if_with_blocks ( VectorSet &visited, Node_Stack &nstack ); | |
886 Node *split_if_with_blocks_pre ( Node *n ); | |
887 void split_if_with_blocks_post( Node *n ); | |
888 Node *has_local_phi_input( Node *n ); | |
889 // Mark an IfNode as being dominated by a prior test, | |
890 // without actually altering the CFG (and hence IDOM info). | |
891 void dominated_by( Node *prevdom, Node *iff ); | |
892 | |
893 // Split Node 'n' through merge point | |
894 Node *split_thru_region( Node *n, Node *region ); | |
895 // Split Node 'n' through merge point if there is enough win. | |
896 Node *split_thru_phi( Node *n, Node *region, int policy ); | |
897 // Found an If getting its condition-code input from a Phi in the | |
898 // same block. Split thru the Region. | |
899 void do_split_if( Node *iff ); | |
900 | |
901 private: | |
902 // Return a type based on condition control flow | |
903 const TypeInt* filtered_type( Node *n, Node* n_ctrl); | |
904 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); } | |
905 // Helpers for filtered type | |
906 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); | |
907 | |
908 // Helper functions | |
909 void register_new_node( Node *n, Node *blk ); | |
910 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); | |
911 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); | |
912 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); | |
913 bool split_up( Node *n, Node *blk1, Node *blk2 ); | |
914 void sink_use( Node *use, Node *post_loop ); | |
915 Node *place_near_use( Node *useblock ) const; | |
916 | |
917 bool _created_loop_node; | |
918 public: | |
919 void set_created_loop_node() { _created_loop_node = true; } | |
920 bool created_loop_node() { return _created_loop_node; } | |
921 | |
922 #ifndef PRODUCT | |
923 void dump( ) const; | |
924 void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const; | |
925 void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const; | |
926 void verify() const; // Major slow :-) | |
927 void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const; | |
928 IdealLoopTree *get_loop_idx(Node* n) const { | |
929 // Dead nodes have no loop, so return the top level loop instead | |
930 return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root; | |
931 } | |
932 // Print some stats | |
933 static void print_statistics(); | |
934 static int _loop_invokes; // Count of PhaseIdealLoop invokes | |
935 static int _loop_work; // Sum of PhaseIdealLoop x _unique | |
936 #endif | |
937 }; | |
938 | |
939 inline Node* IdealLoopTree::tail() { | |
940 // Handle lazy update of _tail field | |
941 Node *n = _tail; | |
942 //while( !n->in(0) ) // Skip dead CFG nodes | |
943 //n = n->in(1); | |
944 if (n->in(0) == NULL) | |
945 n = _phase->get_ctrl(n); | |
946 _tail = n; | |
947 return n; | |
948 } | |
949 | |
950 | |
951 // Iterate over the loop tree using a preorder, left-to-right traversal. | |
952 // | |
953 // Example that visits all counted loops from within PhaseIdealLoop | |
954 // | |
955 // for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { | |
956 // IdealLoopTree* lpt = iter.current(); | |
957 // if (!lpt->is_counted()) continue; | |
958 // ... | |
959 class LoopTreeIterator : public StackObj { | |
960 private: | |
961 IdealLoopTree* _root; | |
962 IdealLoopTree* _curnt; | |
963 | |
964 public: | |
965 LoopTreeIterator(IdealLoopTree* root) : _root(root), _curnt(root) {} | |
966 | |
967 bool done() { return _curnt == NULL; } // Finished iterating? | |
968 | |
969 void next(); // Advance to next loop tree | |
970 | |
971 IdealLoopTree* current() { return _curnt; } // Return current value of iterator. | |
972 }; |