Mercurial > hg > truffle
annotate src/share/vm/opto/loopnode.hpp @ 1994:6cd6d394f280
7001033: assert(gch->gc_cause() == GCCause::_scavenge_alot || !gch->incremental_collection_failed())
7002546: regression on SpecJbb2005 on 7b118 comparing to 7b117 on small heaps
Summary: Relaxed assertion checking related to incremental_collection_failed flag to allow for ExplicitGCInvokesConcurrent behaviour where we do not want a failing scavenge to bail to a stop-world collection. Parameterized incremental_collection_will_fail() so we can selectively use, or not use, as appropriate, the statistical prediction at specific use sites. This essentially reverts the scavenge bail-out logic to what it was prior to some recent changes that had inadvertently started using the statistical prediction which can be noisy in the presence of bursty loads. Added some associated verbose non-product debugging messages.
Reviewed-by: johnc, tonyp
author | ysr |
---|---|
date | Tue, 07 Dec 2010 21:55:53 -0800 |
parents | f95d63e2154a |
children | 194c9fdee631 |
rev | line source |
---|---|
0 | 1 /* |
1972 | 2 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #ifndef SHARE_VM_OPTO_LOOPNODE_HPP |
26 #define SHARE_VM_OPTO_LOOPNODE_HPP | |
27 | |
28 #include "opto/cfgnode.hpp" | |
29 #include "opto/multnode.hpp" | |
30 #include "opto/phaseX.hpp" | |
31 #include "opto/subnode.hpp" | |
32 #include "opto/type.hpp" | |
33 | |
0 | 34 class CmpNode; |
35 class CountedLoopEndNode; | |
36 class CountedLoopNode; | |
37 class IdealLoopTree; | |
38 class LoopNode; | |
39 class Node; | |
40 class PhaseIdealLoop; | |
41 class VectorSet; | |
1172 | 42 class Invariance; |
0 | 43 struct small_cache; |
44 | |
45 // | |
46 // I D E A L I Z E D L O O P S | |
47 // | |
48 // Idealized loops are the set of loops I perform more interesting | |
49 // transformations on, beyond simple hoisting. | |
50 | |
51 //------------------------------LoopNode--------------------------------------- | |
52 // Simple loop header. Fall in path on left, loop-back path on right. | |
53 class LoopNode : public RegionNode { | |
54 // Size is bigger to hold the flags. However, the flags do not change | |
55 // the semantics so it does not appear in the hash & cmp functions. | |
56 virtual uint size_of() const { return sizeof(*this); } | |
57 protected: | |
58 short _loop_flags; | |
59 // Names for flag bitfields | |
60 enum { pre_post_main=0, inner_loop=8, partial_peel_loop=16, partial_peel_failed=32 }; | |
61 char _unswitch_count; | |
62 enum { _unswitch_max=3 }; | |
63 | |
64 public: | |
65 // Names for edge indices | |
66 enum { Self=0, EntryControl, LoopBackControl }; | |
67 | |
68 int is_inner_loop() const { return _loop_flags & inner_loop; } | |
69 void set_inner_loop() { _loop_flags |= inner_loop; } | |
70 | |
71 int is_partial_peel_loop() const { return _loop_flags & partial_peel_loop; } | |
72 void set_partial_peel_loop() { _loop_flags |= partial_peel_loop; } | |
73 int partial_peel_has_failed() const { return _loop_flags & partial_peel_failed; } | |
74 void mark_partial_peel_failed() { _loop_flags |= partial_peel_failed; } | |
75 | |
76 int unswitch_max() { return _unswitch_max; } | |
77 int unswitch_count() { return _unswitch_count; } | |
78 void set_unswitch_count(int val) { | |
79 assert (val <= unswitch_max(), "too many unswitches"); | |
80 _unswitch_count = val; | |
81 } | |
82 | |
83 LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) { | |
84 init_class_id(Class_Loop); | |
85 init_req(EntryControl, entry); | |
86 init_req(LoopBackControl, backedge); | |
87 } | |
88 | |
89 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); | |
90 virtual int Opcode() const; | |
91 bool can_be_counted_loop(PhaseTransform* phase) const { | |
92 return req() == 3 && in(0) != NULL && | |
93 in(1) != NULL && phase->type(in(1)) != Type::TOP && | |
94 in(2) != NULL && phase->type(in(2)) != Type::TOP; | |
95 } | |
96 #ifndef PRODUCT | |
97 virtual void dump_spec(outputStream *st) const; | |
98 #endif | |
99 }; | |
100 | |
101 //------------------------------Counted Loops---------------------------------- | |
102 // Counted loops are all trip-counted loops, with exactly 1 trip-counter exit | |
103 // path (and maybe some other exit paths). The trip-counter exit is always | |
104 // last in the loop. The trip-counter does not have to stride by a constant, | |
105 // but it does have to stride by a loop-invariant amount; the exit value is | |
106 // also loop invariant. | |
107 | |
108 // CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The | |
109 // CountedLoopNode has the incoming loop control and the loop-back-control | |
110 // which is always the IfTrue before the matching CountedLoopEndNode. The | |
111 // CountedLoopEndNode has an incoming control (possibly not the | |
112 // CountedLoopNode if there is control flow in the loop), the post-increment | |
113 // trip-counter value, and the limit. The trip-counter value is always of | |
114 // the form (Op old-trip-counter stride). The old-trip-counter is produced | |
115 // by a Phi connected to the CountedLoopNode. The stride is loop invariant. | |
116 // The Op is any commutable opcode, including Add, Mul, Xor. The | |
117 // CountedLoopEndNode also takes in the loop-invariant limit value. | |
118 | |
119 // From a CountedLoopNode I can reach the matching CountedLoopEndNode via the | |
120 // loop-back control. From CountedLoopEndNodes I can reach CountedLoopNodes | |
121 // via the old-trip-counter from the Op node. | |
122 | |
123 //------------------------------CountedLoopNode-------------------------------- | |
124 // CountedLoopNodes head simple counted loops. CountedLoopNodes have as | |
125 // inputs the incoming loop-start control and the loop-back control, so they | |
126 // act like RegionNodes. They also take in the initial trip counter, the | |
127 // loop-invariant stride and the loop-invariant limit value. CountedLoopNodes | |
128 // produce a loop-body control and the trip counter value. Since | |
129 // CountedLoopNodes behave like RegionNodes I still have a standard CFG model. | |
130 | |
131 class CountedLoopNode : public LoopNode { | |
132 // Size is bigger to hold _main_idx. However, _main_idx does not change | |
133 // the semantics so it does not appear in the hash & cmp functions. | |
134 virtual uint size_of() const { return sizeof(*this); } | |
135 | |
136 // For Pre- and Post-loops during debugging ONLY, this holds the index of | |
137 // the Main CountedLoop. Used to assert that we understand the graph shape. | |
138 node_idx_t _main_idx; | |
139 | |
140 // Known trip count calculated by policy_maximally_unroll | |
141 int _trip_count; | |
142 | |
143 // Expected trip count from profile data | |
144 float _profile_trip_cnt; | |
145 | |
146 // Log2 of original loop bodies in unrolled loop | |
147 int _unrolled_count_log2; | |
148 | |
149 // Node count prior to last unrolling - used to decide if | |
150 // unroll,optimize,unroll,optimize,... is making progress | |
151 int _node_count_before_unroll; | |
152 | |
153 public: | |
154 CountedLoopNode( Node *entry, Node *backedge ) | |
155 : LoopNode(entry, backedge), _trip_count(max_jint), | |
156 _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0), | |
157 _node_count_before_unroll(0) { | |
158 init_class_id(Class_CountedLoop); | |
159 // Initialize _trip_count to the largest possible value. | |
160 // Will be reset (lower) if the loop's trip count is known. | |
161 } | |
162 | |
163 virtual int Opcode() const; | |
164 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); | |
165 | |
166 Node *init_control() const { return in(EntryControl); } | |
167 Node *back_control() const { return in(LoopBackControl); } | |
168 CountedLoopEndNode *loopexit() const; | |
169 Node *init_trip() const; | |
170 Node *stride() const; | |
171 int stride_con() const; | |
172 bool stride_is_con() const; | |
173 Node *limit() const; | |
174 Node *incr() const; | |
175 Node *phi() const; | |
176 | |
177 // Match increment with optional truncation | |
178 static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type); | |
179 | |
180 // A 'main' loop has a pre-loop and a post-loop. The 'main' loop | |
181 // can run short a few iterations and may start a few iterations in. | |
182 // It will be RCE'd and unrolled and aligned. | |
183 | |
184 // A following 'post' loop will run any remaining iterations. Used | |
185 // during Range Check Elimination, the 'post' loop will do any final | |
186 // iterations with full checks. Also used by Loop Unrolling, where | |
187 // the 'post' loop will do any epilog iterations needed. Basically, | |
188 // a 'post' loop can not profitably be further unrolled or RCE'd. | |
189 | |
190 // A preceding 'pre' loop will run at least 1 iteration (to do peeling), | |
191 // it may do under-flow checks for RCE and may do alignment iterations | |
192 // so the following main loop 'knows' that it is striding down cache | |
193 // lines. | |
194 | |
195 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or | |
196 // Aligned, may be missing it's pre-loop. | |
197 enum { Normal=0, Pre=1, Main=2, Post=3, PrePostFlagsMask=3, Main_Has_No_Pre_Loop=4 }; | |
198 int is_normal_loop() const { return (_loop_flags&PrePostFlagsMask) == Normal; } | |
199 int is_pre_loop () const { return (_loop_flags&PrePostFlagsMask) == Pre; } | |
200 int is_main_loop () const { return (_loop_flags&PrePostFlagsMask) == Main; } | |
201 int is_post_loop () const { return (_loop_flags&PrePostFlagsMask) == Post; } | |
202 int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; } | |
203 void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; } | |
204 | |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
196
diff
changeset
|
205 int main_idx() const { return _main_idx; } |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
196
diff
changeset
|
206 |
0 | 207 |
208 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } | |
209 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; } | |
210 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; } | |
211 void set_normal_loop( ) { _loop_flags &= ~PrePostFlagsMask; } | |
212 | |
213 void set_trip_count(int tc) { _trip_count = tc; } | |
214 int trip_count() { return _trip_count; } | |
215 | |
216 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; } | |
217 float profile_trip_cnt() { return _profile_trip_cnt; } | |
218 | |
219 void double_unrolled_count() { _unrolled_count_log2++; } | |
220 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); } | |
221 | |
222 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; } | |
223 int node_count_before_unroll() { return _node_count_before_unroll; } | |
224 | |
225 #ifndef PRODUCT | |
226 virtual void dump_spec(outputStream *st) const; | |
227 #endif | |
228 }; | |
229 | |
230 //------------------------------CountedLoopEndNode----------------------------- | |
231 // CountedLoopEndNodes end simple trip counted loops. They act much like | |
232 // IfNodes. | |
233 class CountedLoopEndNode : public IfNode { | |
234 public: | |
235 enum { TestControl, TestValue }; | |
236 | |
237 CountedLoopEndNode( Node *control, Node *test, float prob, float cnt ) | |
238 : IfNode( control, test, prob, cnt) { | |
239 init_class_id(Class_CountedLoopEnd); | |
240 } | |
241 virtual int Opcode() const; | |
242 | |
243 Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; } | |
244 Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
245 Node *limit() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; } | |
246 Node *stride() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; } | |
247 Node *phi() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
248 Node *init_trip() const { Node *tmp = phi (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
249 int stride_con() const; | |
250 bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); } | |
251 BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; } | |
252 CountedLoopNode *loopnode() const { | |
253 Node *ln = phi()->in(0); | |
254 assert( ln->Opcode() == Op_CountedLoop, "malformed loop" ); | |
255 return (CountedLoopNode*)ln; } | |
256 | |
257 #ifndef PRODUCT | |
258 virtual void dump_spec(outputStream *st) const; | |
259 #endif | |
260 }; | |
261 | |
262 | |
263 inline CountedLoopEndNode *CountedLoopNode::loopexit() const { | |
264 Node *bc = back_control(); | |
265 if( bc == NULL ) return NULL; | |
266 Node *le = bc->in(0); | |
267 if( le->Opcode() != Op_CountedLoopEnd ) | |
268 return NULL; | |
269 return (CountedLoopEndNode*)le; | |
270 } | |
271 inline Node *CountedLoopNode::init_trip() const { return loopexit() ? loopexit()->init_trip() : NULL; } | |
272 inline Node *CountedLoopNode::stride() const { return loopexit() ? loopexit()->stride() : NULL; } | |
273 inline int CountedLoopNode::stride_con() const { return loopexit() ? loopexit()->stride_con() : 0; } | |
274 inline bool CountedLoopNode::stride_is_con() const { return loopexit() && loopexit()->stride_is_con(); } | |
275 inline Node *CountedLoopNode::limit() const { return loopexit() ? loopexit()->limit() : NULL; } | |
276 inline Node *CountedLoopNode::incr() const { return loopexit() ? loopexit()->incr() : NULL; } | |
277 inline Node *CountedLoopNode::phi() const { return loopexit() ? loopexit()->phi() : NULL; } | |
278 | |
279 | |
280 // -----------------------------IdealLoopTree---------------------------------- | |
281 class IdealLoopTree : public ResourceObj { | |
282 public: | |
283 IdealLoopTree *_parent; // Parent in loop tree | |
284 IdealLoopTree *_next; // Next sibling in loop tree | |
285 IdealLoopTree *_child; // First child in loop tree | |
286 | |
287 // The head-tail backedge defines the loop. | |
288 // If tail is NULL then this loop has multiple backedges as part of the | |
289 // same loop. During cleanup I'll peel off the multiple backedges; merge | |
290 // them at the loop bottom and flow 1 real backedge into the loop. | |
291 Node *_head; // Head of loop | |
292 Node *_tail; // Tail of loop | |
293 inline Node *tail(); // Handle lazy update of _tail field | |
294 PhaseIdealLoop* _phase; | |
295 | |
296 Node_List _body; // Loop body for inner loops | |
297 | |
298 uint8 _nest; // Nesting depth | |
299 uint8 _irreducible:1, // True if irreducible | |
300 _has_call:1, // True if has call safepoint | |
301 _has_sfpt:1, // True if has non-call safepoint | |
302 _rce_candidate:1; // True if candidate for range check elimination | |
303 | |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
304 Node_List* _required_safept; // A inner loop cannot delete these safepts; |
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
305 bool _allow_optimizations; // Allow loop optimizations |
0 | 306 |
307 IdealLoopTree( PhaseIdealLoop* phase, Node *head, Node *tail ) | |
308 : _parent(0), _next(0), _child(0), | |
309 _head(head), _tail(tail), | |
310 _phase(phase), | |
311 _required_safept(NULL), | |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
312 _allow_optimizations(true), |
0 | 313 _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0) |
314 { } | |
315 | |
316 // Is 'l' a member of 'this'? | |
317 int is_member( const IdealLoopTree *l ) const; // Test for nested membership | |
318 | |
319 // Set loop nesting depth. Accumulate has_call bits. | |
320 int set_nest( uint depth ); | |
321 | |
322 // Split out multiple fall-in edges from the loop header. Move them to a | |
323 // private RegionNode before the loop. This becomes the loop landing pad. | |
324 void split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt ); | |
325 | |
326 // Split out the outermost loop from this shared header. | |
327 void split_outer_loop( PhaseIdealLoop *phase ); | |
328 | |
329 // Merge all the backedges from the shared header into a private Region. | |
330 // Feed that region as the one backedge to this loop. | |
331 void merge_many_backedges( PhaseIdealLoop *phase ); | |
332 | |
333 // Split shared headers and insert loop landing pads. | |
334 // Insert a LoopNode to replace the RegionNode. | |
335 // Returns TRUE if loop tree is structurally changed. | |
336 bool beautify_loops( PhaseIdealLoop *phase ); | |
337 | |
1172 | 338 // Perform optimization to use the loop predicates for null checks and range checks. |
339 // Applies to any loop level (not just the innermost one) | |
340 bool loop_predication( PhaseIdealLoop *phase); | |
341 | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
342 // Perform iteration-splitting on inner loops. Split iterations to |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
343 // avoid range checks or one-shot null checks. Returns false if the |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
344 // current round of loop opts should stop. |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
345 bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); |
0 | 346 |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
347 // Driver for various flavors of iteration splitting. Returns false |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
348 // if the current round of loop opts should stop. |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
349 bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ); |
0 | 350 |
351 // Given dominators, try to find loops with calls that must always be | |
352 // executed (call dominates loop tail). These loops do not need non-call | |
353 // safepoints (ncsfpt). | |
354 void check_safepts(VectorSet &visited, Node_List &stack); | |
355 | |
356 // Allpaths backwards scan from loop tail, terminating each path at first safepoint | |
357 // encountered. | |
358 void allpaths_check_safepts(VectorSet &visited, Node_List &stack); | |
359 | |
360 // Convert to counted loops where possible | |
361 void counted_loop( PhaseIdealLoop *phase ); | |
362 | |
363 // Check for Node being a loop-breaking test | |
364 Node *is_loop_exit(Node *iff) const; | |
365 | |
366 // Returns true if ctrl is executed on every complete iteration | |
367 bool dominates_backedge(Node* ctrl); | |
368 | |
369 // Remove simplistic dead code from loop body | |
370 void DCE_loop_body(); | |
371 | |
372 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage. | |
373 // Replace with a 1-in-10 exit guess. | |
374 void adjust_loop_exit_prob( PhaseIdealLoop *phase ); | |
375 | |
376 // Return TRUE or FALSE if the loop should never be RCE'd or aligned. | |
377 // Useful for unrolling loops with NO array accesses. | |
378 bool policy_peel_only( PhaseIdealLoop *phase ) const; | |
379 | |
380 // Return TRUE or FALSE if the loop should be unswitched -- clone | |
381 // loop with an invariant test | |
382 bool policy_unswitching( PhaseIdealLoop *phase ) const; | |
383 | |
384 // Micro-benchmark spamming. Remove empty loops. | |
385 bool policy_do_remove_empty_loop( PhaseIdealLoop *phase ); | |
386 | |
387 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can | |
388 // make some loop-invariant test (usually a null-check) happen before the | |
389 // loop. | |
390 bool policy_peeling( PhaseIdealLoop *phase ) const; | |
391 | |
392 // Return TRUE or FALSE if the loop should be maximally unrolled. Stash any | |
393 // known trip count in the counted loop node. | |
394 bool policy_maximally_unroll( PhaseIdealLoop *phase ) const; | |
395 | |
396 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if | |
397 // the loop is a CountedLoop and the body is small enough. | |
398 bool policy_unroll( PhaseIdealLoop *phase ) const; | |
399 | |
400 // Return TRUE or FALSE if the loop should be range-check-eliminated. | |
401 // Gather a list of IF tests that are dominated by iteration splitting; | |
402 // also gather the end of the first split and the start of the 2nd split. | |
403 bool policy_range_check( PhaseIdealLoop *phase ) const; | |
404 | |
405 // Return TRUE or FALSE if the loop should be cache-line aligned. | |
406 // Gather the expression that does the alignment. Note that only | |
605 | 407 // one array base can be aligned in a loop (unless the VM guarantees |
0 | 408 // mutual alignment). Note that if we vectorize short memory ops |
409 // into longer memory ops, we may want to increase alignment. | |
410 bool policy_align( PhaseIdealLoop *phase ) const; | |
411 | |
1172 | 412 // Return TRUE if "iff" is a range check. |
413 bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const; | |
414 | |
0 | 415 // Compute loop trip count from profile data |
416 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); | |
417 | |
418 // Reassociate invariant expressions. | |
419 void reassociate_invariants(PhaseIdealLoop *phase); | |
420 // Reassociate invariant add and subtract expressions. | |
421 Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase); | |
422 // Return nonzero index of invariant operand if invariant and variant | |
605 | 423 // are combined with an Add or Sub. Helper for reassociate_invariants. |
0 | 424 int is_invariant_addition(Node* n, PhaseIdealLoop *phase); |
425 | |
426 // Return true if n is invariant | |
427 bool is_invariant(Node* n) const; | |
428 | |
429 // Put loop body on igvn work list | |
430 void record_for_igvn(); | |
431 | |
432 bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); } | |
433 bool is_inner() { return is_loop() && _child == NULL; } | |
434 bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); } | |
435 | |
436 #ifndef PRODUCT | |
437 void dump_head( ) const; // Dump loop head only | |
438 void dump() const; // Dump this loop recursively | |
439 void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const; | |
440 #endif | |
441 | |
442 }; | |
443 | |
444 // -----------------------------PhaseIdealLoop--------------------------------- | |
445 // Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a | |
446 // loop tree. Drives the loop-based transformations on the ideal graph. | |
447 class PhaseIdealLoop : public PhaseTransform { | |
448 friend class IdealLoopTree; | |
449 friend class SuperWord; | |
450 // Pre-computed def-use info | |
451 PhaseIterGVN &_igvn; | |
452 | |
453 // Head of loop tree | |
454 IdealLoopTree *_ltree_root; | |
455 | |
456 // Array of pre-order numbers, plus post-visited bit. | |
457 // ZERO for not pre-visited. EVEN for pre-visited but not post-visited. | |
458 // ODD for post-visited. Other bits are the pre-order number. | |
459 uint *_preorders; | |
460 uint _max_preorder; | |
461 | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
462 const PhaseIdealLoop* _verify_me; |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
463 bool _verify_only; |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
464 |
0 | 465 // Allocate _preorders[] array |
466 void allocate_preorders() { | |
467 _max_preorder = C->unique()+8; | |
468 _preorders = NEW_RESOURCE_ARRAY(uint, _max_preorder); | |
469 memset(_preorders, 0, sizeof(uint) * _max_preorder); | |
470 } | |
471 | |
472 // Allocate _preorders[] array | |
473 void reallocate_preorders() { | |
474 if ( _max_preorder < C->unique() ) { | |
475 _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, C->unique()); | |
476 _max_preorder = C->unique(); | |
477 } | |
478 memset(_preorders, 0, sizeof(uint) * _max_preorder); | |
479 } | |
480 | |
481 // Check to grow _preorders[] array for the case when build_loop_tree_impl() | |
482 // adds new nodes. | |
483 void check_grow_preorders( ) { | |
484 if ( _max_preorder < C->unique() ) { | |
485 uint newsize = _max_preorder<<1; // double size of array | |
486 _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, newsize); | |
487 memset(&_preorders[_max_preorder],0,sizeof(uint)*(newsize-_max_preorder)); | |
488 _max_preorder = newsize; | |
489 } | |
490 } | |
491 // Check for pre-visited. Zero for NOT visited; non-zero for visited. | |
492 int is_visited( Node *n ) const { return _preorders[n->_idx]; } | |
493 // Pre-order numbers are written to the Nodes array as low-bit-set values. | |
494 void set_preorder_visited( Node *n, int pre_order ) { | |
495 assert( !is_visited( n ), "already set" ); | |
496 _preorders[n->_idx] = (pre_order<<1); | |
497 }; | |
498 // Return pre-order number. | |
499 int get_preorder( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]>>1; } | |
500 | |
501 // Check for being post-visited. | |
502 // Should be previsited already (checked with assert(is_visited(n))). | |
503 int is_postvisited( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]&1; } | |
504 | |
505 // Mark as post visited | |
506 void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; } | |
507 | |
508 // Set/get control node out. Set lower bit to distinguish from IdealLoopTree | |
509 // Returns true if "n" is a data node, false if it's a control node. | |
510 bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; } | |
511 | |
512 // clear out dead code after build_loop_late | |
513 Node_List _deadlist; | |
514 | |
515 // Support for faster execution of get_late_ctrl()/dom_lca() | |
516 // when a node has many uses and dominator depth is deep. | |
517 Node_Array _dom_lca_tags; | |
518 void init_dom_lca_tags(); | |
519 void clear_dom_lca_tags(); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
520 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
521 // Helper for debugging bad dominance relationships |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
522 bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
523 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
524 Node* compute_lca_of_uses(Node* n, Node* early, bool verify = false); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
525 |
0 | 526 // Inline wrapper for frequent cases: |
527 // 1) only one use | |
528 // 2) a use is the same as the current LCA passed as 'n1' | |
529 Node *dom_lca_for_get_late_ctrl( Node *lca, Node *n, Node *tag ) { | |
530 assert( n->is_CFG(), "" ); | |
531 // Fast-path NULL lca | |
532 if( lca != NULL && lca != n ) { | |
533 assert( lca->is_CFG(), "" ); | |
534 // find LCA of all uses | |
535 n = dom_lca_for_get_late_ctrl_internal( lca, n, tag ); | |
536 } | |
537 return find_non_split_ctrl(n); | |
538 } | |
539 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
540 |
0 | 541 // Helper function for directing control inputs away from CFG split |
542 // points. | |
543 Node *find_non_split_ctrl( Node *ctrl ) const { | |
544 if (ctrl != NULL) { | |
545 if (ctrl->is_MultiBranch()) { | |
546 ctrl = ctrl->in(0); | |
547 } | |
548 assert(ctrl->is_CFG(), "CFG"); | |
549 } | |
550 return ctrl; | |
551 } | |
552 | |
553 public: | |
554 bool has_node( Node* n ) const { return _nodes[n->_idx] != NULL; } | |
555 // check if transform created new nodes that need _ctrl recorded | |
556 Node *get_late_ctrl( Node *n, Node *early ); | |
557 Node *get_early_ctrl( Node *n ); | |
558 void set_early_ctrl( Node *n ); | |
559 void set_subtree_ctrl( Node *root ); | |
560 void set_ctrl( Node *n, Node *ctrl ) { | |
561 assert( !has_node(n) || has_ctrl(n), "" ); | |
562 assert( ctrl->in(0), "cannot set dead control node" ); | |
563 assert( ctrl == find_non_split_ctrl(ctrl), "must set legal crtl" ); | |
564 _nodes.map( n->_idx, (Node*)((intptr_t)ctrl + 1) ); | |
565 } | |
566 // Set control and update loop membership | |
567 void set_ctrl_and_loop(Node* n, Node* ctrl) { | |
568 IdealLoopTree* old_loop = get_loop(get_ctrl(n)); | |
569 IdealLoopTree* new_loop = get_loop(ctrl); | |
570 if (old_loop != new_loop) { | |
571 if (old_loop->_child == NULL) old_loop->_body.yank(n); | |
572 if (new_loop->_child == NULL) new_loop->_body.push(n); | |
573 } | |
574 set_ctrl(n, ctrl); | |
575 } | |
576 // Control nodes can be replaced or subsumed. During this pass they | |
577 // get their replacement Node in slot 1. Instead of updating the block | |
578 // location of all Nodes in the subsumed block, we lazily do it. As we | |
579 // pull such a subsumed block out of the array, we write back the final | |
580 // correct block. | |
581 Node *get_ctrl( Node *i ) { | |
582 assert(has_node(i), ""); | |
583 Node *n = get_ctrl_no_update(i); | |
584 _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) ); | |
585 assert(has_node(i) && has_ctrl(i), ""); | |
586 assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); | |
587 return n; | |
588 } | |
1172 | 589 // true if CFG node d dominates CFG node n |
590 bool is_dominator(Node *d, Node *n); | |
591 // return get_ctrl for a data node and self(n) for a CFG node | |
592 Node* ctrl_or_self(Node* n) { | |
593 if (has_ctrl(n)) | |
594 return get_ctrl(n); | |
595 else { | |
596 assert (n->is_CFG(), "must be a CFG node"); | |
597 return n; | |
598 } | |
599 } | |
0 | 600 |
601 private: | |
602 Node *get_ctrl_no_update( Node *i ) const { | |
603 assert( has_ctrl(i), "" ); | |
604 Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1); | |
605 if (!n->in(0)) { | |
606 // Skip dead CFG nodes | |
607 do { | |
608 n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1); | |
609 } while (!n->in(0)); | |
610 n = find_non_split_ctrl(n); | |
611 } | |
612 return n; | |
613 } | |
614 | |
615 // Check for loop being set | |
616 // "n" must be a control node. Returns true if "n" is known to be in a loop. | |
617 bool has_loop( Node *n ) const { | |
618 assert(!has_node(n) || !has_ctrl(n), ""); | |
619 return has_node(n); | |
620 } | |
621 // Set loop | |
622 void set_loop( Node *n, IdealLoopTree *loop ) { | |
623 _nodes.map(n->_idx, (Node*)loop); | |
624 } | |
625 // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace | |
626 // the 'old_node' with 'new_node'. Kill old-node. Add a reference | |
627 // from old_node to new_node to support the lazy update. Reference | |
1172 | 628 // replaces loop reference, since that is not needed for dead node. |
0 | 629 public: |
630 void lazy_update( Node *old_node, Node *new_node ) { | |
631 assert( old_node != new_node, "no cycles please" ); | |
632 //old_node->set_req( 1, new_node /*NO DU INFO*/ ); | |
633 // Nodes always have DU info now, so re-use the side array slot | |
634 // for this node to provide the forwarding pointer. | |
635 _nodes.map( old_node->_idx, (Node*)((intptr_t)new_node + 1) ); | |
636 } | |
637 void lazy_replace( Node *old_node, Node *new_node ) { | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
638 _igvn.replace_node( old_node, new_node ); |
0 | 639 lazy_update( old_node, new_node ); |
640 } | |
641 void lazy_replace_proj( Node *old_node, Node *new_node ) { | |
642 assert( old_node->req() == 1, "use this for Projs" ); | |
643 _igvn.hash_delete(old_node); // Must hash-delete before hacking edges | |
644 old_node->add_req( NULL ); | |
645 lazy_replace( old_node, new_node ); | |
646 } | |
647 | |
648 private: | |
649 | |
650 // Place 'n' in some loop nest, where 'n' is a CFG node | |
651 void build_loop_tree(); | |
652 int build_loop_tree_impl( Node *n, int pre_order ); | |
653 // Insert loop into the existing loop tree. 'innermost' is a leaf of the | |
654 // loop tree, not the root. | |
655 IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost ); | |
656 | |
657 // Place Data nodes in some loop nest | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
658 void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
659 void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
660 void build_loop_late_post ( Node* n ); |
0 | 661 |
662 // Array of immediate dominance info for each CFG node indexed by node idx | |
663 private: | |
664 uint _idom_size; | |
665 Node **_idom; // Array of immediate dominators | |
666 uint *_dom_depth; // Used for fast LCA test | |
667 GrowableArray<uint>* _dom_stk; // For recomputation of dom depth | |
668 | |
669 Node* idom_no_update(Node* d) const { | |
670 assert(d->_idx < _idom_size, "oob"); | |
671 Node* n = _idom[d->_idx]; | |
672 assert(n != NULL,"Bad immediate dominator info."); | |
673 while (n->in(0) == NULL) { // Skip dead CFG nodes | |
674 //n = n->in(1); | |
675 n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1); | |
676 assert(n != NULL,"Bad immediate dominator info."); | |
677 } | |
678 return n; | |
679 } | |
680 Node *idom(Node* d) const { | |
681 uint didx = d->_idx; | |
682 Node *n = idom_no_update(d); | |
683 _idom[didx] = n; // Lazily remove dead CFG nodes from table. | |
684 return n; | |
685 } | |
686 uint dom_depth(Node* d) const { | |
687 assert(d->_idx < _idom_size, ""); | |
688 return _dom_depth[d->_idx]; | |
689 } | |
690 void set_idom(Node* d, Node* n, uint dom_depth); | |
691 // Locally compute IDOM using dom_lca call | |
692 Node *compute_idom( Node *region ) const; | |
693 // Recompute dom_depth | |
694 void recompute_dom_depth(); | |
695 | |
696 // Is safept not required by an outer loop? | |
697 bool is_deleteable_safept(Node* sfpt); | |
698 | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
699 // Perform verification that the graph is valid. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
700 PhaseIdealLoop( PhaseIterGVN &igvn) : |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
701 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
702 _igvn(igvn), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
703 _dom_lca_tags(C->comp_arena()), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
704 _verify_me(NULL), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
705 _verify_only(true) { |
1172 | 706 build_and_optimize(false, false); |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
707 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
708 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
709 // build the loop tree and perform any requested optimizations |
1172 | 710 void build_and_optimize(bool do_split_if, bool do_loop_pred); |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
711 |
0 | 712 public: |
713 // Dominators for the sea of nodes | |
714 void Dominators(); | |
715 Node *dom_lca( Node *n1, Node *n2 ) const { | |
716 return find_non_split_ctrl(dom_lca_internal(n1, n2)); | |
717 } | |
718 Node *dom_lca_internal( Node *n1, Node *n2 ) const; | |
719 | |
720 // Compute the Ideal Node to Loop mapping | |
1172 | 721 PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) : |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
722 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
723 _igvn(igvn), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
724 _dom_lca_tags(C->comp_arena()), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
725 _verify_me(NULL), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
726 _verify_only(false) { |
1172 | 727 build_and_optimize(do_split_ifs, do_loop_pred); |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
728 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
729 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
730 // Verify that verify_me made the same decisions as a fresh run. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
731 PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) : |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
732 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
733 _igvn(igvn), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
734 _dom_lca_tags(C->comp_arena()), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
735 _verify_me(verify_me), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
736 _verify_only(false) { |
1172 | 737 build_and_optimize(false, false); |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
738 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
739 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
740 // Build and verify the loop tree without modifying the graph. This |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
741 // is useful to verify that all inputs properly dominate their uses. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
742 static void verify(PhaseIterGVN& igvn) { |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
743 #ifdef ASSERT |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
744 PhaseIdealLoop v(igvn); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
745 #endif |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
746 } |
0 | 747 |
748 // True if the method has at least 1 irreducible loop | |
749 bool _has_irreducible_loops; | |
750 | |
751 // Per-Node transform | |
752 virtual Node *transform( Node *a_node ) { return 0; } | |
753 | |
754 Node *is_counted_loop( Node *x, IdealLoopTree *loop ); | |
755 | |
756 // Return a post-walked LoopNode | |
757 IdealLoopTree *get_loop( Node *n ) const { | |
758 // Dead nodes have no loop, so return the top level loop instead | |
759 if (!has_node(n)) return _ltree_root; | |
760 assert(!has_ctrl(n), ""); | |
761 return (IdealLoopTree*)_nodes[n->_idx]; | |
762 } | |
763 | |
764 // Is 'n' a (nested) member of 'loop'? | |
765 int is_member( const IdealLoopTree *loop, Node *n ) const { | |
766 return loop->is_member(get_loop(n)); } | |
767 | |
768 // This is the basic building block of the loop optimizations. It clones an | |
769 // entire loop body. It makes an old_new loop body mapping; with this | |
770 // mapping you can find the new-loop equivalent to an old-loop node. All | |
771 // new-loop nodes are exactly equal to their old-loop counterparts, all | |
772 // edges are the same. All exits from the old-loop now have a RegionNode | |
773 // that merges the equivalent new-loop path. This is true even for the | |
774 // normal "loop-exit" condition. All uses of loop-invariant old-loop values | |
775 // now come from (one or more) Phis that merge their new-loop equivalents. | |
776 // Parameter side_by_side_idom: | |
777 // When side_by_size_idom is NULL, the dominator tree is constructed for | |
778 // the clone loop to dominate the original. Used in construction of | |
779 // pre-main-post loop sequence. | |
780 // When nonnull, the clone and original are side-by-side, both are | |
781 // dominated by the passed in side_by_side_idom node. Used in | |
782 // construction of unswitched loops. | |
783 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth, | |
784 Node* side_by_side_idom = NULL); | |
785 | |
786 // If we got the effect of peeling, either by actually peeling or by | |
787 // making a pre-loop which must execute at least once, we can remove | |
788 // all loop-invariant dominated tests in the main body. | |
789 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ); | |
790 | |
791 // Generate code to do a loop peel for the given loop (and body). | |
792 // old_new is a temp array. | |
793 void do_peeling( IdealLoopTree *loop, Node_List &old_new ); | |
794 | |
795 // Add pre and post loops around the given loop. These loops are used | |
796 // during RCE, unrolling and aligning loops. | |
797 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ); | |
798 // If Node n lives in the back_ctrl block, we clone a private version of n | |
799 // in preheader_ctrl block and return that, otherwise return n. | |
800 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n ); | |
801 | |
802 // Take steps to maximally unroll the loop. Peel any odd iterations, then | |
803 // unroll to do double iterations. The next round of major loop transforms | |
804 // will repeat till the doubled loop body does all remaining iterations in 1 | |
805 // pass. | |
806 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ); | |
807 | |
808 // Unroll the loop body one step - make each trip do 2 iterations. | |
809 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ); | |
810 | |
811 // Return true if exp is a constant times an induction var | |
812 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale); | |
813 | |
814 // Return true if exp is a scaled induction var plus (or minus) constant | |
815 bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); | |
816 | |
1172 | 817 // Return true if proj is for "proj->[region->..]call_uct" |
818 bool is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate = false); | |
819 // Return true for "if(test)-> proj -> ... | |
820 // | | |
821 // V | |
822 // other_proj->[region->..]call_uct" | |
823 bool is_uncommon_trap_if_pattern(ProjNode* proj, bool must_reason_predicate = false); | |
824 // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted | |
825 ProjNode* create_new_if_for_predicate(ProjNode* cont_proj); | |
826 // Find a good location to insert a predicate | |
827 ProjNode* find_predicate_insertion_point(Node* start_c); | |
828 // Construct a range check for a predicate if | |
829 BoolNode* rc_predicate(Node* ctrl, | |
830 int scale, Node* offset, | |
831 Node* init, Node* limit, Node* stride, | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1172
diff
changeset
|
832 Node* range, bool upper); |
1172 | 833 |
834 // Implementation of the loop predication to promote checks outside the loop | |
835 bool loop_predication_impl(IdealLoopTree *loop); | |
836 | |
837 // Helper function to collect predicate for eliminating the useless ones | |
838 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); | |
839 void eliminate_useless_predicates(); | |
840 | |
0 | 841 // Eliminate range-checks and other trip-counter vs loop-invariant tests. |
842 void do_range_check( IdealLoopTree *loop, Node_List &old_new ); | |
843 | |
844 // Create a slow version of the loop by cloning the loop | |
845 // and inserting an if to select fast-slow versions. | |
846 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop, | |
847 Node_List &old_new); | |
848 | |
849 // Clone loop with an invariant test (that does not exit) and | |
850 // insert a clone of the test that selects which version to | |
851 // execute. | |
852 void do_unswitching (IdealLoopTree *loop, Node_List &old_new); | |
853 | |
854 // Find candidate "if" for unswitching | |
855 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const; | |
856 | |
857 // Range Check Elimination uses this function! | |
858 // Constrain the main loop iterations so the affine function: | |
859 // scale_con * I + offset < limit | |
860 // always holds true. That is, either increase the number of iterations in | |
861 // the pre-loop or the post-loop until the condition holds true in the main | |
862 // loop. Scale_con, offset and limit are all loop invariant. | |
863 void add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ); | |
864 | |
865 // Partially peel loop up through last_peel node. | |
866 bool partial_peel( IdealLoopTree *loop, Node_List &old_new ); | |
867 | |
868 // Create a scheduled list of nodes control dependent on ctrl set. | |
869 void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched ); | |
870 // Has a use in the vector set | |
871 bool has_use_in_set( Node* n, VectorSet& vset ); | |
872 // Has use internal to the vector set (ie. not in a phi at the loop head) | |
873 bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ); | |
874 // clone "n" for uses that are outside of loop | |
875 void clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ); | |
876 // clone "n" for special uses that are in the not_peeled region | |
877 void clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n, | |
878 VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ); | |
879 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist | |
880 void insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ); | |
881 #ifdef ASSERT | |
882 // Validate the loop partition sets: peel and not_peel | |
883 bool is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list, VectorSet& not_peel ); | |
884 // Ensure that uses outside of loop are of the right form | |
885 bool is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list, | |
886 uint orig_exit_idx, uint clone_exit_idx); | |
887 bool is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx); | |
888 #endif | |
889 | |
890 // Returns nonzero constant stride if-node is a possible iv test (otherwise returns zero.) | |
891 int stride_of_possible_iv( Node* iff ); | |
892 bool is_possible_iv_test( Node* iff ) { return stride_of_possible_iv(iff) != 0; } | |
893 // Return the (unique) control output node that's in the loop (if it exists.) | |
894 Node* stay_in_loop( Node* n, IdealLoopTree *loop); | |
895 // Insert a signed compare loop exit cloned from an unsigned compare. | |
896 IfNode* insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop); | |
897 void remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop); | |
898 // Utility to register node "n" with PhaseIdealLoop | |
899 void register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth); | |
900 // Utility to create an if-projection | |
901 ProjNode* proj_clone(ProjNode* p, IfNode* iff); | |
902 // Force the iff control output to be the live_proj | |
903 Node* short_circuit_if(IfNode* iff, ProjNode* live_proj); | |
904 // Insert a region before an if projection | |
905 RegionNode* insert_region_before_proj(ProjNode* proj); | |
906 // Insert a new if before an if projection | |
907 ProjNode* insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj); | |
908 | |
909 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps. | |
910 // "Nearly" because all Nodes have been cloned from the original in the loop, | |
911 // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs | |
912 // through the Phi recursively, and return a Bool. | |
913 BoolNode *clone_iff( PhiNode *phi, IdealLoopTree *loop ); | |
914 CmpNode *clone_bool( PhiNode *phi, IdealLoopTree *loop ); | |
915 | |
916 | |
917 // Rework addressing expressions to get the most loop-invariant stuff | |
918 // moved out. We'd like to do all associative operators, but it's especially | |
919 // important (common) to do address expressions. | |
920 Node *remix_address_expressions( Node *n ); | |
921 | |
922 // Attempt to use a conditional move instead of a phi/branch | |
923 Node *conditional_move( Node *n ); | |
924 | |
925 // Reorganize offset computations to lower register pressure. | |
926 // Mostly prevent loop-fallout uses of the pre-incremented trip counter | |
927 // (which are then alive with the post-incremented trip counter | |
928 // forcing an extra register move) | |
929 void reorg_offsets( IdealLoopTree *loop ); | |
930 | |
931 // Check for aggressive application of 'split-if' optimization, | |
932 // using basic block level info. | |
933 void split_if_with_blocks ( VectorSet &visited, Node_Stack &nstack ); | |
934 Node *split_if_with_blocks_pre ( Node *n ); | |
935 void split_if_with_blocks_post( Node *n ); | |
936 Node *has_local_phi_input( Node *n ); | |
937 // Mark an IfNode as being dominated by a prior test, | |
938 // without actually altering the CFG (and hence IDOM info). | |
939 void dominated_by( Node *prevdom, Node *iff ); | |
940 | |
941 // Split Node 'n' through merge point | |
942 Node *split_thru_region( Node *n, Node *region ); | |
943 // Split Node 'n' through merge point if there is enough win. | |
944 Node *split_thru_phi( Node *n, Node *region, int policy ); | |
945 // Found an If getting its condition-code input from a Phi in the | |
946 // same block. Split thru the Region. | |
947 void do_split_if( Node *iff ); | |
948 | |
1763 | 949 // Conversion of fill/copy patterns into intrisic versions |
950 bool do_intrinsify_fill(); | |
951 bool intrinsify_fill(IdealLoopTree* lpt); | |
952 bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value, | |
953 Node*& shift, Node*& offset); | |
954 | |
0 | 955 private: |
956 // Return a type based on condition control flow | |
957 const TypeInt* filtered_type( Node *n, Node* n_ctrl); | |
958 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); } | |
959 // Helpers for filtered type | |
960 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); | |
961 | |
962 // Helper functions | |
963 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); | |
964 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); | |
965 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); | |
966 bool split_up( Node *n, Node *blk1, Node *blk2 ); | |
967 void sink_use( Node *use, Node *post_loop ); | |
968 Node *place_near_use( Node *useblock ) const; | |
969 | |
970 bool _created_loop_node; | |
971 public: | |
972 void set_created_loop_node() { _created_loop_node = true; } | |
973 bool created_loop_node() { return _created_loop_node; } | |
1172 | 974 void register_new_node( Node *n, Node *blk ); |
0 | 975 |
976 #ifndef PRODUCT | |
977 void dump( ) const; | |
978 void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const; | |
979 void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const; | |
980 void verify() const; // Major slow :-) | |
981 void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const; | |
982 IdealLoopTree *get_loop_idx(Node* n) const { | |
983 // Dead nodes have no loop, so return the top level loop instead | |
984 return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root; | |
985 } | |
986 // Print some stats | |
987 static void print_statistics(); | |
988 static int _loop_invokes; // Count of PhaseIdealLoop invokes | |
989 static int _loop_work; // Sum of PhaseIdealLoop x _unique | |
990 #endif | |
991 }; | |
992 | |
993 inline Node* IdealLoopTree::tail() { | |
994 // Handle lazy update of _tail field | |
995 Node *n = _tail; | |
996 //while( !n->in(0) ) // Skip dead CFG nodes | |
997 //n = n->in(1); | |
998 if (n->in(0) == NULL) | |
999 n = _phase->get_ctrl(n); | |
1000 _tail = n; | |
1001 return n; | |
1002 } | |
1003 | |
1004 | |
1005 // Iterate over the loop tree using a preorder, left-to-right traversal. | |
1006 // | |
1007 // Example that visits all counted loops from within PhaseIdealLoop | |
1008 // | |
1009 // for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { | |
1010 // IdealLoopTree* lpt = iter.current(); | |
1011 // if (!lpt->is_counted()) continue; | |
1012 // ... | |
1013 class LoopTreeIterator : public StackObj { | |
1014 private: | |
1015 IdealLoopTree* _root; | |
1016 IdealLoopTree* _curnt; | |
1017 | |
1018 public: | |
1019 LoopTreeIterator(IdealLoopTree* root) : _root(root), _curnt(root) {} | |
1020 | |
1021 bool done() { return _curnt == NULL; } // Finished iterating? | |
1022 | |
1023 void next(); // Advance to next loop tree | |
1024 | |
1025 IdealLoopTree* current() { return _curnt; } // Return current value of iterator. | |
1026 }; | |
1972 | 1027 |
1028 #endif // SHARE_VM_OPTO_LOOPNODE_HPP |