Mercurial > hg > graal-compiler
annotate src/share/vm/opto/loopnode.hpp @ 3992:d1bdeef3e3e2
7098282: G1: assert(interval >= 0) failed: Sanity check, referencePolicy.cpp: 76
Summary: There is a race between one thread successfully forwarding and copying the klass mirror for the SoftReference class (including the static master clock) and another thread attempting to use the master clock while attempting to discover a soft reference object. Maintain a shadow copy of the soft reference master clock and use the shadow during reference discovery and reference processing.
Reviewed-by: tonyp, brutisso, ysr
author | johnc |
---|---|
date | Wed, 12 Oct 2011 10:25:51 -0700 |
parents | c96c3eb1efae |
children | 670a74b863fc |
rev | line source |
---|---|
0 | 1 /* |
2248
194c9fdee631
7017240: C2: native memory leak in nsk/regression/b4675027 on windows-x86 in comp mode with G1
kvn
parents:
1972
diff
changeset
|
2 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1303
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #ifndef SHARE_VM_OPTO_LOOPNODE_HPP |
26 #define SHARE_VM_OPTO_LOOPNODE_HPP | |
27 | |
28 #include "opto/cfgnode.hpp" | |
29 #include "opto/multnode.hpp" | |
30 #include "opto/phaseX.hpp" | |
31 #include "opto/subnode.hpp" | |
32 #include "opto/type.hpp" | |
33 | |
0 | 34 class CmpNode; |
35 class CountedLoopEndNode; | |
36 class CountedLoopNode; | |
37 class IdealLoopTree; | |
38 class LoopNode; | |
39 class Node; | |
40 class PhaseIdealLoop; | |
41 class VectorSet; | |
1172 | 42 class Invariance; |
0 | 43 struct small_cache; |
44 | |
45 // | |
46 // I D E A L I Z E D L O O P S | |
47 // | |
48 // Idealized loops are the set of loops I perform more interesting | |
49 // transformations on, beyond simple hoisting. | |
50 | |
51 //------------------------------LoopNode--------------------------------------- | |
52 // Simple loop header. Fall in path on left, loop-back path on right. | |
53 class LoopNode : public RegionNode { | |
54 // Size is bigger to hold the flags. However, the flags do not change | |
55 // the semantics so it does not appear in the hash & cmp functions. | |
56 virtual uint size_of() const { return sizeof(*this); } | |
57 protected: | |
58 short _loop_flags; | |
59 // Names for flag bitfields | |
2465 | 60 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3, |
61 MainHasNoPreLoop=4, | |
62 HasExactTripCount=8, | |
63 InnerLoop=16, | |
64 PartialPeelLoop=32, | |
65 PartialPeelFailed=64 }; | |
0 | 66 char _unswitch_count; |
67 enum { _unswitch_max=3 }; | |
68 | |
69 public: | |
70 // Names for edge indices | |
71 enum { Self=0, EntryControl, LoopBackControl }; | |
72 | |
2465 | 73 int is_inner_loop() const { return _loop_flags & InnerLoop; } |
74 void set_inner_loop() { _loop_flags |= InnerLoop; } | |
0 | 75 |
2465 | 76 int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; } |
77 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; } | |
78 int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; } | |
79 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; } | |
0 | 80 |
81 int unswitch_max() { return _unswitch_max; } | |
82 int unswitch_count() { return _unswitch_count; } | |
83 void set_unswitch_count(int val) { | |
84 assert (val <= unswitch_max(), "too many unswitches"); | |
85 _unswitch_count = val; | |
86 } | |
87 | |
88 LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) { | |
89 init_class_id(Class_Loop); | |
90 init_req(EntryControl, entry); | |
91 init_req(LoopBackControl, backedge); | |
92 } | |
93 | |
94 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); | |
95 virtual int Opcode() const; | |
96 bool can_be_counted_loop(PhaseTransform* phase) const { | |
97 return req() == 3 && in(0) != NULL && | |
98 in(1) != NULL && phase->type(in(1)) != Type::TOP && | |
99 in(2) != NULL && phase->type(in(2)) != Type::TOP; | |
100 } | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
101 bool is_valid_counted_loop() const; |
0 | 102 #ifndef PRODUCT |
103 virtual void dump_spec(outputStream *st) const; | |
104 #endif | |
105 }; | |
106 | |
107 //------------------------------Counted Loops---------------------------------- | |
108 // Counted loops are all trip-counted loops, with exactly 1 trip-counter exit | |
109 // path (and maybe some other exit paths). The trip-counter exit is always | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
110 // last in the loop. The trip-counter have to stride by a constant; |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
111 // the exit value is also loop invariant. |
0 | 112 |
113 // CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The | |
114 // CountedLoopNode has the incoming loop control and the loop-back-control | |
115 // which is always the IfTrue before the matching CountedLoopEndNode. The | |
116 // CountedLoopEndNode has an incoming control (possibly not the | |
117 // CountedLoopNode if there is control flow in the loop), the post-increment | |
118 // trip-counter value, and the limit. The trip-counter value is always of | |
119 // the form (Op old-trip-counter stride). The old-trip-counter is produced | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
120 // by a Phi connected to the CountedLoopNode. The stride is constant. |
0 | 121 // The Op is any commutable opcode, including Add, Mul, Xor. The |
122 // CountedLoopEndNode also takes in the loop-invariant limit value. | |
123 | |
124 // From a CountedLoopNode I can reach the matching CountedLoopEndNode via the | |
125 // loop-back control. From CountedLoopEndNodes I can reach CountedLoopNodes | |
126 // via the old-trip-counter from the Op node. | |
127 | |
128 //------------------------------CountedLoopNode-------------------------------- | |
129 // CountedLoopNodes head simple counted loops. CountedLoopNodes have as | |
130 // inputs the incoming loop-start control and the loop-back control, so they | |
131 // act like RegionNodes. They also take in the initial trip counter, the | |
132 // loop-invariant stride and the loop-invariant limit value. CountedLoopNodes | |
133 // produce a loop-body control and the trip counter value. Since | |
134 // CountedLoopNodes behave like RegionNodes I still have a standard CFG model. | |
135 | |
136 class CountedLoopNode : public LoopNode { | |
137 // Size is bigger to hold _main_idx. However, _main_idx does not change | |
138 // the semantics so it does not appear in the hash & cmp functions. | |
139 virtual uint size_of() const { return sizeof(*this); } | |
140 | |
141 // For Pre- and Post-loops during debugging ONLY, this holds the index of | |
142 // the Main CountedLoop. Used to assert that we understand the graph shape. | |
143 node_idx_t _main_idx; | |
144 | |
2465 | 145 // Known trip count calculated by compute_exact_trip_count() |
146 uint _trip_count; | |
0 | 147 |
148 // Expected trip count from profile data | |
149 float _profile_trip_cnt; | |
150 | |
151 // Log2 of original loop bodies in unrolled loop | |
152 int _unrolled_count_log2; | |
153 | |
154 // Node count prior to last unrolling - used to decide if | |
155 // unroll,optimize,unroll,optimize,... is making progress | |
156 int _node_count_before_unroll; | |
157 | |
158 public: | |
159 CountedLoopNode( Node *entry, Node *backedge ) | |
2465 | 160 : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint), |
0 | 161 _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0), |
162 _node_count_before_unroll(0) { | |
163 init_class_id(Class_CountedLoop); | |
164 // Initialize _trip_count to the largest possible value. | |
165 // Will be reset (lower) if the loop's trip count is known. | |
166 } | |
167 | |
168 virtual int Opcode() const; | |
169 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); | |
170 | |
171 Node *init_control() const { return in(EntryControl); } | |
172 Node *back_control() const { return in(LoopBackControl); } | |
173 CountedLoopEndNode *loopexit() const; | |
174 Node *init_trip() const; | |
175 Node *stride() const; | |
176 int stride_con() const; | |
177 bool stride_is_con() const; | |
178 Node *limit() const; | |
179 Node *incr() const; | |
180 Node *phi() const; | |
181 | |
182 // Match increment with optional truncation | |
183 static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type); | |
184 | |
185 // A 'main' loop has a pre-loop and a post-loop. The 'main' loop | |
186 // can run short a few iterations and may start a few iterations in. | |
187 // It will be RCE'd and unrolled and aligned. | |
188 | |
189 // A following 'post' loop will run any remaining iterations. Used | |
190 // during Range Check Elimination, the 'post' loop will do any final | |
191 // iterations with full checks. Also used by Loop Unrolling, where | |
192 // the 'post' loop will do any epilog iterations needed. Basically, | |
193 // a 'post' loop can not profitably be further unrolled or RCE'd. | |
194 | |
195 // A preceding 'pre' loop will run at least 1 iteration (to do peeling), | |
196 // it may do under-flow checks for RCE and may do alignment iterations | |
197 // so the following main loop 'knows' that it is striding down cache | |
198 // lines. | |
199 | |
200 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or | |
201 // Aligned, may be missing it's pre-loop. | |
2465 | 202 int is_normal_loop() const { return (_loop_flags&PreMainPostFlagsMask) == Normal; } |
203 int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; } | |
204 int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; } | |
205 int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; } | |
206 int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; } | |
207 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; } | |
0 | 208 |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
196
diff
changeset
|
209 int main_idx() const { return _main_idx; } |
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
196
diff
changeset
|
210 |
0 | 211 |
212 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } | |
213 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; } | |
214 void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; } | |
2465 | 215 void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; } |
216 | |
217 void set_trip_count(uint tc) { _trip_count = tc; } | |
218 uint trip_count() { return _trip_count; } | |
0 | 219 |
2465 | 220 bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; } |
221 void set_exact_trip_count(uint tc) { | |
222 _trip_count = tc; | |
223 _loop_flags |= HasExactTripCount; | |
224 } | |
225 void set_nonexact_trip_count() { | |
226 _loop_flags &= ~HasExactTripCount; | |
227 } | |
0 | 228 |
229 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; } | |
230 float profile_trip_cnt() { return _profile_trip_cnt; } | |
231 | |
232 void double_unrolled_count() { _unrolled_count_log2++; } | |
233 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); } | |
234 | |
235 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; } | |
236 int node_count_before_unroll() { return _node_count_before_unroll; } | |
237 | |
238 #ifndef PRODUCT | |
239 virtual void dump_spec(outputStream *st) const; | |
240 #endif | |
241 }; | |
242 | |
243 //------------------------------CountedLoopEndNode----------------------------- | |
244 // CountedLoopEndNodes end simple trip counted loops. They act much like | |
245 // IfNodes. | |
246 class CountedLoopEndNode : public IfNode { | |
247 public: | |
248 enum { TestControl, TestValue }; | |
249 | |
250 CountedLoopEndNode( Node *control, Node *test, float prob, float cnt ) | |
251 : IfNode( control, test, prob, cnt) { | |
252 init_class_id(Class_CountedLoopEnd); | |
253 } | |
254 virtual int Opcode() const; | |
255 | |
256 Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; } | |
257 Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
258 Node *limit() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; } | |
259 Node *stride() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; } | |
260 Node *phi() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
261 Node *init_trip() const { Node *tmp = phi (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } | |
262 int stride_con() const; | |
263 bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); } | |
264 BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; } | |
265 CountedLoopNode *loopnode() const { | |
266 Node *ln = phi()->in(0); | |
267 assert( ln->Opcode() == Op_CountedLoop, "malformed loop" ); | |
268 return (CountedLoopNode*)ln; } | |
269 | |
270 #ifndef PRODUCT | |
271 virtual void dump_spec(outputStream *st) const; | |
272 #endif | |
273 }; | |
274 | |
275 | |
276 inline CountedLoopEndNode *CountedLoopNode::loopexit() const { | |
277 Node *bc = back_control(); | |
278 if( bc == NULL ) return NULL; | |
279 Node *le = bc->in(0); | |
280 if( le->Opcode() != Op_CountedLoopEnd ) | |
281 return NULL; | |
282 return (CountedLoopEndNode*)le; | |
283 } | |
284 inline Node *CountedLoopNode::init_trip() const { return loopexit() ? loopexit()->init_trip() : NULL; } | |
285 inline Node *CountedLoopNode::stride() const { return loopexit() ? loopexit()->stride() : NULL; } | |
286 inline int CountedLoopNode::stride_con() const { return loopexit() ? loopexit()->stride_con() : 0; } | |
287 inline bool CountedLoopNode::stride_is_con() const { return loopexit() && loopexit()->stride_is_con(); } | |
288 inline Node *CountedLoopNode::limit() const { return loopexit() ? loopexit()->limit() : NULL; } | |
289 inline Node *CountedLoopNode::incr() const { return loopexit() ? loopexit()->incr() : NULL; } | |
290 inline Node *CountedLoopNode::phi() const { return loopexit() ? loopexit()->phi() : NULL; } | |
291 | |
3345 | 292 //------------------------------LoopLimitNode----------------------------- |
293 // Counted Loop limit node which represents exact final iterator value: | |
294 // trip_count = (limit - init_trip + stride - 1)/stride | |
295 // final_value= trip_count * stride + init_trip. | |
296 // Use HW instructions to calculate it when it can overflow in integer. | |
297 // Note, final_value should fit into integer since counted loop has | |
298 // limit check: limit <= max_int-stride. | |
299 class LoopLimitNode : public Node { | |
300 enum { Init=1, Limit=2, Stride=3 }; | |
301 public: | |
302 LoopLimitNode( Compile* C, Node *init, Node *limit, Node *stride ) : Node(0,init,limit,stride) { | |
303 // Put it on the Macro nodes list to optimize during macro nodes expansion. | |
304 init_flags(Flag_is_macro); | |
305 C->add_macro_node(this); | |
306 } | |
307 virtual int Opcode() const; | |
308 virtual const Type *bottom_type() const { return TypeInt::INT; } | |
309 virtual uint ideal_reg() const { return Op_RegI; } | |
310 virtual const Type *Value( PhaseTransform *phase ) const; | |
311 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); | |
312 virtual Node *Identity( PhaseTransform *phase ); | |
313 }; | |
0 | 314 |
315 // -----------------------------IdealLoopTree---------------------------------- | |
316 class IdealLoopTree : public ResourceObj { | |
317 public: | |
318 IdealLoopTree *_parent; // Parent in loop tree | |
319 IdealLoopTree *_next; // Next sibling in loop tree | |
320 IdealLoopTree *_child; // First child in loop tree | |
321 | |
322 // The head-tail backedge defines the loop. | |
323 // If tail is NULL then this loop has multiple backedges as part of the | |
324 // same loop. During cleanup I'll peel off the multiple backedges; merge | |
325 // them at the loop bottom and flow 1 real backedge into the loop. | |
326 Node *_head; // Head of loop | |
327 Node *_tail; // Tail of loop | |
328 inline Node *tail(); // Handle lazy update of _tail field | |
329 PhaseIdealLoop* _phase; | |
330 | |
331 Node_List _body; // Loop body for inner loops | |
332 | |
333 uint8 _nest; // Nesting depth | |
334 uint8 _irreducible:1, // True if irreducible | |
335 _has_call:1, // True if has call safepoint | |
336 _has_sfpt:1, // True if has non-call safepoint | |
337 _rce_candidate:1; // True if candidate for range check elimination | |
338 | |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
339 Node_List* _required_safept; // A inner loop cannot delete these safepts; |
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
340 bool _allow_optimizations; // Allow loop optimizations |
0 | 341 |
342 IdealLoopTree( PhaseIdealLoop* phase, Node *head, Node *tail ) | |
343 : _parent(0), _next(0), _child(0), | |
344 _head(head), _tail(tail), | |
345 _phase(phase), | |
346 _required_safept(NULL), | |
39
76256d272075
6667612: (Escape Analysis) disable loop cloning if it has a scalar replaceable allocation
kvn
parents:
17
diff
changeset
|
347 _allow_optimizations(true), |
0 | 348 _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0) |
349 { } | |
350 | |
351 // Is 'l' a member of 'this'? | |
352 int is_member( const IdealLoopTree *l ) const; // Test for nested membership | |
353 | |
354 // Set loop nesting depth. Accumulate has_call bits. | |
355 int set_nest( uint depth ); | |
356 | |
357 // Split out multiple fall-in edges from the loop header. Move them to a | |
358 // private RegionNode before the loop. This becomes the loop landing pad. | |
359 void split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt ); | |
360 | |
361 // Split out the outermost loop from this shared header. | |
362 void split_outer_loop( PhaseIdealLoop *phase ); | |
363 | |
364 // Merge all the backedges from the shared header into a private Region. | |
365 // Feed that region as the one backedge to this loop. | |
366 void merge_many_backedges( PhaseIdealLoop *phase ); | |
367 | |
368 // Split shared headers and insert loop landing pads. | |
369 // Insert a LoopNode to replace the RegionNode. | |
370 // Returns TRUE if loop tree is structurally changed. | |
371 bool beautify_loops( PhaseIdealLoop *phase ); | |
372 | |
1172 | 373 // Perform optimization to use the loop predicates for null checks and range checks. |
374 // Applies to any loop level (not just the innermost one) | |
375 bool loop_predication( PhaseIdealLoop *phase); | |
376 | |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
377 // Perform iteration-splitting on inner loops. Split iterations to |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
378 // avoid range checks or one-shot null checks. Returns false if the |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
379 // current round of loop opts should stop. |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
380 bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); |
0 | 381 |
401
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
382 // Driver for various flavors of iteration splitting. Returns false |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
383 // if the current round of loop opts should stop. |
ee8f06bfb27c
6743188: incomplete fix for 6700047 C2 failed in idom_no_update
never
parents:
367
diff
changeset
|
384 bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ); |
0 | 385 |
386 // Given dominators, try to find loops with calls that must always be | |
387 // executed (call dominates loop tail). These loops do not need non-call | |
388 // safepoints (ncsfpt). | |
389 void check_safepts(VectorSet &visited, Node_List &stack); | |
390 | |
391 // Allpaths backwards scan from loop tail, terminating each path at first safepoint | |
392 // encountered. | |
393 void allpaths_check_safepts(VectorSet &visited, Node_List &stack); | |
394 | |
395 // Convert to counted loops where possible | |
396 void counted_loop( PhaseIdealLoop *phase ); | |
397 | |
398 // Check for Node being a loop-breaking test | |
399 Node *is_loop_exit(Node *iff) const; | |
400 | |
401 // Returns true if ctrl is executed on every complete iteration | |
402 bool dominates_backedge(Node* ctrl); | |
403 | |
404 // Remove simplistic dead code from loop body | |
405 void DCE_loop_body(); | |
406 | |
407 // Look for loop-exit tests with my 50/50 guesses from the Parsing stage. | |
408 // Replace with a 1-in-10 exit guess. | |
409 void adjust_loop_exit_prob( PhaseIdealLoop *phase ); | |
410 | |
411 // Return TRUE or FALSE if the loop should never be RCE'd or aligned. | |
412 // Useful for unrolling loops with NO array accesses. | |
413 bool policy_peel_only( PhaseIdealLoop *phase ) const; | |
414 | |
415 // Return TRUE or FALSE if the loop should be unswitched -- clone | |
416 // loop with an invariant test | |
417 bool policy_unswitching( PhaseIdealLoop *phase ) const; | |
418 | |
419 // Micro-benchmark spamming. Remove empty loops. | |
420 bool policy_do_remove_empty_loop( PhaseIdealLoop *phase ); | |
421 | |
2465 | 422 // Convert one iteration loop into normal code. |
423 bool policy_do_one_iteration_loop( PhaseIdealLoop *phase ); | |
424 | |
0 | 425 // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can |
426 // make some loop-invariant test (usually a null-check) happen before the | |
427 // loop. | |
428 bool policy_peeling( PhaseIdealLoop *phase ) const; | |
429 | |
430 // Return TRUE or FALSE if the loop should be maximally unrolled. Stash any | |
431 // known trip count in the counted loop node. | |
432 bool policy_maximally_unroll( PhaseIdealLoop *phase ) const; | |
433 | |
434 // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if | |
435 // the loop is a CountedLoop and the body is small enough. | |
436 bool policy_unroll( PhaseIdealLoop *phase ) const; | |
437 | |
438 // Return TRUE or FALSE if the loop should be range-check-eliminated. | |
439 // Gather a list of IF tests that are dominated by iteration splitting; | |
440 // also gather the end of the first split and the start of the 2nd split. | |
441 bool policy_range_check( PhaseIdealLoop *phase ) const; | |
442 | |
443 // Return TRUE or FALSE if the loop should be cache-line aligned. | |
444 // Gather the expression that does the alignment. Note that only | |
605 | 445 // one array base can be aligned in a loop (unless the VM guarantees |
0 | 446 // mutual alignment). Note that if we vectorize short memory ops |
447 // into longer memory ops, we may want to increase alignment. | |
448 bool policy_align( PhaseIdealLoop *phase ) const; | |
449 | |
1172 | 450 // Return TRUE if "iff" is a range check. |
451 bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const; | |
452 | |
2465 | 453 // Compute loop exact trip count if possible |
454 void compute_exact_trip_count( PhaseIdealLoop *phase ); | |
455 | |
0 | 456 // Compute loop trip count from profile data |
457 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); | |
458 | |
459 // Reassociate invariant expressions. | |
460 void reassociate_invariants(PhaseIdealLoop *phase); | |
461 // Reassociate invariant add and subtract expressions. | |
462 Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase); | |
463 // Return nonzero index of invariant operand if invariant and variant | |
605 | 464 // are combined with an Add or Sub. Helper for reassociate_invariants. |
0 | 465 int is_invariant_addition(Node* n, PhaseIdealLoop *phase); |
466 | |
467 // Return true if n is invariant | |
468 bool is_invariant(Node* n) const; | |
469 | |
470 // Put loop body on igvn work list | |
471 void record_for_igvn(); | |
472 | |
473 bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); } | |
474 bool is_inner() { return is_loop() && _child == NULL; } | |
475 bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); } | |
476 | |
477 #ifndef PRODUCT | |
478 void dump_head( ) const; // Dump loop head only | |
479 void dump() const; // Dump this loop recursively | |
480 void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const; | |
481 #endif | |
482 | |
483 }; | |
484 | |
485 // -----------------------------PhaseIdealLoop--------------------------------- | |
486 // Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a | |
487 // loop tree. Drives the loop-based transformations on the ideal graph. | |
488 class PhaseIdealLoop : public PhaseTransform { | |
489 friend class IdealLoopTree; | |
490 friend class SuperWord; | |
491 // Pre-computed def-use info | |
492 PhaseIterGVN &_igvn; | |
493 | |
494 // Head of loop tree | |
495 IdealLoopTree *_ltree_root; | |
496 | |
497 // Array of pre-order numbers, plus post-visited bit. | |
498 // ZERO for not pre-visited. EVEN for pre-visited but not post-visited. | |
499 // ODD for post-visited. Other bits are the pre-order number. | |
500 uint *_preorders; | |
501 uint _max_preorder; | |
502 | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
503 const PhaseIdealLoop* _verify_me; |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
504 bool _verify_only; |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
505 |
0 | 506 // Allocate _preorders[] array |
507 void allocate_preorders() { | |
508 _max_preorder = C->unique()+8; | |
509 _preorders = NEW_RESOURCE_ARRAY(uint, _max_preorder); | |
510 memset(_preorders, 0, sizeof(uint) * _max_preorder); | |
511 } | |
512 | |
513 // Allocate _preorders[] array | |
514 void reallocate_preorders() { | |
515 if ( _max_preorder < C->unique() ) { | |
516 _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, C->unique()); | |
517 _max_preorder = C->unique(); | |
518 } | |
519 memset(_preorders, 0, sizeof(uint) * _max_preorder); | |
520 } | |
521 | |
522 // Check to grow _preorders[] array for the case when build_loop_tree_impl() | |
523 // adds new nodes. | |
524 void check_grow_preorders( ) { | |
525 if ( _max_preorder < C->unique() ) { | |
526 uint newsize = _max_preorder<<1; // double size of array | |
527 _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, newsize); | |
528 memset(&_preorders[_max_preorder],0,sizeof(uint)*(newsize-_max_preorder)); | |
529 _max_preorder = newsize; | |
530 } | |
531 } | |
532 // Check for pre-visited. Zero for NOT visited; non-zero for visited. | |
533 int is_visited( Node *n ) const { return _preorders[n->_idx]; } | |
534 // Pre-order numbers are written to the Nodes array as low-bit-set values. | |
535 void set_preorder_visited( Node *n, int pre_order ) { | |
536 assert( !is_visited( n ), "already set" ); | |
537 _preorders[n->_idx] = (pre_order<<1); | |
538 }; | |
539 // Return pre-order number. | |
540 int get_preorder( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]>>1; } | |
541 | |
542 // Check for being post-visited. | |
543 // Should be previsited already (checked with assert(is_visited(n))). | |
544 int is_postvisited( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]&1; } | |
545 | |
546 // Mark as post visited | |
547 void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; } | |
548 | |
549 // Set/get control node out. Set lower bit to distinguish from IdealLoopTree | |
550 // Returns true if "n" is a data node, false if it's a control node. | |
551 bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; } | |
552 | |
553 // clear out dead code after build_loop_late | |
554 Node_List _deadlist; | |
555 | |
556 // Support for faster execution of get_late_ctrl()/dom_lca() | |
557 // when a node has many uses and dominator depth is deep. | |
558 Node_Array _dom_lca_tags; | |
559 void init_dom_lca_tags(); | |
560 void clear_dom_lca_tags(); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
561 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
562 // Helper for debugging bad dominance relationships |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
563 bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
564 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
565 Node* compute_lca_of_uses(Node* n, Node* early, bool verify = false); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
566 |
0 | 567 // Inline wrapper for frequent cases: |
568 // 1) only one use | |
569 // 2) a use is the same as the current LCA passed as 'n1' | |
570 Node *dom_lca_for_get_late_ctrl( Node *lca, Node *n, Node *tag ) { | |
571 assert( n->is_CFG(), "" ); | |
572 // Fast-path NULL lca | |
573 if( lca != NULL && lca != n ) { | |
574 assert( lca->is_CFG(), "" ); | |
575 // find LCA of all uses | |
576 n = dom_lca_for_get_late_ctrl_internal( lca, n, tag ); | |
577 } | |
578 return find_non_split_ctrl(n); | |
579 } | |
580 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
581 |
0 | 582 // Helper function for directing control inputs away from CFG split |
583 // points. | |
584 Node *find_non_split_ctrl( Node *ctrl ) const { | |
585 if (ctrl != NULL) { | |
586 if (ctrl->is_MultiBranch()) { | |
587 ctrl = ctrl->in(0); | |
588 } | |
589 assert(ctrl->is_CFG(), "CFG"); | |
590 } | |
591 return ctrl; | |
592 } | |
593 | |
594 public: | |
595 bool has_node( Node* n ) const { return _nodes[n->_idx] != NULL; } | |
596 // check if transform created new nodes that need _ctrl recorded | |
597 Node *get_late_ctrl( Node *n, Node *early ); | |
598 Node *get_early_ctrl( Node *n ); | |
599 void set_early_ctrl( Node *n ); | |
600 void set_subtree_ctrl( Node *root ); | |
601 void set_ctrl( Node *n, Node *ctrl ) { | |
602 assert( !has_node(n) || has_ctrl(n), "" ); | |
603 assert( ctrl->in(0), "cannot set dead control node" ); | |
604 assert( ctrl == find_non_split_ctrl(ctrl), "must set legal crtl" ); | |
605 _nodes.map( n->_idx, (Node*)((intptr_t)ctrl + 1) ); | |
606 } | |
607 // Set control and update loop membership | |
608 void set_ctrl_and_loop(Node* n, Node* ctrl) { | |
609 IdealLoopTree* old_loop = get_loop(get_ctrl(n)); | |
610 IdealLoopTree* new_loop = get_loop(ctrl); | |
611 if (old_loop != new_loop) { | |
612 if (old_loop->_child == NULL) old_loop->_body.yank(n); | |
613 if (new_loop->_child == NULL) new_loop->_body.push(n); | |
614 } | |
615 set_ctrl(n, ctrl); | |
616 } | |
617 // Control nodes can be replaced or subsumed. During this pass they | |
618 // get their replacement Node in slot 1. Instead of updating the block | |
619 // location of all Nodes in the subsumed block, we lazily do it. As we | |
620 // pull such a subsumed block out of the array, we write back the final | |
621 // correct block. | |
622 Node *get_ctrl( Node *i ) { | |
623 assert(has_node(i), ""); | |
624 Node *n = get_ctrl_no_update(i); | |
625 _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) ); | |
626 assert(has_node(i) && has_ctrl(i), ""); | |
627 assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); | |
628 return n; | |
629 } | |
1172 | 630 // true if CFG node d dominates CFG node n |
631 bool is_dominator(Node *d, Node *n); | |
632 // return get_ctrl for a data node and self(n) for a CFG node | |
633 Node* ctrl_or_self(Node* n) { | |
634 if (has_ctrl(n)) | |
635 return get_ctrl(n); | |
636 else { | |
637 assert (n->is_CFG(), "must be a CFG node"); | |
638 return n; | |
639 } | |
640 } | |
0 | 641 |
642 private: | |
643 Node *get_ctrl_no_update( Node *i ) const { | |
644 assert( has_ctrl(i), "" ); | |
645 Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1); | |
646 if (!n->in(0)) { | |
647 // Skip dead CFG nodes | |
648 do { | |
649 n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1); | |
650 } while (!n->in(0)); | |
651 n = find_non_split_ctrl(n); | |
652 } | |
653 return n; | |
654 } | |
655 | |
656 // Check for loop being set | |
657 // "n" must be a control node. Returns true if "n" is known to be in a loop. | |
658 bool has_loop( Node *n ) const { | |
659 assert(!has_node(n) || !has_ctrl(n), ""); | |
660 return has_node(n); | |
661 } | |
662 // Set loop | |
663 void set_loop( Node *n, IdealLoopTree *loop ) { | |
664 _nodes.map(n->_idx, (Node*)loop); | |
665 } | |
666 // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace | |
667 // the 'old_node' with 'new_node'. Kill old-node. Add a reference | |
668 // from old_node to new_node to support the lazy update. Reference | |
1172 | 669 // replaces loop reference, since that is not needed for dead node. |
0 | 670 public: |
671 void lazy_update( Node *old_node, Node *new_node ) { | |
672 assert( old_node != new_node, "no cycles please" ); | |
673 //old_node->set_req( 1, new_node /*NO DU INFO*/ ); | |
674 // Nodes always have DU info now, so re-use the side array slot | |
675 // for this node to provide the forwarding pointer. | |
676 _nodes.map( old_node->_idx, (Node*)((intptr_t)new_node + 1) ); | |
677 } | |
678 void lazy_replace( Node *old_node, Node *new_node ) { | |
1621
6027dddc26c6
6677629: PhaseIterGVN::subsume_node() should call hash_delete() and add_users_to_worklist()
kvn
parents:
1552
diff
changeset
|
679 _igvn.replace_node( old_node, new_node ); |
0 | 680 lazy_update( old_node, new_node ); |
681 } | |
682 void lazy_replace_proj( Node *old_node, Node *new_node ) { | |
683 assert( old_node->req() == 1, "use this for Projs" ); | |
684 _igvn.hash_delete(old_node); // Must hash-delete before hacking edges | |
685 old_node->add_req( NULL ); | |
686 lazy_replace( old_node, new_node ); | |
687 } | |
688 | |
689 private: | |
690 | |
691 // Place 'n' in some loop nest, where 'n' is a CFG node | |
692 void build_loop_tree(); | |
693 int build_loop_tree_impl( Node *n, int pre_order ); | |
694 // Insert loop into the existing loop tree. 'innermost' is a leaf of the | |
695 // loop tree, not the root. | |
696 IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost ); | |
697 | |
698 // Place Data nodes in some loop nest | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
699 void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
700 void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
701 void build_loop_late_post ( Node* n ); |
0 | 702 |
703 // Array of immediate dominance info for each CFG node indexed by node idx | |
704 private: | |
705 uint _idom_size; | |
706 Node **_idom; // Array of immediate dominators | |
707 uint *_dom_depth; // Used for fast LCA test | |
708 GrowableArray<uint>* _dom_stk; // For recomputation of dom depth | |
709 | |
710 Node* idom_no_update(Node* d) const { | |
711 assert(d->_idx < _idom_size, "oob"); | |
712 Node* n = _idom[d->_idx]; | |
713 assert(n != NULL,"Bad immediate dominator info."); | |
714 while (n->in(0) == NULL) { // Skip dead CFG nodes | |
715 //n = n->in(1); | |
716 n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1); | |
717 assert(n != NULL,"Bad immediate dominator info."); | |
718 } | |
719 return n; | |
720 } | |
721 Node *idom(Node* d) const { | |
722 uint didx = d->_idx; | |
723 Node *n = idom_no_update(d); | |
724 _idom[didx] = n; // Lazily remove dead CFG nodes from table. | |
725 return n; | |
726 } | |
727 uint dom_depth(Node* d) const { | |
728 assert(d->_idx < _idom_size, ""); | |
729 return _dom_depth[d->_idx]; | |
730 } | |
731 void set_idom(Node* d, Node* n, uint dom_depth); | |
732 // Locally compute IDOM using dom_lca call | |
733 Node *compute_idom( Node *region ) const; | |
734 // Recompute dom_depth | |
735 void recompute_dom_depth(); | |
736 | |
737 // Is safept not required by an outer loop? | |
738 bool is_deleteable_safept(Node* sfpt); | |
739 | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
740 // Replace parallel induction variable (parallel to trip counter) |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
741 void replace_parallel_iv(IdealLoopTree *loop); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
742 |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
743 // Perform verification that the graph is valid. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
744 PhaseIdealLoop( PhaseIterGVN &igvn) : |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
745 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
746 _igvn(igvn), |
2248
194c9fdee631
7017240: C2: native memory leak in nsk/regression/b4675027 on windows-x86 in comp mode with G1
kvn
parents:
1972
diff
changeset
|
747 _dom_lca_tags(arena()), // Thread::resource_area |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
748 _verify_me(NULL), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
749 _verify_only(true) { |
2445 | 750 build_and_optimize(false); |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
751 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
752 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
753 // build the loop tree and perform any requested optimizations |
2445 | 754 void build_and_optimize(bool do_split_if); |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
755 |
0 | 756 public: |
757 // Dominators for the sea of nodes | |
758 void Dominators(); | |
759 Node *dom_lca( Node *n1, Node *n2 ) const { | |
760 return find_non_split_ctrl(dom_lca_internal(n1, n2)); | |
761 } | |
762 Node *dom_lca_internal( Node *n1, Node *n2 ) const; | |
763 | |
764 // Compute the Ideal Node to Loop mapping | |
2445 | 765 PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) : |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
766 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
767 _igvn(igvn), |
2248
194c9fdee631
7017240: C2: native memory leak in nsk/regression/b4675027 on windows-x86 in comp mode with G1
kvn
parents:
1972
diff
changeset
|
768 _dom_lca_tags(arena()), // Thread::resource_area |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
769 _verify_me(NULL), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
770 _verify_only(false) { |
2445 | 771 build_and_optimize(do_split_ifs); |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
772 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
773 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
774 // Verify that verify_me made the same decisions as a fresh run. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
775 PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) : |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
776 PhaseTransform(Ideal_Loop), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
777 _igvn(igvn), |
2248
194c9fdee631
7017240: C2: native memory leak in nsk/regression/b4675027 on windows-x86 in comp mode with G1
kvn
parents:
1972
diff
changeset
|
778 _dom_lca_tags(arena()), // Thread::resource_area |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
779 _verify_me(verify_me), |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
780 _verify_only(false) { |
2445 | 781 build_and_optimize(false); |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
782 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
783 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
784 // Build and verify the loop tree without modifying the graph. This |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
785 // is useful to verify that all inputs properly dominate their uses. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
786 static void verify(PhaseIterGVN& igvn) { |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
787 #ifdef ASSERT |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
788 PhaseIdealLoop v(igvn); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
789 #endif |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
605
diff
changeset
|
790 } |
0 | 791 |
792 // True if the method has at least 1 irreducible loop | |
793 bool _has_irreducible_loops; | |
794 | |
795 // Per-Node transform | |
796 virtual Node *transform( Node *a_node ) { return 0; } | |
797 | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
798 bool is_counted_loop( Node *x, IdealLoopTree *loop ); |
0 | 799 |
3345 | 800 Node* exact_limit( IdealLoopTree *loop ); |
801 | |
0 | 802 // Return a post-walked LoopNode |
803 IdealLoopTree *get_loop( Node *n ) const { | |
804 // Dead nodes have no loop, so return the top level loop instead | |
805 if (!has_node(n)) return _ltree_root; | |
806 assert(!has_ctrl(n), ""); | |
807 return (IdealLoopTree*)_nodes[n->_idx]; | |
808 } | |
809 | |
810 // Is 'n' a (nested) member of 'loop'? | |
811 int is_member( const IdealLoopTree *loop, Node *n ) const { | |
812 return loop->is_member(get_loop(n)); } | |
813 | |
814 // This is the basic building block of the loop optimizations. It clones an | |
815 // entire loop body. It makes an old_new loop body mapping; with this | |
816 // mapping you can find the new-loop equivalent to an old-loop node. All | |
817 // new-loop nodes are exactly equal to their old-loop counterparts, all | |
818 // edges are the same. All exits from the old-loop now have a RegionNode | |
819 // that merges the equivalent new-loop path. This is true even for the | |
820 // normal "loop-exit" condition. All uses of loop-invariant old-loop values | |
821 // now come from (one or more) Phis that merge their new-loop equivalents. | |
822 // Parameter side_by_side_idom: | |
823 // When side_by_size_idom is NULL, the dominator tree is constructed for | |
824 // the clone loop to dominate the original. Used in construction of | |
825 // pre-main-post loop sequence. | |
826 // When nonnull, the clone and original are side-by-side, both are | |
827 // dominated by the passed in side_by_side_idom node. Used in | |
828 // construction of unswitched loops. | |
829 void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth, | |
830 Node* side_by_side_idom = NULL); | |
831 | |
832 // If we got the effect of peeling, either by actually peeling or by | |
833 // making a pre-loop which must execute at least once, we can remove | |
834 // all loop-invariant dominated tests in the main body. | |
835 void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ); | |
836 | |
837 // Generate code to do a loop peel for the given loop (and body). | |
838 // old_new is a temp array. | |
839 void do_peeling( IdealLoopTree *loop, Node_List &old_new ); | |
840 | |
841 // Add pre and post loops around the given loop. These loops are used | |
842 // during RCE, unrolling and aligning loops. | |
843 void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ); | |
844 // If Node n lives in the back_ctrl block, we clone a private version of n | |
845 // in preheader_ctrl block and return that, otherwise return n. | |
3788
e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
kvn
parents:
3383
diff
changeset
|
846 Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ); |
0 | 847 |
848 // Take steps to maximally unroll the loop. Peel any odd iterations, then | |
849 // unroll to do double iterations. The next round of major loop transforms | |
850 // will repeat till the doubled loop body does all remaining iterations in 1 | |
851 // pass. | |
852 void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ); | |
853 | |
854 // Unroll the loop body one step - make each trip do 2 iterations. | |
855 void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ); | |
856 | |
857 // Return true if exp is a constant times an induction var | |
858 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale); | |
859 | |
860 // Return true if exp is a scaled induction var plus (or minus) constant | |
861 bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); | |
862 | |
1172 | 863 // Return true if proj is for "proj->[region->..]call_uct" |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
864 static bool is_uncommon_trap_proj(ProjNode* proj, Deoptimization::DeoptReason reason); |
1172 | 865 // Return true for "if(test)-> proj -> ... |
866 // | | |
867 // V | |
868 // other_proj->[region->..]call_uct" | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
869 static bool is_uncommon_trap_if_pattern(ProjNode* proj, Deoptimization::DeoptReason reason); |
1172 | 870 // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
871 ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
872 Deoptimization::DeoptReason reason); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
873 void register_control(Node* n, IdealLoopTree *loop, Node* pred); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
874 |
2445 | 875 // Clone loop predicates to cloned loops (peeled, unswitched) |
876 static ProjNode* clone_predicate(ProjNode* predicate_proj, Node* new_entry, | |
877 Deoptimization::DeoptReason reason, | |
878 PhaseIdealLoop* loop_phase, | |
879 PhaseIterGVN* igvn); | |
3845
c96c3eb1efae
7068051: SIGSEGV in PhaseIdealLoop::build_loop_late_post
kvn
parents:
3840
diff
changeset
|
880 |
2445 | 881 static Node* clone_loop_predicates(Node* old_entry, Node* new_entry, |
3345 | 882 bool clone_limit_check, |
2445 | 883 PhaseIdealLoop* loop_phase, |
884 PhaseIterGVN* igvn); | |
3345 | 885 Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check); |
2445 | 886 |
887 static Node* skip_loop_predicates(Node* entry); | |
888 | |
889 // Find a good location to insert a predicate | |
2383
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
890 static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason); |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
891 // Find a predicate |
9dc311b8473e
7008866: Missing loop predicate for loop with multiple entries
kvn
parents:
2248
diff
changeset
|
892 static Node* find_predicate(Node* entry); |
1172 | 893 // Construct a range check for a predicate if |
3345 | 894 BoolNode* rc_predicate(IdealLoopTree *loop, Node* ctrl, |
1172 | 895 int scale, Node* offset, |
896 Node* init, Node* limit, Node* stride, | |
1303
c047da02984c
6930043: C2: SIGSEGV in javasoft.sqe.tests.lang.arr017.arr01702.arr01702.loop_forw(II)I
never
parents:
1172
diff
changeset
|
897 Node* range, bool upper); |
1172 | 898 |
899 // Implementation of the loop predication to promote checks outside the loop | |
900 bool loop_predication_impl(IdealLoopTree *loop); | |
901 | |
902 // Helper function to collect predicate for eliminating the useless ones | |
903 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); | |
904 void eliminate_useless_predicates(); | |
905 | |
0 | 906 // Eliminate range-checks and other trip-counter vs loop-invariant tests. |
907 void do_range_check( IdealLoopTree *loop, Node_List &old_new ); | |
908 | |
909 // Create a slow version of the loop by cloning the loop | |
910 // and inserting an if to select fast-slow versions. | |
911 ProjNode* create_slow_version_of_loop(IdealLoopTree *loop, | |
912 Node_List &old_new); | |
913 | |
914 // Clone loop with an invariant test (that does not exit) and | |
915 // insert a clone of the test that selects which version to | |
916 // execute. | |
917 void do_unswitching (IdealLoopTree *loop, Node_List &old_new); | |
918 | |
919 // Find candidate "if" for unswitching | |
920 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const; | |
921 | |
922 // Range Check Elimination uses this function! | |
923 // Constrain the main loop iterations so the affine function: | |
3345 | 924 // low_limit <= scale_con * I + offset < upper_limit |
0 | 925 // always holds true. That is, either increase the number of iterations in |
926 // the pre-loop or the post-loop until the condition holds true in the main | |
927 // loop. Scale_con, offset and limit are all loop invariant. | |
3345 | 928 void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ); |
3383
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3345
diff
changeset
|
929 // Helper function for add_constraint(). |
38569792a45a
7044725: -XX:-UnrollLimitCheck -Xcomp : Exception: String index out of range: 29488
kvn
parents:
3345
diff
changeset
|
930 Node* adjust_limit( int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl ); |
0 | 931 |
932 // Partially peel loop up through last_peel node. | |
933 bool partial_peel( IdealLoopTree *loop, Node_List &old_new ); | |
934 | |
935 // Create a scheduled list of nodes control dependent on ctrl set. | |
936 void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched ); | |
937 // Has a use in the vector set | |
938 bool has_use_in_set( Node* n, VectorSet& vset ); | |
939 // Has use internal to the vector set (ie. not in a phi at the loop head) | |
940 bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ); | |
941 // clone "n" for uses that are outside of loop | |
942 void clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ); | |
943 // clone "n" for special uses that are in the not_peeled region | |
944 void clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n, | |
945 VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ); | |
946 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist | |
947 void insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ); | |
948 #ifdef ASSERT | |
949 // Validate the loop partition sets: peel and not_peel | |
950 bool is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list, VectorSet& not_peel ); | |
951 // Ensure that uses outside of loop are of the right form | |
952 bool is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list, | |
953 uint orig_exit_idx, uint clone_exit_idx); | |
954 bool is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx); | |
955 #endif | |
956 | |
957 // Returns nonzero constant stride if-node is a possible iv test (otherwise returns zero.) | |
958 int stride_of_possible_iv( Node* iff ); | |
959 bool is_possible_iv_test( Node* iff ) { return stride_of_possible_iv(iff) != 0; } | |
960 // Return the (unique) control output node that's in the loop (if it exists.) | |
961 Node* stay_in_loop( Node* n, IdealLoopTree *loop); | |
962 // Insert a signed compare loop exit cloned from an unsigned compare. | |
963 IfNode* insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop); | |
964 void remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop); | |
965 // Utility to register node "n" with PhaseIdealLoop | |
966 void register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth); | |
967 // Utility to create an if-projection | |
968 ProjNode* proj_clone(ProjNode* p, IfNode* iff); | |
969 // Force the iff control output to be the live_proj | |
970 Node* short_circuit_if(IfNode* iff, ProjNode* live_proj); | |
971 // Insert a region before an if projection | |
972 RegionNode* insert_region_before_proj(ProjNode* proj); | |
973 // Insert a new if before an if projection | |
974 ProjNode* insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj); | |
975 | |
976 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps. | |
977 // "Nearly" because all Nodes have been cloned from the original in the loop, | |
978 // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs | |
979 // through the Phi recursively, and return a Bool. | |
980 BoolNode *clone_iff( PhiNode *phi, IdealLoopTree *loop ); | |
981 CmpNode *clone_bool( PhiNode *phi, IdealLoopTree *loop ); | |
982 | |
983 | |
984 // Rework addressing expressions to get the most loop-invariant stuff | |
985 // moved out. We'd like to do all associative operators, but it's especially | |
986 // important (common) to do address expressions. | |
987 Node *remix_address_expressions( Node *n ); | |
988 | |
989 // Attempt to use a conditional move instead of a phi/branch | |
990 Node *conditional_move( Node *n ); | |
991 | |
992 // Reorganize offset computations to lower register pressure. | |
993 // Mostly prevent loop-fallout uses of the pre-incremented trip counter | |
994 // (which are then alive with the post-incremented trip counter | |
995 // forcing an extra register move) | |
996 void reorg_offsets( IdealLoopTree *loop ); | |
997 | |
998 // Check for aggressive application of 'split-if' optimization, | |
999 // using basic block level info. | |
1000 void split_if_with_blocks ( VectorSet &visited, Node_Stack &nstack ); | |
1001 Node *split_if_with_blocks_pre ( Node *n ); | |
1002 void split_if_with_blocks_post( Node *n ); | |
1003 Node *has_local_phi_input( Node *n ); | |
1004 // Mark an IfNode as being dominated by a prior test, | |
1005 // without actually altering the CFG (and hence IDOM info). | |
3840
4e761e7e6e12
7070134: Hotspot crashes with sigsegv from PorterStemmer
kvn
parents:
3788
diff
changeset
|
1006 void dominated_by( Node *prevdom, Node *iff, bool flip = false, bool exclude_loop_predicate = false ); |
0 | 1007 |
1008 // Split Node 'n' through merge point | |
1009 Node *split_thru_region( Node *n, Node *region ); | |
1010 // Split Node 'n' through merge point if there is enough win. | |
1011 Node *split_thru_phi( Node *n, Node *region, int policy ); | |
1012 // Found an If getting its condition-code input from a Phi in the | |
1013 // same block. Split thru the Region. | |
1014 void do_split_if( Node *iff ); | |
1015 | |
1763 | 1016 // Conversion of fill/copy patterns into intrisic versions |
1017 bool do_intrinsify_fill(); | |
1018 bool intrinsify_fill(IdealLoopTree* lpt); | |
1019 bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value, | |
1020 Node*& shift, Node*& offset); | |
1021 | |
0 | 1022 private: |
1023 // Return a type based on condition control flow | |
1024 const TypeInt* filtered_type( Node *n, Node* n_ctrl); | |
1025 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); } | |
1026 // Helpers for filtered type | |
1027 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); | |
1028 | |
1029 // Helper functions | |
1030 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); | |
1031 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); | |
1032 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); | |
1033 bool split_up( Node *n, Node *blk1, Node *blk2 ); | |
1034 void sink_use( Node *use, Node *post_loop ); | |
1035 Node *place_near_use( Node *useblock ) const; | |
1036 | |
1037 bool _created_loop_node; | |
1038 public: | |
1039 void set_created_loop_node() { _created_loop_node = true; } | |
1040 bool created_loop_node() { return _created_loop_node; } | |
1172 | 1041 void register_new_node( Node *n, Node *blk ); |
0 | 1042 |
1043 #ifndef PRODUCT | |
1044 void dump( ) const; | |
1045 void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const; | |
1046 void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const; | |
1047 void verify() const; // Major slow :-) | |
1048 void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const; | |
1049 IdealLoopTree *get_loop_idx(Node* n) const { | |
1050 // Dead nodes have no loop, so return the top level loop instead | |
1051 return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root; | |
1052 } | |
1053 // Print some stats | |
1054 static void print_statistics(); | |
1055 static int _loop_invokes; // Count of PhaseIdealLoop invokes | |
1056 static int _loop_work; // Sum of PhaseIdealLoop x _unique | |
1057 #endif | |
1058 }; | |
1059 | |
1060 inline Node* IdealLoopTree::tail() { | |
1061 // Handle lazy update of _tail field | |
1062 Node *n = _tail; | |
1063 //while( !n->in(0) ) // Skip dead CFG nodes | |
1064 //n = n->in(1); | |
1065 if (n->in(0) == NULL) | |
1066 n = _phase->get_ctrl(n); | |
1067 _tail = n; | |
1068 return n; | |
1069 } | |
1070 | |
1071 | |
1072 // Iterate over the loop tree using a preorder, left-to-right traversal. | |
1073 // | |
1074 // Example that visits all counted loops from within PhaseIdealLoop | |
1075 // | |
1076 // for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { | |
1077 // IdealLoopTree* lpt = iter.current(); | |
1078 // if (!lpt->is_counted()) continue; | |
1079 // ... | |
1080 class LoopTreeIterator : public StackObj { | |
1081 private: | |
1082 IdealLoopTree* _root; | |
1083 IdealLoopTree* _curnt; | |
1084 | |
1085 public: | |
1086 LoopTreeIterator(IdealLoopTree* root) : _root(root), _curnt(root) {} | |
1087 | |
1088 bool done() { return _curnt == NULL; } // Finished iterating? | |
1089 | |
1090 void next(); // Advance to next loop tree | |
1091 | |
1092 IdealLoopTree* current() { return _curnt; } // Return current value of iterator. | |
1093 }; | |
1972 | 1094 |
1095 #endif // SHARE_VM_OPTO_LOOPNODE_HPP |