graal-compiler: annotate src/share/vm/opto/gcm.cpp @ 1941:79d04223b8a5
Added caching for resolved types and resolved fields.
This is crucial because local load elimination produces wrong results if equality of two RiField objects with the same object and the same RiType is not guaranteed. The caching ensures that the default equals implementation is sufficient.
author    Thomas Wuerthinger <wuerthinger@ssw.jku.at>
date      Tue, 28 Dec 2010 18:33:26 +0100
parents   0e35fa8ebccd
children  f95d63e2154a
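A minimal sketch of the caching idea described above, in C++ for illustration only; the actual change lives in Graal's runtime-interface code, and every name below (resolve_field, field_cache, the RiField/RiType layouts) is hypothetical. The point is that resolving the same field twice must return the same instance, so the default identity-based equals suffices for local load elimination:

#include <map>
#include <utility>

struct RiType  { /* resolved type data */ };
struct RiField { RiType* holder; int offset; };

// One canonical RiField per (holder, offset) key. Repeated resolution
// returns the cached instance, so two lookups of the same field compare
// equal under the default identity-based equals.
static std::map<std::pair<RiType*, int>, RiField*> field_cache;

static RiField* resolve_field(RiType* holder, int offset) {
  std::pair<RiType*, int> key(holder, offset);
  std::map<std::pair<RiType*, int>, RiField*>::iterator it = field_cache.find(key);
  if (it != field_cache.end())  return it->second;  // cache hit: same instance
  RiField* f = new RiField();                       // resolve once
  f->holder = holder;
  f->offset = offset;
  field_cache[key] = f;                             // remember the canonical copy
  return f;
}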
/*
 * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style

#include "incls/_precompiled.incl"
#include "incls/_gcm.cpp.incl"

// To avoid float value underflow
#define MIN_BLOCK_FREQUENCY 1.e-35f

//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b. Look for projections of n and make sure they
// are in b also.
void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
  // Set basic block of n, Add n to b,
  _bbs.map(n->_idx, b);
  b->add_inst(n);

  // After Matching, nearly any old Node may have projections trailing it.
  // These are usually machine-dependent flags. In any case, they might
  // float to another block below this one. Move them up.
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* use = n->fast_out(i);
    if (use->is_Proj()) {
      Block* buse = _bbs[use->_idx];
      if (buse != b) {            // In wrong block?
        if (buse != NULL)
          buse->find_remove(use); // Remove from wrong block
        _bbs.map(use->_idx, b);   // Re-insert in this block
        b->add_inst(use);
      }
    }
  }
}

//----------------------------replace_block_proj_ctrl-------------------------
// Nodes that have is_block_proj() nodes as their control need to use
// the appropriate Region for their actual block as their control since
// the projection will be in a predecessor block.
void PhaseCFG::replace_block_proj_ctrl( Node *n ) {
  const Node *in0 = n->in(0);
  assert(in0 != NULL, "Only control-dependent");
  const Node *p = in0->is_block_proj();
  if (p != NULL && p != n) {    // Control from a block projection?
    assert(!n->pinned() || n->is_SafePointScalarObject(), "only SafePointScalarObject pinned node is expected here");
    // Find trailing Region
    Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
    uint j = 0;
    if (pb->_num_succs != 1) {  // More than 1 successor?
      // Search for successor
      uint max = pb->_nodes.size();
      assert( max > 1, "" );
      uint start = max - pb->_num_succs;
      // Find which output path belongs to projection
      for (j = start; j < max; j++) {
        if( pb->_nodes[j] == in0 )
          break;
      }
      assert( j < max, "must find" );
      // Change control to match head of successor basic block
      j -= start;
    }
    n->set_req(0, pb->_succs[j]->head());
  }
}


//------------------------------schedule_pinned_nodes--------------------------
// Set the basic block for Nodes pinned into blocks
void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
  // Allocate node stack of size C->unique()+8 to avoid frequent realloc
  GrowableArray <Node *> spstack(C->unique()+8);
  spstack.push(_root);
  while ( spstack.is_nonempty() ) {
    Node *n = spstack.pop();
    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
      if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down!
        assert( n->in(0), "pinned Node must have Control" );
        // Before setting block replace block_proj control edge
        replace_block_proj_ctrl(n);
        Node *input = n->in(0);
        while( !input->is_block_start() )
          input = input->in(0);
        Block *b = _bbs[input->_idx]; // Basic block of controlling input
        schedule_node_into_block(n, b);
      }
      for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs
        if( n->in(i) != NULL )
          spstack.push(n->in(i));
      }
    }
  }
}

#ifdef ASSERT
// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that it is dominated by b1, the deepest
// input observed until b2.
static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
  if (b1 == NULL) return;
  assert(b1->_dom_depth < b2->_dom_depth, "sanity");
  Block* tmp = b2;
  while (tmp != b1 && tmp != NULL) {
    tmp = tmp->_idom;
  }
  if (tmp != b1) {
    // Detected an unschedulable graph. Print some nice stuff and die.
    tty->print_cr("!!! Unschedulable graph !!!");
    for (uint j=0; j<n->len(); j++) { // For all inputs
      Node* inn = n->in(j);           // Get input
      if (inn == NULL) continue;      // Ignore NULL, missing inputs
      Block* inb = bbs[inn->_idx];
      tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
                 inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
      inn->dump();
    }
    tty->print("Failing node: ");
    n->dump();
    assert(false, "unschedulable graph");
  }
}
#endif

static Block* find_deepest_input(Node* n, Block_Array &bbs) {
  // Find the last input dominated by all other inputs.
  Block* deepb           = NULL;        // Deepest block so far
  int    deepb_dom_depth = 0;
  for (uint k = 0; k < n->len(); k++) { // For all inputs
    Node* inn = n->in(k);               // Get input
    if (inn == NULL) continue;          // Ignore NULL, missing inputs
    Block* inb = bbs[inn->_idx];
    assert(inb != NULL, "must already have scheduled this input");
    if (deepb_dom_depth < (int) inb->_dom_depth) {
      // The new inb must be dominated by the previous deepb.
      // The various inputs must be linearly ordered in the dom
      // tree, or else there will not be a unique deepest block.
      DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
      deepb = inb;                      // Save deepest block
      deepb_dom_depth = deepb->_dom_depth;
    }
  }
  assert(deepb != NULL, "must be at least one input to n");
  return deepb;
}


//------------------------------schedule_early---------------------------------
// Find the earliest Block any instruction can be placed in. Some instructions
// are pinned into Blocks. Unpinned instructions can appear in last block in
// which all their inputs occur.
bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
  // Allocate stack with enough space to avoid frequent realloc
  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
  // roots.push(_root); _root will be processed among C->top() inputs
  roots.push(C->top());
  visited.set(C->top()->_idx);

  while (roots.size() != 0) {
    // Use local variables nstack_top_n & nstack_top_i to cache values
    // on stack's top.
    Node *nstack_top_n = roots.pop();
    uint  nstack_top_i = 0;
//while_nstack_nonempty:
    while (true) {
      // Get parent node and next input's index from stack's top.
      Node *n = nstack_top_n;
      uint  i = nstack_top_i;

      if (i == 0) {
        // Fixup some control. Constants without control get attached
        // to root and nodes that use is_block_proj() nodes should be attached
        // to the region that starts their block.
        const Node *in0 = n->in(0);
        if (in0 != NULL) {     // Control-dependent?
          replace_block_proj_ctrl(n);
        } else {               // n->in(0) == NULL
          if (n->req() == 1) { // This guy is a constant with NO inputs?
            n->set_req(0, _root);
          }
        }
      }

      // First, visit all inputs and force them to get a block. If an
      // input is already in a block we quit following inputs (to avoid
      // cycles). Instead we put that Node on a worklist to be handled
      // later (since ITS inputs may not have a block yet).
      bool done = true;           // Assume all n's inputs will be processed
      while (i < n->len()) {      // For all inputs
        Node *in = n->in(i);      // Get input
        ++i;
        if (in == NULL) continue; // Ignore NULL, missing inputs
        int is_visited = visited.test_set(in->_idx);
        if (!_bbs.lookup(in->_idx)) { // Missing block selection?
          if (is_visited) {
            // assert( !visited.test(in->_idx), "did not schedule early" );
            return false;
          }
          nstack.push(n, i);      // Save parent node and next input's index.
          nstack_top_n = in;      // Process current input now.
          nstack_top_i = 0;
          done = false;           // Not all n's inputs processed.
          break; // continue while_nstack_nonempty;
        } else if (!is_visited) { // Input not yet visited?
          roots.push(in);         // Visit this guy later, using worklist
        }
      }
      if (done) {
        // All of n's inputs have been processed, complete post-processing.

        // Some instructions are pinned into a block. These include Region,
        // Phi, Start, Return, and other control-dependent instructions and
        // any projections which depend on them.
        if (!n->pinned()) {
          // Set earliest legal block.
          _bbs.map(n->_idx, find_deepest_input(n, _bbs));
        } else {
          assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
        }

        if (nstack.is_empty()) {
          // Finished all nodes on stack.
          // Process next node on the worklist 'roots'.
          break;
        }
        // Get saved parent node and next input's index.
        nstack_top_n = nstack.node();
        nstack_top_i = nstack.index();
        nstack.pop();
      } // if (done)
    }   // while (true)
  }     // while (roots.size() != 0)
  return true;
}

//------------------------------dom_lca----------------------------------------
// Find least common ancestor in dominator tree
// LCA is a current notion of LCA, to be raised above 'this'.
// As a convenient boundary condition, return 'this' if LCA is NULL.
// Find the LCA of those two nodes.
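// E.g., if 'this' is at _dom_depth 7 and LCA at depth 4, 'this' is first
// walked up the _idom chain to depth 4, then both walk up in lock step
// until they meet.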
Block* Block::dom_lca(Block* LCA) {
  if (LCA == NULL || LCA == this) return this;

  Block* anc = this;
  while (anc->_dom_depth > LCA->_dom_depth)
    anc = anc->_idom;         // Walk up till anc is as high as LCA

  while (LCA->_dom_depth > anc->_dom_depth)
    LCA = LCA->_idom;         // Walk up till LCA is as high as anc

  while (LCA != anc) {        // Walk both up till they are the same
    LCA = LCA->_idom;
    anc = anc->_idom;
  }

  return LCA;
}

//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use. If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
  Block* buse = bbs[use->_idx];
  if (buse == NULL) return LCA;  // Unused killing Projs have no use block
  if (!use->is_Phi()) return buse->dom_lca(LCA);
  uint pmax = use->req();        // Number of Phi inputs
  // Why doesn't this loop just break after finding the matching input to
  // the Phi? Well...it's like this. I do not have true def-use/use-def
  // chains. Means I cannot distinguish, from the def-use direction, which
  // of many use-defs lead from the same use to the same def. That is, this
  // Phi might have several uses of the same def. Each use appears in a
  // different predecessor block. But when I enter here, I cannot distinguish
  // which use-def edge I should find the predecessor block for. So I find
  // them all. Means I do a little extra work if a Phi uses the same value
  // more than once.
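  // E.g., a Phi of the form (Phi mem, X, X) uses def X on two control paths;
  // both matching inputs are visited below and both predecessor blocks get
  // folded into the LCA.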
  for (uint j=1; j<pmax; j++) { // For all inputs
    if (use->in(j) == def) {    // Found matching input?
      Block* pred = bbs[buse->pred(j)->_idx];
      LCA = pred->dom_lca(LCA);
    }
  }
  return LCA;
}

//----------------------------raise_LCA_above_marks----------------------------
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index. Return the LCA (in the dom tree)
// of all marked blocks. If there are none marked, return the original
// LCA.
static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
                                    Block* early, Block_Array &bbs) {
  Block_List worklist;
  worklist.push(LCA);
  while (worklist.size() > 0) {
    Block* mid = worklist.pop();
    if (mid == early) continue; // stop searching here

    // Test and set the visited bit.
    if (mid->raise_LCA_visited() == mark) continue; // already visited

    // Don't process the current LCA, otherwise the search may terminate early
    if (mid != LCA && mid->raise_LCA_mark() == mark) {
      // Raise the LCA.
      LCA = mid->dom_lca(LCA);
      if (LCA == early) break; // stop searching everywhere
      assert(early->dominates(LCA), "early is high enough");
      // Resume searching at that point, skipping intermediate levels.
      worklist.push(LCA);
      if (LCA == mid)
        continue; // Don't mark as visited to avoid early termination.
    } else {
      // Keep searching through this block's predecessors.
      for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
        Block* mid_parent = bbs[ mid->pred(j)->_idx ];
        worklist.push(mid_parent);
      }
    }
    mid->set_raise_LCA_visited(mark);
  }
  return LCA;
}

//--------------------------memory_early_block--------------------------------
// This is a variation of find_deepest_input, the heart of schedule_early.
// Find the "early" block for a load, if we considered only memory and
// address inputs, that is, if other data inputs were ignored.
//
// Because a subset of edges are considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node. We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
  Node* base;
  Node* index;
  Node* store = load->in(MemNode::Memory);
  load->as_Mach()->memory_inputs(base, index);

  assert(base != NodeSentinel && index != NodeSentinel,
         "unexpected base/index inputs");

  Node* mem_inputs[4];
  int mem_inputs_length = 0;
  if (base != NULL)  mem_inputs[mem_inputs_length++] = base;
  if (index != NULL) mem_inputs[mem_inputs_length++] = index;
  if (store != NULL) mem_inputs[mem_inputs_length++] = store;

  // In the comparison below, add one to account for the control input,
  // which may be null, but always takes up a spot in the in array.
  if (mem_inputs_length + 1 < (int) load->req()) {
    // This "load" has more inputs than just the memory, base and index inputs.
    // For purposes of checking anti-dependences, we need to start
    // from the early block of only the address portion of the instruction,
    // and ignore other blocks that may have factored into the wider
    // schedule_early calculation.
    if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);

    Block* deepb           = NULL;        // Deepest block so far
    int    deepb_dom_depth = 0;
    for (int i = 0; i < mem_inputs_length; i++) {
      Block* inb = bbs[mem_inputs[i]->_idx];
      if (deepb_dom_depth < (int) inb->_dom_depth) {
        // The new inb must be dominated by the previous deepb.
        // The various inputs must be linearly ordered in the dom
        // tree, or else there will not be a unique deepest block.
        DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
        deepb = inb;                      // Save deepest block
        deepb_dom_depth = deepb->_dom_depth;
      }
    }
    early = deepb;
  }

  return early;
}

//--------------------------insert_anti_dependences---------------------------
// A load may need to witness memory that nearby stores can overwrite.
// For each nearby store, either insert an "anti-dependence" edge
// from the load to the store, or else move LCA upward to force the
// load to (eventually) be scheduled in a block above the store.
//
// Do not add edges to stores on distinct control-flow paths;
// only add edges to stores which might interfere.
//
// Return the (updated) LCA. There will not be any possibly interfering
// store between the load's "early block" and the updated LCA.
// Any stores in the updated LCA will have new precedence edges
// back to the load. The caller is expected to schedule the load
// in the LCA, in which case the precedence edges will make LCM
// preserve anti-dependences. The caller may also hoist the load
// above the LCA, if it is not the early block.
Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
  assert(load->needs_anti_dependence_check(), "must be a load of some sort");
  assert(LCA != NULL, "");
  DEBUG_ONLY(Block* LCA_orig = LCA);

  // Compute the alias index. Loads and stores with different alias indices
  // do not need anti-dependence edges.
  uint load_alias_idx = C->get_alias_index(load->adr_type());
#ifdef ASSERT
  if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
      (PrintOpto || VerifyAliases ||
       PrintMiscellaneous && (WizardMode || Verbose))) {
    // Load nodes should not consume all of memory.
    // Reporting a bottom type indicates a bug in adlc.
    // If some particular type of node validly consumes all of memory,
    // sharpen the preceding "if" to exclude it, so we can catch bugs here.
    tty->print_cr("*** Possible Anti-Dependence Bug: Load consumes all of memory.");
    load->dump(2);
    if (VerifyAliases) assert(load_alias_idx != Compile::AliasIdxBot, "");
  }
#endif
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
         "String compare is only known 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals),
         "String equals is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf),
         "String indexOf is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq),
         "Arrays equals is a 'load' that does not conflict with any stores");

  if (!C->alias_type(load_alias_idx)->is_rewritable()) {
    // It is impossible to spoil this load by putting stores before it,
    // because we know that the stores will never update the value
    // which 'load' must witness.
    return LCA;
  }

  node_idx_t load_index = load->_idx;

  // Note the earliest legal placement of 'load', as determined by
  // the unique point in the dom tree where all memory effects
  // and other inputs are first available. (Computed by schedule_early.)
  // For normal loads, 'early' is the shallowest place (dom graph wise)
  // to look for anti-deps between this load and any store.
  Block* early = _bbs[load_index];

  // If we are subsuming loads, compute an "early" block that only considers
  // memory or address inputs. This block may be different from the
  // schedule_early block in that it could be at an even shallower depth in the
  // dominator tree, and allow for a broader discovery of anti-dependences.
  if (C->subsume_loads()) {
    early = memory_early_block(load, early, _bbs);
  }

  ResourceArea *area = Thread::current()->resource_area();
  Node_List worklist_mem(area);     // prior memory state to store
  Node_List worklist_store(area);   // possible-def to explore
  Node_List worklist_visited(area); // visited mergemem nodes
  Node_List non_early_stores(area); // all relevant stores outside of early
  bool must_raise_LCA = false;

#ifdef TRACK_PHI_INPUTS
  // %%% This extra checking fails because MergeMem nodes are not GVNed.
  // Provide "phi_inputs" to check if every input to a PhiNode is from the
  // original memory state. This indicates a PhiNode which should not
  // prevent the load from sinking. For such a block, set_raise_LCA_mark
  // may be overly conservative.
  // Mechanism: count inputs seen for each Phi encountered in worklist_store.
  DEBUG_ONLY(GrowableArray<uint> phi_inputs(area, C->unique(),0,0));
#endif

  // 'load' uses some memory state; look for users of the same state.
  // Recurse through MergeMem nodes to the stores that use them.

  // Each of these stores is a possible definition of memory
  // that 'load' needs to use. We need to force 'load'
  // to occur before each such store. When the store is in
  // the same block as 'load', we insert an anti-dependence
  // edge load->store.

  // The relevant stores "nearby" the load consist of a tree rooted
  // at initial_mem, with internal nodes of type MergeMem.
  // Therefore, the branches visited by the worklist are of this form:
  //    initial_mem -> (MergeMem ->)* store
  // The anti-dependence constraints apply only to the fringe of this tree.

  Node* initial_mem = load->in(MemNode::Memory);
  worklist_store.push(initial_mem);
  worklist_visited.push(initial_mem);
  worklist_mem.push(NULL);
  while (worklist_store.size() > 0) {
    // Examine a nearby store to see if it might interfere with our load.
    Node* mem   = worklist_mem.pop();
    Node* store = worklist_store.pop();
    uint op = store->Opcode();

    // MergeMems do not directly have anti-deps.
    // Treat them as internal nodes in a forward tree of memory states,
    // the leaves of which are each a 'possible-def'.
    if (store == initial_mem   // root (exclusive) of tree we are searching
        || op == Op_MergeMem   // internal node of tree we are searching
        ) {
      mem = store;   // It's not a possibly interfering store.
      if (store == initial_mem)
        initial_mem = NULL;  // only process initial memory once

      for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
        store = mem->fast_out(i);
        if (store->is_MergeMem()) {
          // Be sure we don't get into combinatorial problems.
          // (Allow phis to be repeated; they can merge two relevant states.)
          uint j = worklist_visited.size();
          for (; j > 0; j--) {
            if (worklist_visited.at(j-1) == store) break;
          }
          if (j > 0) continue; // already on work list; do not repeat
          worklist_visited.push(store);
        }
        worklist_mem.push(mem);
        worklist_store.push(store);
      }
      continue;
    }

    if (op == Op_MachProj || op == Op_Catch)  continue;
    if (store->needs_anti_dependence_check()) continue; // not really a store

    // Compute the alias index. Loads and stores with different alias
    // indices do not need anti-dependence edges. Wide MemBar's are
    // anti-dependent on everything (except immutable memories).
    const TypePtr* adr_type = store->adr_type();
    if (!C->can_alias(adr_type, load_alias_idx)) continue;

    // Most slow-path runtime calls do NOT modify Java memory, but
    // they can block and so write Raw memory.
    if (store->is_Mach()) {
      MachNode* mstore = store->as_Mach();
      if (load_alias_idx != Compile::AliasIdxRaw) {
        // Check for call into the runtime using the Java calling
        // convention (and from there into a wrapper); it has no
        // _method. Can't do this optimization for Native calls because
        // they CAN write to Java memory.
        if (mstore->ideal_Opcode() == Op_CallStaticJava) {
          assert(mstore->is_MachSafePoint(), "");
          MachSafePointNode* ms = (MachSafePointNode*) mstore;
          assert(ms->is_MachCallJava(), "");
          MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
          if (mcj->_method == NULL) {
            // These runtime calls do not write to Java visible memory
            // (other than Raw) and so do not require anti-dependence edges.
            continue;
          }
        }
        // Same for SafePoints: they read/write Raw but only read otherwise.
        // This is basically a workaround for SafePoints only defining control
        // instead of control + memory.
        if (mstore->ideal_Opcode() == Op_SafePoint)
          continue;
      } else {
        // Some raw memory, such as the load of "top" at an allocation,
        // can be control dependent on the previous safepoint. See
        // comments in GraphKit::allocate_heap() about control input.
        // Inserting an anti-dep between such a safepoint and a use
        // creates a cycle, and will cause a subsequent failure in
        // local scheduling. (BugId 4919904)
        // (%%% How can a control input be a safepoint and not a projection??)
        if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
          continue;
      }
    }

    // Identify a block that the current load must be above,
    // or else observe that 'store' is all the way up in the
    // earliest legal block for 'load'. In the latter case,
    // immediately insert an anti-dependence edge.
    Block* store_block = _bbs[store->_idx];
    assert(store_block != NULL, "unused killing projections skipped above");

    if (store->is_Phi()) {
      // 'load' uses memory which is one (or more) of the Phi's inputs.
      // It must be scheduled not before the Phi, but rather before
      // each of the relevant Phi inputs.
      //
      // Instead of finding the LCA of all inputs to a Phi that match 'mem',
      // we mark each corresponding predecessor block and do a combined
      // hoisting operation later (raise_LCA_above_marks).
      //
      // Do not assert(store_block != early, "Phi merging memory after access")
      // PhiNode may be at start of block 'early' with backedge to 'early'
      DEBUG_ONLY(bool found_match = false);
      for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
        if (store->in(j) == mem) {   // Found matching input?
          DEBUG_ONLY(found_match = true);
          Block* pred_block = _bbs[store_block->pred(j)->_idx];
          if (pred_block != early) {
            // If any predecessor of the Phi matches the load's "early block",
            // we do not need a precedence edge between the Phi and 'load'
            // since the load will be forced into a block preceding the Phi.
            pred_block->set_raise_LCA_mark(load_index);
            assert(!LCA_orig->dominates(pred_block) ||
                   early->dominates(pred_block), "early is high enough");
            must_raise_LCA = true;
          } else {
            // anti-dependent upon PHI pinned below 'early', no edge needed
            LCA = early;             // but can not schedule below 'early'
          }
        }
      }
      assert(found_match, "no worklist bug");
#ifdef TRACK_PHI_INPUTS
#ifdef ASSERT
      // This assert asks about correct handling of PhiNodes, which may not
      // have all input edges directly from 'mem'. See BugId 4621264
      int num_mem_inputs = phi_inputs.at_grow(store->_idx,0) + 1;
      // Increment by exactly one even if there are multiple copies of 'mem'
      // coming into the phi, because we will run this block several times
      // if there are several copies of 'mem'. (That's how DU iterators work.)
      phi_inputs.at_put(store->_idx, num_mem_inputs);
      assert(PhiNode::Input + num_mem_inputs < store->req(),
             "Expect at least one phi input will not be from original memory state");
#endif //ASSERT
#endif //TRACK_PHI_INPUTS
    } else if (store_block != early) {
      // 'store' is between the current LCA and earliest possible block.
      // Label its block, and decide later on how to raise the LCA
      // to include the effect on LCA of this store.
      // If this store's block gets chosen as the raised LCA, we
      // will find him on the non_early_stores list and stick him
      // with a precedence edge.
      // (But, don't bother if LCA is already raised all the way.)
      if (LCA != early) {
        store_block->set_raise_LCA_mark(load_index);
        must_raise_LCA = true;
        non_early_stores.push(store);
      }
    } else {
      // Found a possibly-interfering store in the load's 'early' block.
      // This means 'load' cannot sink at all in the dominator tree.
      // Add an anti-dep edge, and squeeze 'load' into the highest block.
      assert(store != load->in(0), "dependence cycle found");
      if (verify) {
        assert(store->find_edge(load) != -1, "missing precedence edge");
      } else {
        store->add_prec(load);
      }
      LCA = early;
      // This turns off the process of gathering non_early_stores.
    }
  }
  // (Worklist is now empty; all nearby stores have been visited.)

  // Finished if 'load' must be scheduled in its 'early' block.
  // If we found any stores there, they have already been given
  // precedence edges.
  if (LCA == early) return LCA;

  // We get here only if there are no possibly-interfering stores
  // in the load's 'early' block. Move LCA up above all predecessors
  // which contain stores we have noted.
  //
  // The raised LCA block can be a home to such interfering stores,
  // but its predecessors must not contain any such stores.
  //
  // The raised LCA will be a lower bound for placing the load,
  // preventing the load from sinking past any block containing
  // a store that may invalidate the memory state required by 'load'.
  if (must_raise_LCA)
    LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
  if (LCA == early) return LCA;

  // Insert anti-dependence edges from 'load' to each store
  // in the non-early LCA block.
  // Mine the non_early_stores list for such stores.
  if (LCA->raise_LCA_mark() == load_index) {
    while (non_early_stores.size() > 0) {
      Node* store = non_early_stores.pop();
      Block* store_block = _bbs[store->_idx];
      if (store_block == LCA) {
        // add anti_dependence from store to load in its own block
        assert(store != load->in(0), "dependence cycle found");
        if (verify) {
          assert(store->find_edge(load) != -1, "missing precedence edge");
        } else {
          store->add_prec(load);
        }
      } else {
        assert(store_block->raise_LCA_mark() == load_index, "block was marked");
        // Any other stores we found must be either inside the new LCA
        // or else outside the original LCA. In the latter case, they
        // did not interfere with any use of 'load'.
        assert(LCA->dominates(store_block)
               || !LCA_orig->dominates(store_block), "no stray stores");
      }
    }
  }

  // Return the highest block containing stores; any stores
  // within that block have been given anti-dependence edges.
  return LCA;
}

// This class is used to iterate backwards over the nodes in the graph.

class Node_Backward_Iterator {

private:
  Node_Backward_Iterator();

public:
  // Constructor for the iterator
  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);

  // Postincrement operator to iterate over the nodes
  Node *next();

private:
  VectorSet   &_visited;
  Node_List   &_stack;
  Block_Array &_bbs;
};

// Constructor for the Node_Backward_Iterator
Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
  : _visited(visited), _stack(stack), _bbs(bbs) {
  // The stack should contain exactly the root
  stack.clear();
  stack.push(root);

  // Clear the visited bits
  visited.Clear();
}

// Iterator for the Node_Backward_Iterator
Node *Node_Backward_Iterator::next() {

  // If the _stack is empty, then just return NULL: finished.
  if ( !_stack.size() )
    return NULL;

  // '_stack' is emulating a real _stack. The 'visit-all-users' loop has been
  // made stateless, so I do not need to record the index 'i' on my _stack.
  // Instead I visit all users each time, scanning for unvisited users.
  // I visit unvisited not-anti-dependence users first, then anti-dependent
  // children next.
  Node *self = _stack.pop();

  // I cycle here when I am entering a deeper level of recursion.
  // The key variable 'self' was set prior to jumping here.
  while( 1 ) {

    _visited.set(self->_idx);

    // Now schedule all uses as late as possible.
    uint src     = self->is_Proj() ? self->in(0)->_idx : self->_idx;
    uint src_rpo = _bbs[src]->_rpo;

    // Schedule all nodes in a post-order visit
    Node *unvisited = NULL; // Unvisited anti-dependent Node, if any

    // Scan for unvisited nodes
    for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
      // For all uses, schedule late
      Node* n = self->fast_out(i); // Use

      // Skip already visited children
      if ( _visited.test(n->_idx) )
        continue;

      // do not traverse backward control edges
      Node *use = n->is_Proj() ? n->in(0) : n;
      uint use_rpo = _bbs[use->_idx]->_rpo;

      if ( use_rpo < src_rpo )
        continue;

      // Phi nodes always precede uses in a basic block
      if ( use_rpo == src_rpo && use->is_Phi() )
        continue;

      unvisited = n; // Found unvisited

      // Check for possible-anti-dependent
      if( !n->needs_anti_dependence_check() )
        break; // Not visited, not anti-dep; schedule it NOW
    }

    // Did I find an unvisited not-anti-dependent Node?
    if ( !unvisited )
      break; // All done with children; post-visit 'self'

    // Visit the unvisited Node. Contains the obvious push to
    // indicate I'm entering a deeper level of recursion. I push the
    // old state onto the _stack and set a new state and loop (recurse).
    _stack.push(self);
    self = unvisited;
  } // End recursion loop

  return self;
}

//------------------------------ComputeLatenciesBackwards----------------------
// Compute the latency of all the instructions.
void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
#ifndef PRODUCT
  if (trace_opto_pipelining())
    tty->print("\n#---- ComputeLatenciesBackwards ----\n");
#endif

  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
  Node *n;

  // Walk over all the nodes from last to first
  while (n = iter.next()) {
    // Set the latency for the definitions of this instruction
    partial_latency_of_defs(n);
  }
} // end ComputeLatenciesBackwards

//------------------------------partial_latency_of_defs------------------------
// Compute the latency impact of this node on all defs. This computes
// a number that increases as we approach the beginning of the routine.
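// E.g., a use with latency 5 reached through an input edge of latency 3
// raises its def's latency to at least 8; the maximum over all uses is kept.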
void PhaseCFG::partial_latency_of_defs(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
               n->_idx, _node_latency->at_grow(n->_idx));
    dump();
  }
#endif

  if (n->is_Proj())
    n = n->in(0);

  if (n->is_Root())
    return;

  uint nlen = n->len();
  uint use_latency = _node_latency->at_grow(n->_idx);
  uint use_pre_order = _bbs[n->_idx]->_pre_order;

  for ( uint j=0; j<nlen; j++ ) {
    Node *def = n->in(j);

    if (!def || def == n)
      continue;

    // Walk backwards thru projections
    if (def->is_Proj())
      def = def->in(0);

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("# in(%2d): ", j);
      def->dump();
    }
#endif

    // If the defining block is not known, assume it is ok
    Block *def_block = _bbs[def->_idx];
    uint def_pre_order = def_block ? def_block->_pre_order : 0;

    if ( (use_pre_order <  def_pre_order) ||
         (use_pre_order == def_pre_order && n->is_Phi()) )
      continue;

    uint delta_latency = n->latency(j);
    uint current_latency = delta_latency + use_latency;

    if (_node_latency->at_grow(def->_idx) < current_latency) {
      _node_latency->at_put_grow(def->_idx, current_latency);
    }

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
                    use_latency, j, delta_latency, current_latency, def->_idx,
                    _node_latency->at_grow(def->_idx));
    }
#endif
  }
}

//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
  // If self-reference, return no latency
  if (use == n || use->is_Root())
    return 0;

  uint def_pre_order = _bbs[def->_idx]->_pre_order;
  uint latency = 0;

  // If the use is not a projection, then it is simple...
  if (!use->is_Proj()) {
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("# out(): ");
      use->dump();
    }
#endif

    uint use_pre_order = _bbs[use->_idx]->_pre_order;

    if (use_pre_order < def_pre_order)
      return 0;

    if (use_pre_order == def_pre_order && use->is_Phi())
      return 0;

    uint nlen = use->len();
    uint nl = _node_latency->at_grow(use->_idx);

    for ( uint j=0; j<nlen; j++ ) {
      if (use->in(j) == n) {
        // Change this if we want local latencies
        uint ul = use->latency(j);
        uint  l = ul + nl;
        if (latency < l) latency = l;
#ifndef PRODUCT
        if (trace_opto_pipelining()) {
          tty->print_cr("# %d + edge_latency(%d) == %d -> %d, latency = %d",
                        nl, j, ul, l, latency);
        }
#endif
      }
    }
  } else {
    // This is a projection, just grab the latency of the use(s)
    for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
      uint l = latency_from_use(use, def, use->fast_out(j));
      if (latency < l) latency = l;
    }
  }

  return latency;
}

//------------------------------latency_from_uses------------------------------
// Compute the latency of this instruction relative to all of its uses.
// This computes a number that increases as we approach the beginning of the
// routine.
void PhaseCFG::latency_from_uses(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
               n->_idx, _node_latency->at_grow(n->_idx));
    dump();
  }
#endif
  uint latency=0;
  const Node *def = n->is_Proj() ? n->in(0): n;

  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    uint l = latency_from_use(n, def, n->fast_out(i));

    if (latency < l) latency = l;
  }

  _node_latency->at_put_grow(n->_idx, latency);
}

//------------------------------hoist_to_cheaper_block-------------------------
// Pick a block for node self, between early and LCA, that is a cheaper
// alternative to LCA.
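// "Cheaper" means a lower execution frequency. A dominating block whose
// frequency is no worse than least_freq*delta may still be chosen when
// placing the node there hides its latency (see the test in the loop below).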
Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
  const double delta = 1+PROB_UNLIKELY_MAG(4);
  Block* least       = LCA;
  double least_freq  = least->_freq;
  uint target        = _node_latency->at_grow(self->_idx);
  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
  bool in_latency    = (target <= start_latency);
  const Block* root_block = _bbs[_root->_idx];

  // Turn off latency scheduling if scheduling is just plain off
  if (!C->do_scheduling())
    in_latency = true;

  // Do not hoist (to cover latency) instructions which target a
  // single register. Hoisting stretches the live range of the
  // single register and may force spilling.
  MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
  if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
    in_latency = true;

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# Find cheaper block for latency %d: ",
               _node_latency->at_grow(self->_idx));
    self->dump();
    tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
                  LCA->_pre_order,
                  LCA->_nodes[0]->_idx,
                  start_latency,
                  LCA->_nodes[LCA->end_idx()]->_idx,
                  end_latency,
                  least_freq);
  }
#endif

  // Walk up the dominator tree from LCA (Lowest common ancestor) to
  // the earliest legal location. Capture the least execution frequency.
  while (LCA != early) {
    LCA = LCA->_idom;         // Follow up the dominator tree

    if (LCA == NULL) {
      // Bailout without retry
      C->record_method_not_compilable("late schedule failed: LCA == NULL");
      return least;
    }

    // Don't hoist machine instructions to the root basic block
    if (mach && LCA == root_block)
      break;

    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
    uint end_idx   = LCA->end_idx();
    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
    double LCA_freq = LCA->_freq;
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
                    LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
    }
#endif
    if (LCA_freq < least_freq              || // Better Frequency
        ( !in_latency                   &&    // No block containing latency
          LCA_freq < least_freq * delta &&    // No worse frequency
          target >= end_lat             &&    // within latency range
          !self->is_iteratively_computed() )  // But don't hoist IV increments
            // because they may end up above other uses of their phi forcing
            // their result register to be different from their input.
       ) {
      least = LCA;            // Found cheaper block
      least_freq = LCA_freq;
      start_latency = start_lat;
      end_latency = end_lat;
      if (target <= start_lat)
        in_latency = true;
    }
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("# Choose block B%d with start latency=%d and freq=%g",
                  least->_pre_order, start_latency, least_freq);
  }
#endif

  // See if the latency needs to be updated
  if (target < end_latency) {
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
    }
#endif
    _node_latency->at_put_grow(self->_idx, end_latency);
    partial_latency_of_defs(self);
  }

  return least;
}


1084 //------------------------------schedule_late----------------------------------- | |
1085 // Now schedule all codes as LATE as possible. This is the LCA in the | |
1086 // dominator tree of all USES of a value. Pick the block with the least | |
1087 // loop nesting depth that is lowest in the dominator tree. | |
1088 extern const char must_clone[]; | |
1089 void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) { | |
1090 #ifndef PRODUCT | |
1091 if (trace_opto_pipelining()) | |
1092 tty->print("\n#---- schedule_late ----\n"); | |
1093 #endif | |
1094 | |
1095 Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs); | |
1096 Node *self; | |
1097 | |
1098 // Walk over all the nodes from last to first | |
1099 while (self = iter.next()) { | |
1100 Block* early = _bbs[self->_idx]; // Earliest legal placement | |
1101 | |
1102 if (self->is_top()) { | |
1103 // Top node goes in bb #2 with other constants. | |
1104 // It must be special-cased, because it has no out edges. | |
1105 early->add_inst(self); | |
1106 continue; | |
1107 } | |
1108 | |
1109 // No uses, just terminate | |
1110 if (self->outcnt() == 0) { | |
1111 assert(self->Opcode() == Op_MachProj, "sanity"); | |
1112 continue; // Must be a dead machine projection | |
1113 } | |
1114 | |
1115 // If node is pinned in the block, then no scheduling can be done. | |
1116 if( self->pinned() ) // Pinned in block? | |
1117 continue; | |
1118 | |
1119 MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL; | |
1120 if (mach) { | |
1121 switch (mach->ideal_Opcode()) { | |
1122 case Op_CreateEx: | |
1123 // Don't move exception creation | |
1124 early->add_inst(self); | |
1125 continue; | |
1126 break; | |
1127 case Op_CheckCastPP: | |
1128 // Don't move CheckCastPP nodes away from their input, if the input | |
1129 // is a rawptr (5071820). | |
1130 Node *def = self->in(1); | |
1131 if (def != NULL && def->bottom_type()->base() == Type::RawPtr) { | |
1132 early->add_inst(self); | |
833 | 1133 #ifdef ASSERT |
1134 _raw_oops.push(def); | |
1135 #endif | |
0 | 1136 continue; |
1137 } | |
1138 break; | |
1139 } | |
1140 } | |
1141 | |
1142 // Gather LCA of all uses | |
1143 Block *LCA = NULL; | |
1144 { | |
1145 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { | |
1146 // For all uses, find LCA | |
1147 Node* use = self->fast_out(i); | |
1148 LCA = raise_LCA_above_use(LCA, use, self, _bbs); | |
1149 } | |
1150 } // (Hide defs of imax, i from rest of block.) | |
1151 | |
1152 // Place temps in the block of their use. This isn't a | |
1153 // requirement for correctness but it reduces useless | |
1154 // interference between temps and other nodes. | |
1155 if (mach != NULL && mach->is_MachTemp()) { | |
1156 _bbs.map(self->_idx, LCA); | |
1157 LCA->add_inst(self); | |
1158 continue; | |
1159 } | |
1160 | |
1161 // Check if 'self' could be anti-dependent on memory | |
1162 if (self->needs_anti_dependence_check()) { | |
1163 // Hoist LCA above possible-defs and insert anti-dependences to | |
1164 // defs in new LCA block. | |
1165 LCA = insert_anti_dependences(LCA, self); | |
1166 } | |
1167 | |
1168 if (early->_dom_depth > LCA->_dom_depth) { | |
1169 // Somehow the LCA has moved above the earliest legal point. | |
1170 // (One way this can happen is via memory_early_block.) | |
1171 if (C->subsume_loads() == true && !C->failing()) { | |
1172 // Retry with subsume_loads == false | |
1173 // If this is the first failure, the sentinel string will "stick" | |
1174 // to the Compile object, and the C2Compiler will see it and retry. | |
1175 C->record_failure(C2Compiler::retry_no_subsuming_loads()); | |
1176 } else { | |
1177 // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) | |
1178 C->record_method_not_compilable("late schedule failed: incorrect graph"); | |
1179 } | |
1180 return; | |
1181 } | |
1182 | |
1183 // If there is no opportunity to hoist, then we're done. | |
1184 bool try_to_hoist = (LCA != early); | |
1185 | |
1186 // Nodes that must be cloned stay next to their use; no hoisting allowed. | |
1187 // Also cannot hoist nodes that alter memory or are otherwise not | |
1188 // allocatable (hoisting can make a value live longer, leading to | |
1189 // anti and output dependency problems which are normally resolved | |
1190 // by the register allocator giving everyone a different register). | |
1191 if (mach != NULL && must_clone[mach->ideal_Opcode()]) | |
1192 try_to_hoist = false; | |
1193 | |
1194 Block* late = NULL; | |
1195 if (try_to_hoist) { | |
1196 // Now find the block with the least execution frequency. | |
1197 // Start at the latest schedule and work up to the earliest schedule | |
1198 // in the dominator tree. Thus the Node will dominate all its uses. | |
1199 late = hoist_to_cheaper_block(LCA, early, self); | |
1200 } else { | |
1201 // Just use the LCA of the uses. | |
1202 late = LCA; | |
1203 } | |
1204 | |
1205 // Put the node into target block | |
1206 schedule_node_into_block(self, late); | |
1207 | |
1208 #ifdef ASSERT | |
1209 if (self->needs_anti_dependence_check()) { | |
1210 // since precedence edges are only inserted when we're sure they | |
1211 // are needed make sure that after placement in a block we don't | |
1212 // need any new precedence edges. | |
1213 verify_anti_dependences(late, self); | |
1214 } | |
1215 #endif | |
1216 } // Loop until all nodes have been visited | |
1217 | |
1218 } // end ScheduleLate | |
1219 | |
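The LCA that schedule_late() feeds into the hoisting pass is the classic least-common-ancestor walk on the dominator tree: raise the deeper block until the depths match, then climb both chains together. A small self-contained sketch, with a hypothetical Blk type standing in for HotSpot's Block and raise_LCA_above_use():

    // Hypothetical sketch; Blk stands in for HotSpot's Block, which
    // carries an immediate dominator and a dominator-tree depth.
    #include <cassert>
    #include <cstdio>
    #include <initializer_list>

    struct Blk { Blk* idom; int dom_depth; };

    // LCA in the dominator tree: level the depths, then walk both
    // chains up in lock step until they meet.
    Blk* dom_lca(Blk* a, Blk* b) {
      if (a == nullptr) return b;   // first use seen
      while (a->dom_depth > b->dom_depth) a = a->idom;
      while (b->dom_depth > a->dom_depth) b = b->idom;
      while (a != b) { a = a->idom; b = b->idom; }
      return a;
    }

    int main() {
      // 'entry' dominates both arms of a diamond; a value used in both
      // arms can be scheduled no later than 'entry'.
      Blk entry{nullptr, 0}, left{&entry, 1}, right{&entry, 1};
      Blk* lca = nullptr;
      for (Blk* use : {&left, &right}) lca = dom_lca(lca, use);
      assert(lca == &entry);
      std::printf("LCA depth: %d\n", lca->dom_depth);
      return 0;
    }

Folding dom_lca over every use gives the latest block that still dominates them all, which is exactly the starting point for the hoisting pass above.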
1220 //------------------------------GlobalCodeMotion------------------------------- | |
1221 void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) { | |
1222 ResourceMark rm; | |
1223 | |
1224 #ifndef PRODUCT | |
1225 if (trace_opto_pipelining()) { | |
1226 tty->print("\n---- Start GlobalCodeMotion ----\n"); | |
1227 } | |
1228 #endif | |
1229 | |
1230 // Initialize the bbs.map for things on the proj_list | |
1231 uint i; | |
1232 for( i=0; i < proj_list.size(); i++ ) | |
1233 _bbs.map(proj_list[i]->_idx, NULL); | |
1234 | |
1235 // Set the basic block for Nodes pinned into blocks | |
1236 Arena *a = Thread::current()->resource_area(); | |
1237 VectorSet visited(a); | |
1238 schedule_pinned_nodes( visited ); | |
1239 | |
1240 // Find the earliest Block any instruction can be placed in. Some | |
1241 // instructions are pinned into Blocks. Unpinned instructions can | |
1242 // appear in the last block in which all their inputs occur. | |
1243 visited.Clear(); | |
1244 Node_List stack(a); | |
1245 stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list | |
1246 if (!schedule_early(visited, stack)) { | |
1247 // Bailout without retry | |
1248 C->record_method_not_compilable("early schedule failed"); | |
1249 return; | |
1250 } | |
1251 | |
1252 // Build Def-Use edges. | |
1253 proj_list.push(_root); // Add real root as another root | |
1254 proj_list.pop(); | |
1255 | |
1256 // Compute the latency information (via backwards walk) for all the | |
1257 // instructions in the graph | |
1685 | 1258 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
0 | 1259 |
1260 if( C->do_scheduling() ) | |
1261 ComputeLatenciesBackwards(visited, stack); | |
1262 | |
1263 // Now schedule all nodes as LATE as possible. This is the LCA in the | |
1264 // dominator tree of all USES of a value. Pick the block with the least | |
1265 // loop nesting depth that is lowest in the dominator tree. | |
1266 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) | |
1267 schedule_late(visited, stack); | |
1268 if( C->failing() ) { | |
1269 // schedule_late fails only when graph is incorrect. | |
1270 assert(!VerifyGraphEdges, "verification should have failed"); | |
1271 return; | |
1272 } | |
1273 | |
1274 unique = C->unique(); | |
1275 | |
1276 #ifndef PRODUCT | |
1277 if (trace_opto_pipelining()) { | |
1278 tty->print("\n---- Detect implicit null checks ----\n"); | |
1279 } | |
1280 #endif | |
1281 | |
1282 // Detect implicit-null-check opportunities. Basically, find NULL checks | |
1283 // with suitable memory ops nearby. Use the memory op to do the NULL check. | |
1284 // I can generate a memory op if there is not one nearby. | |
1285 if (C->is_method_compilation()) { | |
1286 // Don't do it for natives, adapters, or runtime stubs | |
1287 int allowed_reasons = 0; | |
1288 // ...and don't do it when there have been too many traps, globally. | |
1289 for (int reason = (int)Deoptimization::Reason_none+1; | |
1290 reason < Compile::trapHistLength; reason++) { | |
1291 assert(reason < BitsPerInt, "recode bit map"); | |
1292 if (!C->too_many_traps((Deoptimization::DeoptReason) reason)) | |
1293 allowed_reasons |= nth_bit(reason); | |
1294 } | |
1295 // By reversing the loop direction we get a very minor gain on mpegaudio. | |
1296 // Feel free to revert to a forward loop for clarity. | |
1297 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { | |
1298 for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) { | |
1299 Node *proj = matcher._null_check_tests[i ]; | |
1300 Node *val = matcher._null_check_tests[i+1]; | |
1301 _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons); | |
1302 // The implicit_null_check will only perform the transformation | |
1303 // if the null branch is truly uncommon, *and* it leads to an | |
1304 // uncommon trap. Combined with the too_many_traps guards | |
1305 // above, this prevents SEGV storms reported in 6366351, | |
1306 // by recompiling offending methods without this optimization. | |
1307 } | |
1308 } | |
1309 | |
1310 #ifndef PRODUCT | |
1311 if (trace_opto_pipelining()) { | |
1312 tty->print("\n---- Start Local Scheduling ----\n"); | |
1313 } | |
1314 #endif | |
1315 | |
1316 // Schedule locally. Right now a simple topological sort. | |
1317 // Later, do a real latency aware scheduler. | |
1318 int *ready_cnt = NEW_RESOURCE_ARRAY(int,C->unique()); | |
1319 memset( ready_cnt, -1, C->unique() * sizeof(int) ); | |
1320 visited.Clear(); | |
1321 for (i = 0; i < _num_blocks; i++) { | |
1322 if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) { | |
1323 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { | |
1324 C->record_method_not_compilable("local schedule failed"); | |
1325 } | |
1326 return; | |
1327 } | |
1328 } | |
1329 | |
1330 // If we inserted any instructions between a Call and its CatchNode, | |
1331 // clone the instructions on all paths below the Catch. | |
1332 for( i=0; i < _num_blocks; i++ ) | |
1333 _blocks[i]->call_catch_cleanup(_bbs); | |
1334 | |
1335 #ifndef PRODUCT | |
1336 if (trace_opto_pipelining()) { | |
1337 tty->print("\n---- After GlobalCodeMotion ----\n"); | |
1338 for (uint i = 0; i < _num_blocks; i++) { | |
1339 _blocks[i]->dump(); | |
1340 } | |
1341 } | |
1342 #endif | |
1685 | 1343 // Dead. |
1344 _node_latency = (GrowableArray<uint> *)0xdeadbeef; | |
0 | 1345 } |
1346 | |
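The allowed_reasons mask built above packs one bit per deoptimization reason that has not yet trapped too often, so implicit null checks are only attempted where a trap is still cheap. A standalone sketch of that filter, with a made-up reason enum and trap history standing in for the Deoptimization and Compile versions:

    // Standalone sketch; the reason enum, nth_bit() and trap history
    // here are simplified stand-ins for the HotSpot versions.
    #include <cstdio>

    enum Reason { Reason_none = 0, Reason_null_check, Reason_range_check,
                  Reason_count };

    inline int nth_bit(int n) { return 1 << n; }

    // Hypothetical history: pretend range checks have trapped too often.
    bool too_many_traps(Reason r) { return r == Reason_range_check; }

    int main() {
      int allowed = 0;
      for (int r = Reason_none + 1; r < Reason_count; r++)
        if (!too_many_traps((Reason)r))
          allowed |= nth_bit(r);
      std::printf("null check allowed: %s\n",
                  (allowed & nth_bit(Reason_null_check)) ? "yes" : "no");
      return 0;
    }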
1347 | |
1348 //------------------------------Estimate_Block_Frequency----------------------- | |
1349 // Estimate block frequencies based on IfNode probabilities. | |
1350 void PhaseCFG::Estimate_Block_Frequency() { | |
418 | 1351 |
1352 // Force conditional branches leading to uncommon traps to be unlikely, | |
1353 // not because we get to the uncommon_trap with less relative frequency, | |
1354 // but because an uncommon_trap typically causes a deopt, so we only get | |
1355 // there once. | |
1356 if (C->do_freq_based_layout()) { | |
1357 Block_List worklist; | |
1358 Block* root_blk = _blocks[0]; | |
1359 for (uint i = 1; i < root_blk->num_preds(); i++) { | |
1360 Block *pb = _bbs[root_blk->pred(i)->_idx]; | |
1361 if (pb->has_uncommon_code()) { | |
1362 worklist.push(pb); | |
1363 } | |
1364 } | |
1365 while (worklist.size() > 0) { | |
1366 Block* uct = worklist.pop(); | |
1367 if (uct == _broot) continue; | |
1368 for (uint i = 1; i < uct->num_preds(); i++) { | |
1369 Block *pb = _bbs[uct->pred(i)->_idx]; | |
1370 if (pb->_num_succs == 1) { | |
1371 worklist.push(pb); | |
1372 } else if (pb->num_fall_throughs() == 2) { | |
1373 pb->update_uncommon_branch(uct); | |
1374 } | |
1375 } | |
1376 } | |
1377 } | |
0 | 1378 |
1379 // Create the loop tree and calculate loop depth. | |
1380 _root_loop = create_loop_tree(); | |
1381 _root_loop->compute_loop_depth(0); | |
1382 | |
1383 // Compute block frequency of each block, relative to a single loop entry. | |
1384 _root_loop->compute_freq(); | |
1385 | |
1386 // Adjust all frequencies to be relative to a single method entry | |
418 | 1387 _root_loop->_freq = 1.0; |
0 | 1388 _root_loop->scale_freq(); |
1389 | |
673 | 1390 // Save outmost loop frequency for LRG frequency threshold |
1391 _outer_loop_freq = _root_loop->outer_loop_freq(); | |
1392 | |
0 | 1393 // force paths ending at uncommon traps to be infrequent |
418 | 1394 if (!C->do_freq_based_layout()) { |
1395 Block_List worklist; | |
1396 Block* root_blk = _blocks[0]; | |
1397 for (uint i = 1; i < root_blk->num_preds(); i++) { | |
1398 Block *pb = _bbs[root_blk->pred(i)->_idx]; | |
1399 if (pb->has_uncommon_code()) { | |
1400 worklist.push(pb); | |
1401 } | |
0 | 1402 } |
418 | 1403 while (worklist.size() > 0) { |
1404 Block* uct = worklist.pop(); | |
1405 uct->_freq = PROB_MIN; | |
1406 for (uint i = 1; i < uct->num_preds(); i++) { | |
1407 Block *pb = _bbs[uct->pred(i)->_idx]; | |
1408 if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { | |
1409 worklist.push(pb); | |
1410 } | |
0 | 1411 } |
1412 } | |
1413 } | |
1414 | |
552 | 1415 #ifdef ASSERT |
1416 for (uint i = 0; i < _num_blocks; i++ ) { | |
1417 Block *b = _blocks[i]; | |
605 | 1418 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); |
552 | 1419 } |
1420 #endif | |
1421 | |
0 | 1422 #ifndef PRODUCT |
1423 if (PrintCFGBlockFreq) { | |
1424 tty->print_cr("CFG Block Frequencies"); | |
1425 _root_loop->dump_tree(); | |
1426 if (Verbose) { | |
1427 tty->print_cr("PhaseCFG dump"); | |
1428 dump(); | |
1429 tty->print_cr("Node dump"); | |
1430 _root->dump(99999); | |
1431 } | |
1432 } | |
1433 #endif | |
1434 } | |
1435 | |
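The second worklist above (the !do_freq_based_layout case) damps entire chains of blocks that can only end at an uncommon trap, walking backwards through single-successor predecessors. A simplified standalone sketch, with a hypothetical Blk type and PROB_MIN as a stand-in constant:

    // Simplified sketch; Blk and PROB_MIN are hypothetical stand-ins.
    #include <cstdio>
    #include <vector>

    const float PROB_MIN = 1e-6f;

    struct Blk { float freq; std::vector<Blk*> preds; int num_succs; };

    // Force blocks that can only end at an uncommon trap, and the
    // single-successor chains feeding them, down to PROB_MIN.
    void damp_uncommon_paths(std::vector<Blk*>& worklist) {
      while (!worklist.empty()) {
        Blk* uct = worklist.back(); worklist.pop_back();
        uct->freq = PROB_MIN;
        for (Blk* pb : uct->preds)
          if (pb->num_succs == 1 && pb->freq > PROB_MIN)
            worklist.push_back(pb);
      }
    }

    int main() {
      Blk trap{1.0f, {}, 0}, feeder{1.0f, {}, 1};
      trap.preds.push_back(&feeder);
      std::vector<Blk*> wl{&trap};
      damp_uncommon_paths(wl);
      std::printf("feeder freq: %g\n", feeder.freq);  // damped to 1e-06
      return 0;
    }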
1436 //----------------------------create_loop_tree-------------------------------- | |
1437 // Create a loop tree from the CFG | |
1438 CFGLoop* PhaseCFG::create_loop_tree() { | |
1439 | |
1440 #ifdef ASSERT | |
1441 assert( _blocks[0] == _broot, "" ); | |
1442 for (uint i = 0; i < _num_blocks; i++ ) { | |
1443 Block *b = _blocks[i]; | |
1444 // Check that the _loop fields are clear...we could clear them if not. | |
1445 assert(b->_loop == NULL, "clear _loop expected"); | |
1446 // Sanity check that the RPO numbering is reflected in the _blocks array. | |
1447 // It doesn't have to be for the loop tree to be built, but if it is not, | |
1448 // then the blocks have been reordered since dom graph building...which | |
1449 // would cast doubt on the RPO numbering. | |
1450 assert(b->_rpo == i, "unexpected reverse post order number"); | |
1451 } | |
1452 #endif | |
1453 | |
1454 int idct = 0; | |
1455 CFGLoop* root_loop = new CFGLoop(idct++); | |
1456 | |
1457 Block_List worklist; | |
1458 | |
1459 // Assign blocks to loops | |
1460 for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block | |
1461 Block *b = _blocks[i]; | |
1462 | |
1463 if (b->head()->is_Loop()) { | |
1464 Block* loop_head = b; | |
1465 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | |
1466 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); | |
1467 Block* tail = _bbs[tail_n->_idx]; | |
1468 | |
1469 // Defensively filter out Loop nodes for non-single-entry loops. | |
1470 // For all reasonable loops, the head occurs before the tail in RPO. | |
1471 if (i <= tail->_rpo) { | |
1472 | |
1473 // The tail and (recursive) predecessors of the tail | |
1474 // are made members of a new loop. | |
1475 | |
1476 assert(worklist.size() == 0, "nonempty worklist"); | |
1477 CFGLoop* nloop = new CFGLoop(idct++); | |
1478 assert(loop_head->_loop == NULL, "just checking"); | |
1479 loop_head->_loop = nloop; | |
1480 // Add to nloop so push_pred() will skip over inner loops | |
1481 nloop->add_member(loop_head); | |
1482 nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs); | |
1483 | |
1484 while (worklist.size() > 0) { | |
1485 Block* member = worklist.pop(); | |
1486 if (member != loop_head) { | |
1487 for (uint j = 1; j < member->num_preds(); j++) { | |
1488 nloop->push_pred(member, j, worklist, _bbs); | |
1489 } | |
1490 } | |
1491 } | |
1492 } | |
1493 } | |
1494 } | |
1495 | |
1496 // Create a member list for each loop consisting | |
1497 // of both blocks and (immediate child) loops. | |
1498 for (uint i = 0; i < _num_blocks; i++) { | |
1499 Block *b = _blocks[i]; | |
1500 CFGLoop* lp = b->_loop; | |
1501 if (lp == NULL) { | |
1502 // Not assigned to a loop. Add it to the method's pseudo loop. | |
1503 b->_loop = root_loop; | |
1504 lp = root_loop; | |
1505 } | |
1506 if (lp == root_loop || b != lp->head()) { // loop heads are already members | |
1507 lp->add_member(b); | |
1508 } | |
1509 if (lp != root_loop) { | |
1510 if (lp->parent() == NULL) { | |
1511 // Not a nested loop. Make it a child of the method's pseudo loop. | |
1512 root_loop->add_nested_loop(lp); | |
1513 } | |
1514 if (b == lp->head()) { | |
1515 // Add nested loop to member list of parent loop. | |
1516 lp->parent()->add_member(lp); | |
1517 } | |
1518 } | |
1519 } | |
1520 | |
1521 return root_loop; | |
1522 } | |
1523 | |
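create_loop_tree() uses the classic natural-loop membership rule: the body of a loop is the back-edge tail plus everything that reaches the tail backwards without passing through the head. A minimal sketch of that rule under hypothetical types; the real push_pred() additionally skips over already-discovered inner loops and filters irreducible entries:

    // Hypothetical sketch of natural-loop membership; not the CFGLoop API.
    #include <cstdio>
    #include <vector>

    struct Blk { int id; std::vector<Blk*> preds; bool in_loop; };

    // The loop body is the tail plus everything that reaches the tail
    // backwards without passing through the head.
    void collect_loop(Blk* head, Blk* tail) {
      head->in_loop = true;            // marking the head stops the walk there
      tail->in_loop = true;
      std::vector<Blk*> worklist{tail};
      while (!worklist.empty()) {
        Blk* b = worklist.back(); worklist.pop_back();
        for (Blk* p : b->preds)
          if (!p->in_loop) { p->in_loop = true; worklist.push_back(p); }
      }
    }

    int main() {
      // entry -> head -> body -> tail, with the back edge tail -> head.
      Blk entry{0, {}, false}, head{1, {}, false};
      Blk body{2, {}, false}, tail{3, {}, false};
      head.preds = {&entry, &tail};
      body.preds = {&head};
      tail.preds = {&body};
      collect_loop(&head, &tail);
      std::printf("entry in loop: %d, body in loop: %d\n",
                  (int)entry.in_loop, (int)body.in_loop);   // prints 0 and 1
      return 0;
    }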
1524 //------------------------------push_pred-------------------------------------- | |
1525 void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) { | |
1526 Node* pred_n = blk->pred(i); | |
1527 Block* pred = node_to_blk[pred_n->_idx]; | |
1528 CFGLoop *pred_loop = pred->_loop; | |
1529 if (pred_loop == NULL) { | |
1530 // Filter out blocks for non-single-entry loops. | |
1531 // For all reasonable loops, the head occurs before the tail in RPO. | |
1532 if (pred->_rpo > head()->_rpo) { | |
1533 pred->_loop = this; | |
1534 worklist.push(pred); | |
1535 } | |
1536 } else if (pred_loop != this) { | |
1537 // Nested loop. | |
1538 while (pred_loop->_parent != NULL && pred_loop->_parent != this) { | |
1539 pred_loop = pred_loop->_parent; | |
1540 } | |
1541 // Make pred's loop be a child | |
1542 if (pred_loop->_parent == NULL) { | |
1543 add_nested_loop(pred_loop); | |
1544 // Continue with loop entry predecessor. | |
1545 Block* pred_head = pred_loop->head(); | |
1546 assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | |
1547 assert(pred_head != head(), "loop head in only one loop"); | |
1548 push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk); | |
1549 } else { | |
1550 assert(pred_loop->_parent == this && _parent == NULL, "just checking"); | |
1551 } | |
1552 } | |
1553 } | |
1554 | |
1555 //------------------------------add_nested_loop-------------------------------- | |
1556 // Make cl a child of the current loop in the loop tree. | |
1557 void CFGLoop::add_nested_loop(CFGLoop* cl) { | |
1558 assert(_parent == NULL, "no parent yet"); | |
1559 assert(cl != this, "not my own parent"); | |
1560 cl->_parent = this; | |
1561 CFGLoop* ch = _child; | |
1562 if (ch == NULL) { | |
1563 _child = cl; | |
1564 } else { | |
1565 while (ch->_sibling != NULL) { ch = ch->_sibling; } | |
1566 ch->_sibling = cl; | |
1567 } | |
1568 } | |
1569 | |
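add_nested_loop() stores the loop tree in first-child/next-sibling form: each loop holds one child pointer plus a sibling chain, so adding a nested loop appends to the sibling list. A quick standalone illustration, with a hypothetical Loop struct and free function in place of the CFGLoop member:

    // Hypothetical minimal loop-tree node, mirroring the first-child/
    // next-sibling representation used above.
    #include <cstdio>

    struct Loop { int id; Loop* parent; Loop* child; Loop* sibling; };

    void add_nested(Loop* self, Loop* cl) {
      cl->parent = self;
      if (self->child == nullptr) { self->child = cl; return; }
      Loop* ch = self->child;
      while (ch->sibling != nullptr) ch = ch->sibling;  // append at the tail
      ch->sibling = cl;
    }

    int main() {
      Loop root{0, nullptr, nullptr, nullptr};
      Loop a{1, nullptr, nullptr, nullptr}, b{2, nullptr, nullptr, nullptr};
      add_nested(&root, &a);
      add_nested(&root, &b);
      std::printf("children of L%d: L%d, L%d\n", root.id,
                  root.child->id, root.child->sibling->id);  // L1, L2
      return 0;
    }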
1570 //------------------------------compute_loop_depth----------------------------- | |
1571 // Store the loop depth in each CFGLoop object. | |
1572 // Recursively walk the children to do the same for them. | |
1573 void CFGLoop::compute_loop_depth(int depth) { | |
1574 _depth = depth; | |
1575 CFGLoop* ch = _child; | |
1576 while (ch != NULL) { | |
1577 ch->compute_loop_depth(depth + 1); | |
1578 ch = ch->_sibling; | |
1579 } | |
1580 } | |
1581 | |
1582 //------------------------------compute_freq----------------------------------- | |
1583 // Compute the frequency of each block and loop, relative to a single entry | |
1584 // into the dominating loop head. | |
1585 void CFGLoop::compute_freq() { | |
1586 // Bottom up traversal of loop tree (visit inner loops first.) | |
1587 // Set loop head frequency to 1.0, then transitively | |
1588 // compute frequency for all successors in the loop, | |
1589 // as well as for each exit edge. Inner loops are | |
1590 // treated as single blocks with loop exit targets | |
1591 // as the successor blocks. | |
1592 | |
1593 // Nested loops first | |
1594 CFGLoop* ch = _child; | |
1595 while (ch != NULL) { | |
1596 ch->compute_freq(); | |
1597 ch = ch->_sibling; | |
1598 } | |
1599 assert (_members.length() > 0, "no empty loops"); | |
1600 Block* hd = head(); | |
1601 hd->_freq = 1.0f; | |
1602 for (int i = 0; i < _members.length(); i++) { | |
1603 CFGElement* s = _members.at(i); | |
1604 float freq = s->_freq; | |
1605 if (s->is_block()) { | |
1606 Block* b = s->as_Block(); | |
1607 for (uint j = 0; j < b->_num_succs; j++) { | |
1608 Block* sb = b->_succs[j]; | |
1609 update_succ_freq(sb, freq * b->succ_prob(j)); | |
1610 } | |
1611 } else { | |
1612 CFGLoop* lp = s->as_CFGLoop(); | |
1613 assert(lp->_parent == this, "immediate child"); | |
1614 for (int k = 0; k < lp->_exits.length(); k++) { | |
1615 Block* eb = lp->_exits.at(k).get_target(); | |
1616 float prob = lp->_exits.at(k).get_prob(); | |
1617 update_succ_freq(eb, freq * prob); | |
1618 } | |
1619 } | |
1620 } | |
1621 | |
1622 // For all loops other than the outer, "method" loop, | |
1623 // sum and normalize the exit probability. The "method" loop | |
1624 // should keep the initial exit probability of 1, so that | |
1625 // inner blocks do not get erroneously scaled. | |
1626 if (_depth != 0) { | |
1627 // Total the exit probabilities for this loop. | |
1628 float exits_sum = 0.0f; | |
1629 for (int i = 0; i < _exits.length(); i++) { | |
1630 exits_sum += _exits.at(i).get_prob(); | |
1631 } | |
1632 | |
1633 // Normalize the exit probabilities. Until now, the | |
1634 // probabilities estimate the possibility of exit per | |
1635 // a single loop iteration; afterward, they estimate | |
1636 // the probability of exit per loop entry. | |
1637 for (int i = 0; i < _exits.length(); i++) { | |
1638 Block* et = _exits.at(i).get_target(); | |
418 | 1639 float new_prob = 0.0f; |
1640 if (_exits.at(i).get_prob() > 0.0f) { | |
1641 new_prob = _exits.at(i).get_prob() / exits_sum; | |
1642 } | |
0 | 1643 BlockProbPair bpp(et, new_prob); |
1644 _exits.at_put(i, bpp); | |
1645 } | |
1646 | |
418 | 1647 // Save the total, but guard against unreasonable probability, |
0 | 1648 // as the value is used to estimate the loop trip count. |
1649 // An infinite trip count would blur relative block | |
1650 // frequencies. | |
1651 if (exits_sum > 1.0f) exits_sum = 1.0f; | |
1652 if (exits_sum < PROB_MIN) exits_sum = PROB_MIN; | |
1653 _exit_prob = exits_sum; | |
1654 } | |
1655 } | |
1656 | |
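The normalization step above converts per-iteration exit probabilities into per-entry ones, and the clamped sum stored in _exit_prob later yields the expected trip count as its reciprocal. A worked standalone example with made-up numbers:

    // Worked example; the probabilities and PROB_MIN are made up.
    #include <cstdio>

    int main() {
      const float PROB_MIN = 1e-6f;             // stand-in clamp value
      float exit_prob[2] = { 0.05f, 0.15f };    // two exits, per iteration
      float sum = exit_prob[0] + exit_prob[1];  // 0.20
      for (float& p : exit_prob) p /= sum;      // per loop entry: 0.25, 0.75
      if (sum > 1.0f) sum = 1.0f;               // guard against nonsense
      if (sum < PROB_MIN) sum = PROB_MIN;
      std::printf("exit split: %.2f / %.2f, expected trips: %.1f\n",
                  exit_prob[0], exit_prob[1], 1.0f / sum);  // 5.0 iterations
      return 0;
    }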
1657 //------------------------------succ_prob------------------------------------- | |
1658 // Determine the probability of reaching successor 'i' from the receiver block. | |
1659 float Block::succ_prob(uint i) { | |
1660 int eidx = end_idx(); | |
1661 Node *n = _nodes[eidx]; // Get ending Node | |
308 | 1662 |
1663 int op = n->Opcode(); | |
1664 if (n->is_Mach()) { | |
1665 if (n->is_MachNullCheck()) { | |
1666 // Can only reach here if called after lcm. The original Op_If is gone, | |
1667 // so we attempt to infer the probability from one or both of the | |
1668 // successor blocks. | |
1669 assert(_num_succs == 2, "expecting 2 successors of a null check"); | |
1670 // If either successor has only one predecessor, then the | |
605 | 1671 // probability estimate can be derived using the |
308 | 1672 // relative frequency of the successor and this block. |
1673 if (_succs[i]->num_preds() == 2) { | |
1674 return _succs[i]->_freq / _freq; | |
1675 } else if (_succs[1-i]->num_preds() == 2) { | |
1676 return 1 - (_succs[1-i]->_freq / _freq); | |
1677 } else { | |
1678 // Estimate using both successor frequencies | |
1679 float freq = _succs[i]->_freq; | |
1680 return freq / (freq + _succs[1-i]->_freq); | |
1681 } | |
1682 } | |
1683 op = n->as_Mach()->ideal_Opcode(); | |
1684 } | |
1685 | |
0 | 1686 |
1687 // Switch on branch type | |
1688 switch( op ) { | |
1689 case Op_CountedLoopEnd: | |
1690 case Op_If: { | |
1691 assert (i < 2, "just checking"); | |
1692 // Conditionals pass on only part of their frequency | |
1693 float prob = n->as_MachIf()->_prob; | |
1694 assert(prob >= 0.0 && prob <= 1.0, "out of range probability"); | |
1695 // If succ[i] is the FALSE branch, invert path info | |
1696 if( _nodes[i + eidx + 1]->Opcode() == Op_IfFalse ) { | |
1697 return 1.0f - prob; // not taken | |
1698 } else { | |
1699 return prob; // taken | |
1700 } | |
1701 } | |
1702 | |
1703 case Op_Jump: | |
1704 // Divide the frequency between all successors evenly | |
1705 return 1.0f/_num_succs; | |
1706 | |
1707 case Op_Catch: { | |
1708 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1709 if (ci->_con == CatchProjNode::fall_through_index) { | |
1710 // Fall-thru path gets the lion's share. | |
1711 return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs; | |
1712 } else { | |
1713 // Presume exceptional paths are equally unlikely | |
1714 return PROB_UNLIKELY_MAG(5); | |
1715 } | |
1716 } | |
1717 | |
1718 case Op_Root: | |
1719 case Op_Goto: | |
1720 // Pass frequency straight thru to target | |
1721 return 1.0f; | |
1722 | |
1723 case Op_NeverBranch: | |
1724 return 0.0f; | |
1725 | |
1726 case Op_TailCall: | |
1727 case Op_TailJump: | |
1728 case Op_Return: | |
1729 case Op_Halt: | |
1730 case Op_Rethrow: | |
1731 // Do not push out freq to root block | |
1732 return 0.0f; | |
1733 | |
1734 default: | |
1735 ShouldNotReachHere(); | |
1736 } | |
1737 | |
1738 return 0.0f; | |
1739 } | |
1740 | |
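For the MachNullCheck case above, the original If probability is gone, so it is reconstructed from the successors' relative frequencies. A small sketch of that inference with a hypothetical Blk type; note the sole-predecessor test is simplified here to num_preds == 1, whereas the real Block::num_preds() counts one extra slot:

    // Hypothetical sketch; Blk and null_check_prob() are stand-ins.
    #include <cstdio>

    struct Blk { float freq; int num_preds; };

    // Probability of going from a block with frequency 'self_freq' to
    // succ[i], given the two successors of a null check.
    float null_check_prob(float self_freq, const Blk succ[2], int i) {
      if (succ[i].num_preds == 1)          // sole predecessor: exact ratio
        return succ[i].freq / self_freq;
      if (succ[1 - i].num_preds == 1)
        return 1.0f - succ[1 - i].freq / self_freq;
      // Otherwise estimate from both successor frequencies.
      return succ[i].freq / (succ[i].freq + succ[1 - i].freq);
    }

    int main() {
      Blk succs[2] = { {0.98f, 1}, {0.02f, 1} };  // not-null vs. null path
      std::printf("P(not null) = %.2f\n", null_check_prob(1.0f, succs, 0));
      return 0;
    }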
418 | 1741 //------------------------------num_fall_throughs----------------------------- |
1742 // Return the number of fall-through candidates for a block | |
1743 int Block::num_fall_throughs() { | |
1744 int eidx = end_idx(); | |
1745 Node *n = _nodes[eidx]; // Get ending Node | |
1746 | |
1747 int op = n->Opcode(); | |
1748 if (n->is_Mach()) { | |
1749 if (n->is_MachNullCheck()) { | |
1750 // In theory, either side can fall through; for simplicity's sake, | |
1751 // let's say only the false branch can for now. | |
1752 return 1; | |
1753 } | |
1754 op = n->as_Mach()->ideal_Opcode(); | |
1755 } | |
1756 | |
1757 // Switch on branch type | |
1758 switch( op ) { | |
1759 case Op_CountedLoopEnd: | |
1760 case Op_If: | |
1761 return 2; | |
1762 | |
1763 case Op_Root: | |
1764 case Op_Goto: | |
1765 return 1; | |
1766 | |
1767 case Op_Catch: { | |
1768 for (uint i = 0; i < _num_succs; i++) { | |
1769 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1770 if (ci->_con == CatchProjNode::fall_through_index) { | |
1771 return 1; | |
1772 } | |
1773 } | |
1774 return 0; | |
1775 } | |
1776 | |
1777 case Op_Jump: | |
1778 case Op_NeverBranch: | |
1779 case Op_TailCall: | |
1780 case Op_TailJump: | |
1781 case Op_Return: | |
1782 case Op_Halt: | |
1783 case Op_Rethrow: | |
1784 return 0; | |
1785 | |
1786 default: | |
1787 ShouldNotReachHere(); | |
1788 } | |
1789 | |
1790 return 0; | |
1791 } | |
1792 | |
1793 //------------------------------succ_fall_through----------------------------- | |
1794 // Return true if a specific successor could be fall-through target. | |
1795 bool Block::succ_fall_through(uint i) { | |
1796 int eidx = end_idx(); | |
1797 Node *n = _nodes[eidx]; // Get ending Node | |
1798 | |
1799 int op = n->Opcode(); | |
1800 if (n->is_Mach()) { | |
1801 if (n->is_MachNullCheck()) { | |
1802 // In theory, either side can fall through; for simplicity's sake, | |
1803 // let's say only the false branch can for now. | |
1804 return _nodes[i + eidx + 1]->Opcode() == Op_IfFalse; | |
1805 } | |
1806 op = n->as_Mach()->ideal_Opcode(); | |
1807 } | |
1808 | |
1809 // Switch on branch type | |
1810 switch( op ) { | |
1811 case Op_CountedLoopEnd: | |
1812 case Op_If: | |
1813 case Op_Root: | |
1814 case Op_Goto: | |
1815 return true; | |
1816 | |
1817 case Op_Catch: { | |
1818 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1819 return ci->_con == CatchProjNode::fall_through_index; | |
1820 } | |
1821 | |
1822 case Op_Jump: | |
1823 case Op_NeverBranch: | |
1824 case Op_TailCall: | |
1825 case Op_TailJump: | |
1826 case Op_Return: | |
1827 case Op_Halt: | |
1828 case Op_Rethrow: | |
1829 return false; | |
1830 | |
1831 default: | |
1832 ShouldNotReachHere(); | |
1833 } | |
1834 | |
1835 return false; | |
1836 } | |
1837 | |
1838 //------------------------------update_uncommon_branch------------------------ | |
1839 // Update the probability of a two-branch to be uncommon | |
1840 void Block::update_uncommon_branch(Block* ub) { | |
1841 int eidx = end_idx(); | |
1842 Node *n = _nodes[eidx]; // Get ending Node | |
1843 | |
1844 int op = n->as_Mach()->ideal_Opcode(); | |
1845 | |
1846 assert(op == Op_CountedLoopEnd || op == Op_If, "must be an If"); | |
1847 assert(num_fall_throughs() == 2, "must be a two way branch block"); | |
1848 | |
1849 // Which successor is ub? | |
1850 uint s; | |
1851 for (s = 0; s <_num_succs; s++) { | |
1852 if (_succs[s] == ub) break; | |
1853 } | |
1854 assert(s < 2, "uncommon successor must be found"); | |
1855 | |
1856 // If ub is on the true path, make the probability small; otherwise | |
1857 // ub is on the false path, so make the probability large | |
1858 bool invert = (_nodes[s + eidx + 1]->Opcode() == Op_IfFalse); | |
1859 | |
1860 // Get existing probability | |
1861 float p = n->as_MachIf()->_prob; | |
1862 | |
1863 if (invert) p = 1.0 - p; | |
1864 if (p > PROB_MIN) { | |
1865 p = PROB_MIN; | |
1866 } | |
1867 if (invert) p = 1.0 - p; | |
1868 | |
1869 n->as_MachIf()->_prob = p; | |
1870 } | |
1871 | |
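The invert/clamp/invert sequence above lets a single clamp handle both branch senses: p is first turned into the probability of reaching the uncommon block, clamped, then translated back. A standalone illustration with made-up numbers and PROB_MIN as a stand-in constant:

    // Illustration only; the numbers and PROB_MIN are made up.
    #include <cstdio>

    int main() {
      const float PROB_MIN = 1e-6f;
      float p = 0.3f;                   // P(taken)
      bool invert = true;               // uncommon block is on the false side
      if (invert) p = 1.0f - p;         // view p as P(reaching uncommon block)
      if (p > PROB_MIN) p = PROB_MIN;   // force it to be very unlikely
      if (invert) p = 1.0f - p;         // translate back to P(taken)
      std::printf("new P(taken) = %g\n", p);   // ~1.0: the common side wins
      return 0;
    }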
0 | 1872 //------------------------------update_succ_freq------------------------------- |
605 | 1873 // Update the appropriate frequency associated with block 'b', a successor of |
0 | 1874 // a block in this loop. |
1875 void CFGLoop::update_succ_freq(Block* b, float freq) { | |
1876 if (b->_loop == this) { | |
1877 if (b == head()) { | |
1878 // back branch within the loop | |
1879 // Do nothing now; the loop-carried frequency will be | |
1880 // adjusted later in scale_freq(). | |
1881 } else { | |
1882 // simple branch within the loop | |
1883 b->_freq += freq; | |
1884 } | |
1885 } else if (!in_loop_nest(b)) { | |
1886 // branch is exit from this loop | |
1887 BlockProbPair bpp(b, freq); | |
1888 _exits.append(bpp); | |
1889 } else { | |
1890 // branch into nested loop | |
1891 CFGLoop* ch = b->_loop; | |
1892 ch->_freq += freq; | |
1893 } | |
1894 } | |
1895 | |
1896 //------------------------------in_loop_nest----------------------------------- | |
1897 // Determine if block b is in the receiver's loop nest. | |
1898 bool CFGLoop::in_loop_nest(Block* b) { | |
1899 int depth = _depth; | |
1900 CFGLoop* b_loop = b->_loop; | |
1901 int b_depth = b_loop->_depth; | |
1902 if (depth == b_depth) { | |
1903 return true; | |
1904 } | |
1905 while (b_depth > depth) { | |
1906 b_loop = b_loop->_parent; | |
1907 b_depth = b_loop->_depth; | |
1908 } | |
1909 return b_loop == this; | |
1910 } | |
1911 | |
1912 //------------------------------scale_freq------------------------------------- | |
1913 // Scale frequency of loops and blocks by trip counts from outer loops | |
1914 // Do a top down traversal of loop tree (visit outer loops first.) | |
1915 void CFGLoop::scale_freq() { | |
1916 float loop_freq = _freq * trip_count(); | |
673 | 1917 _freq = loop_freq; |
0 | 1918 for (int i = 0; i < _members.length(); i++) { |
1919 CFGElement* s = _members.at(i); | |
552 | 1920 float block_freq = s->_freq * loop_freq; |
621 | 1921 if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY) |
1922 block_freq = MIN_BLOCK_FREQUENCY; | |
552 | 1923 s->_freq = block_freq; |
0 | 1924 } |
1925 CFGLoop* ch = _child; | |
1926 while (ch != NULL) { | |
1927 ch->scale_freq(); | |
1928 ch = ch->_sibling; | |
1929 } | |
1930 } | |
1931 | |
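scale_freq() turns loop-relative frequencies into method-relative ones by multiplying through the trip counts of every enclosing loop. A worked standalone example with invented trip counts:

    // Worked example; the trip counts below are invented.
    #include <cstdio>

    int main() {
      float outer_trips = 10.0f, inner_trips = 20.0f;
      float inner_entry_freq = 1.0f;     // relative to one outer iteration
      // After scaling, the inner loop body runs outer * inner times.
      float inner_body_freq = inner_entry_freq * outer_trips * inner_trips;
      std::printf("inner body frequency: %.0f per method entry\n",
                  inner_body_freq);      // 200
      return 0;
    }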
673 | 1932 // Frequency of outer loop |
1933 float CFGLoop::outer_loop_freq() const { | |
1934 if (_child != NULL) { | |
1935 return _child->_freq; | |
1936 } | |
1937 return _freq; | |
1938 } | |
1939 | |
0 | 1940 #ifndef PRODUCT |
1941 //------------------------------dump_tree-------------------------------------- | |
1942 void CFGLoop::dump_tree() const { | |
1943 dump(); | |
1944 if (_child != NULL) _child->dump_tree(); | |
1945 if (_sibling != NULL) _sibling->dump_tree(); | |
1946 } | |
1947 | |
1948 //------------------------------dump------------------------------------------- | |
1949 void CFGLoop::dump() const { | |
1950 for (int i = 0; i < _depth; i++) tty->print(" "); | |
1951 tty->print("%s: %d trip_count: %6.0f freq: %6.0f\n", | |
1952 _depth == 0 ? "Method" : "Loop", _id, trip_count(), _freq); | |
1953 for (int i = 0; i < _depth; i++) tty->print(" "); | |
1954 tty->print(" members:"); | |
1955 int k = 0; | |
1956 for (int i = 0; i < _members.length(); i++) { | |
1957 if (k++ >= 6) { | |
1958 tty->print("\n "); | |
1959 for (int j = 0; j < _depth+1; j++) tty->print(" "); | |
1960 k = 0; | |
1961 } | |
1962 CFGElement *s = _members.at(i); | |
1963 if (s->is_block()) { | |
1964 Block *b = s->as_Block(); | |
1965 tty->print(" B%d(%6.3f)", b->_pre_order, b->_freq); | |
1966 } else { | |
1967 CFGLoop* lp = s->as_CFGLoop(); | |
1968 tty->print(" L%d(%6.3f)", lp->_id, lp->_freq); | |
1969 } | |
1970 } | |
1971 tty->print("\n"); | |
1972 for (int i = 0; i < _depth; i++) tty->print(" "); | |
1973 tty->print(" exits: "); | |
1974 k = 0; | |
1975 for (int i = 0; i < _exits.length(); i++) { | |
1976 if (k++ >= 7) { | |
1977 tty->print("\n "); | |
1978 for (int j = 0; j < _depth+1; j++) tty->print(" "); | |
1979 k = 0; | |
1980 } | |
1981 Block *blk = _exits.at(i).get_target(); | |
1982 float prob = _exits.at(i).get_prob(); | |
1983 tty->print(" ->%d@%d%%", blk->_pre_order, (int)(prob*100)); | |
1984 } | |
1985 tty->print("\n"); | |
1986 } | |
1987 #endif |