annotate src/share/vm/opto/gcm.cpp @ 1994:6cd6d394f280
7001033: assert(gch->gc_cause() == GCCause::_scavenge_alot || !gch->incremental_collection_failed())
7002546: regression on SpecJbb2005 on 7b118 comparing to 7b117 on small heaps
Summary: Relaxed assertion checking related to incremental_collection_failed flag to allow for ExplicitGCInvokesConcurrent behaviour where we do not want a failing scavenge to bail to a stop-world collection. Parameterized incremental_collection_will_fail() so we can selectively use, or not use, as appropriate, the statistical prediction at specific use sites. This essentially reverts the scavenge bail-out logic to what it was prior to some recent changes that had inadvertently started using the statistical prediction which can be noisy in the presence of bursty loads. Added some associated verbose non-product debugging messages.
Reviewed-by: johnc, tonyp
author    ysr
date      Tue, 07 Dec 2010 21:55:53 -0800
parents   f95d63e2154a
children  2f644f85485d
/*
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "runtime/deoptimization.hpp"
#ifdef TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#endif

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style

// To avoid float value underflow
#define MIN_BLOCK_FREQUENCY 1.e-35f

//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b. Look for projections of n and make sure they
// are in b also.
void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
  // Set basic block of n, Add n to b,
  _bbs.map(n->_idx, b);
  b->add_inst(n);

  // After Matching, nearly any old Node may have projections trailing it.
  // These are usually machine-dependent flags.  In any case, they might
  // float to another block below this one.  Move them up.
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* use = n->fast_out(i);
    if (use->is_Proj()) {
      Block* buse = _bbs[use->_idx];
      if (buse != b) {              // In wrong block?
        if (buse != NULL)
          buse->find_remove(use);   // Remove from wrong block
        _bbs.map(use->_idx, b);     // Re-insert in this block
        b->add_inst(use);
      }
    }
  }
}

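// Note on schedule_node_into_block above: the trailing projections it
// pulls along are typically the machine-dependent flag results mentioned
// in the loop comment (e.g. the condition-code MachProj of a compare);
// moving them keeps a def and its projections together in one block for
// the later scheduling phases.
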
//----------------------------replace_block_proj_ctrl-------------------------
// Nodes that have is_block_proj() nodes as their control need to use
// the appropriate Region for their actual block as their control since
// the projection will be in a predecessor block.
void PhaseCFG::replace_block_proj_ctrl( Node *n ) {
  const Node *in0 = n->in(0);
  assert(in0 != NULL, "Only control-dependent");
  const Node *p = in0->is_block_proj();
  if (p != NULL && p != n) {    // Control from a block projection?
    assert(!n->pinned() || n->is_SafePointScalarObject(), "only SafePointScalarObject pinned node is expected here");
    // Find trailing Region
    Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
    uint j = 0;
    if (pb->_num_succs != 1) {  // More than 1 successor?
      // Search for successor
      uint max = pb->_nodes.size();
      assert( max > 1, "" );
      uint start = max - pb->_num_succs;
      // Find which output path belongs to projection
      for (j = start; j < max; j++) {
        if( pb->_nodes[j] == in0 )
          break;
      }
      assert( j < max, "must find" );
      // Change control to match head of successor basic block
      j -= start;
    }
    n->set_req(0, pb->_succs[j]->head());
  }
}

//------------------------------schedule_pinned_nodes--------------------------
// Set the basic block for Nodes pinned into blocks
void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
  // Allocate node stack of size C->unique()+8 to avoid frequent realloc
  GrowableArray <Node *> spstack(C->unique()+8);
  spstack.push(_root);
  while ( spstack.is_nonempty() ) {
    Node *n = spstack.pop();
    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
      if( n->pinned() && !_bbs.lookup(n->_idx) ) {  // Pinned?  Nail it down!
        assert( n->in(0), "pinned Node must have Control" );
        // Before setting block replace block_proj control edge
        replace_block_proj_ctrl(n);
        Node *input = n->in(0);
        while( !input->is_block_start() )
          input = input->in(0);
        Block *b = _bbs[input->_idx];  // Basic block of controlling input
        schedule_node_into_block(n, b);
      }
      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
        if( n->in(i) != NULL )
          spstack.push(n->in(i));
      }
    }
  }
}

#ifdef ASSERT
// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that it is dominated by b1, the deepest
// input observed until b2.
static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
  if (b1 == NULL)  return;
  assert(b1->_dom_depth < b2->_dom_depth, "sanity");
  Block* tmp = b2;
  while (tmp != b1 && tmp != NULL) {
    tmp = tmp->_idom;
  }
  if (tmp != b1) {
    // Detected an unschedulable graph.  Print some nice stuff and die.
    tty->print_cr("!!! Unschedulable graph !!!");
    for (uint j=0; j<n->len(); j++) { // For all inputs
      Node* inn = n->in(j); // Get input
      if (inn == NULL)  continue;  // Ignore NULL, missing inputs
      Block* inb = bbs[inn->_idx];
      tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
                 inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
      inn->dump();
    }
    tty->print("Failing node: ");
    n->dump();
    assert(false, "unschedulable graph");
  }
}
#endif

static Block* find_deepest_input(Node* n, Block_Array &bbs) {
  // Find the last input dominated by all other inputs.
  Block* deepb           = NULL;        // Deepest block so far
  int    deepb_dom_depth = 0;
  for (uint k = 0; k < n->len(); k++) { // For all inputs
    Node* inn = n->in(k);               // Get input
    if (inn == NULL)  continue;         // Ignore NULL, missing inputs
    Block* inb = bbs[inn->_idx];
    assert(inb != NULL, "must already have scheduled this input");
    if (deepb_dom_depth < (int) inb->_dom_depth) {
      // The new inb must be dominated by the previous deepb.
      // The various inputs must be linearly ordered in the dom
      // tree, or else there will not be a unique deepest block.
      DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
      deepb = inb;                      // Save deepest block
      deepb_dom_depth = deepb->_dom_depth;
    }
  }
  assert(deepb != NULL, "must be at least one input to n");
  return deepb;
}

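// Note on find_deepest_input above: because the input blocks are checked
// to be linearly ordered in the dominator tree (assert_dom), the deepest
// input block is dominated by every other input block, so it is exactly
// the earliest block where all of n's inputs are available -- the
// "earliest legal block" that schedule_early records.
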
//------------------------------schedule_early---------------------------------
// Find the earliest Block any instruction can be placed in.  Some instructions
// are pinned into Blocks.  Unpinned instructions can appear in the last block
// in which all their inputs occur.
bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
  // Allocate stack with enough space to avoid frequent realloc
  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
  // roots.push(_root); _root will be processed among C->top() inputs
  roots.push(C->top());
  visited.set(C->top()->_idx);

  while (roots.size() != 0) {
    // Use local variables nstack_top_n & nstack_top_i to cache values
    // on stack's top.
    Node *nstack_top_n = roots.pop();
    uint  nstack_top_i = 0;
    //while_nstack_nonempty:
    while (true) {
      // Get parent node and next input's index from stack's top.
      Node *n = nstack_top_n;
      uint  i = nstack_top_i;

      if (i == 0) {
        // Fixup some control.  Constants without control get attached
        // to root and nodes that use is_block_proj() nodes should be attached
        // to the region that starts their block.
        const Node *in0 = n->in(0);
        if (in0 != NULL) {              // Control-dependent?
          replace_block_proj_ctrl(n);
        } else {               // n->in(0) == NULL
          if (n->req() == 1) { // This guy is a constant with NO inputs?
            n->set_req(0, _root);
          }
        }
      }

      // First, visit all inputs and force them to get a block.  If an
      // input is already in a block we quit following inputs (to avoid
      // cycles). Instead we put that Node on a worklist to be handled
      // later (since ITS inputs may not have a block yet).
      bool done = true;       // Assume all n's inputs will be processed
      while (i < n->len()) {  // For all inputs
        Node *in = n->in(i);  // Get input
        ++i;
        if (in == NULL) continue;  // Ignore NULL, missing inputs
        int is_visited = visited.test_set(in->_idx);
        if (!_bbs.lookup(in->_idx)) { // Missing block selection?
          if (is_visited) {
            // assert( !visited.test(in->_idx), "did not schedule early" );
            return false;
          }
          nstack.push(n, i);  // Save parent node and next input's index.
          nstack_top_n = in;  // Process current input now.
          nstack_top_i = 0;
          done = false;       // Not all n's inputs processed.
          break; // continue while_nstack_nonempty;
        } else if (!is_visited) { // Input not yet visited?
          roots.push(in);         // Visit this guy later, using worklist
        }
      }
      if (done) {
        // All of n's inputs have been processed, complete post-processing.

        // Some instructions are pinned into a block.  These include Region,
        // Phi, Start, Return, and other control-dependent instructions and
        // any projections which depend on them.
        if (!n->pinned()) {
          // Set earliest legal block.
          _bbs.map(n->_idx, find_deepest_input(n, _bbs));
        } else {
          assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
        }

        if (nstack.is_empty()) {
          // Finished all nodes on stack.
          // Process next node on the worklist 'roots'.
          break;
        }
        // Get saved parent node and next input's index.
        nstack_top_n = nstack.node();
        nstack_top_i = nstack.index();
        nstack.pop();
      } //    if (done)
    }   // while (true)
  }     // while (roots.size() != 0)
  return true;
}

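// Note on schedule_early above: the traversal is a depth-first walk over
// use->def edges driven by an explicit Node_Stack, so deeply nested
// graphs cannot overflow the native stack the way a recursive walk
// could.  Returning false (an input already visited but still without a
// block) signals a cycle not broken by any pinned node; the caller is
// expected to treat the graph as unschedulable and bail out.
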
//------------------------------dom_lca----------------------------------------
// Find least common ancestor in dominator tree
// LCA is a current notion of LCA, to be raised above 'this'.
// As a convenient boundary condition, return 'this' if LCA is NULL.
// Find the LCA of those two nodes.
Block* Block::dom_lca(Block* LCA) {
  if (LCA == NULL || LCA == this)  return this;

  Block* anc = this;
  while (anc->_dom_depth > LCA->_dom_depth)
    anc = anc->_idom;           // Walk up till anc is as high as LCA

  while (LCA->_dom_depth > anc->_dom_depth)
    LCA = LCA->_idom;           // Walk up till LCA is as high as anc

  while (LCA != anc) {          // Walk both up till they are the same
    LCA = LCA->_idom;
    anc = anc->_idom;
  }

  return LCA;
}

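// Example for dom_lca above: with idom chains B5->B3->B1 and B4->B1,
// B5->dom_lca(B4) first walks B5 up to B3 (equalizing dom depth with B4),
// then climbs both chains in lockstep -- B3->B1 and B4->B1 -- until the
// walkers meet at B1, the least common ancestor in the dominator tree.
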
//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use.  If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
  Block* buse = bbs[use->_idx];
  if (buse == NULL) return LCA;   // Unused killing Projs have no use block
  if (!use->is_Phi())  return buse->dom_lca(LCA);
  uint pmax = use->req();       // Number of Phi inputs
  // Why doesn't this loop just break after finding the matching input to
  // the Phi?  Well...it's like this.  I do not have true def-use/use-def
  // chains.  Means I cannot distinguish, from the def-use direction, which
  // of many use-defs lead from the same use to the same def.  That is, this
  // Phi might have several uses of the same def.  Each use appears in a
  // different predecessor block.  But when I enter here, I cannot distinguish
  // which use-def edge I should find the predecessor block for.  So I find
  // them all.  Means I do a little extra work if a Phi uses the same value
  // more than once.
  for (uint j=1; j<pmax; j++) { // For all inputs
    if (use->in(j) == def) {    // Found matching input?
      Block* pred = bbs[buse->pred(j)->_idx];
      LCA = pred->dom_lca(LCA);
    }
  }
  return LCA;
}

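// Note on raise_LCA_above_use above: handling Phi uses through their
// predecessor blocks is what lets a def sink below a merge point.  If a
// def feeds only Phi input 2, the LCA is raised just to the block of
// predecessor 2; the def need not dominate the Phi's own block, only
// the incoming path that actually consumes it.
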
//----------------------------raise_LCA_above_marks----------------------------
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index.  Return the LCA (in the dom tree)
// of all marked blocks.  If there are none marked, return the original
// LCA.
static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
                                    Block* early, Block_Array &bbs) {
  Block_List worklist;
  worklist.push(LCA);
  while (worklist.size() > 0) {
    Block* mid = worklist.pop();
    if (mid == early)  continue;  // stop searching here

    // Test and set the visited bit.
    if (mid->raise_LCA_visited() == mark)  continue;  // already visited

    // Don't process the current LCA, otherwise the search may terminate early
    if (mid != LCA && mid->raise_LCA_mark() == mark) {
      // Raise the LCA.
      LCA = mid->dom_lca(LCA);
      if (LCA == early)  break;   // stop searching everywhere
      assert(early->dominates(LCA), "early is high enough");
      // Resume searching at that point, skipping intermediate levels.
      worklist.push(LCA);
      if (LCA == mid)
        continue; // Don't mark as visited to avoid early termination.
    } else {
      // Keep searching through this block's predecessors.
      for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
        Block* mid_parent = bbs[ mid->pred(j)->_idx ];
        worklist.push(mid_parent);
      }
    }
    mid->set_raise_LCA_visited(mark);
  }
  return LCA;
}

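// Note on raise_LCA_above_marks above: this is a reverse flood fill over
// CFG predecessor edges, bounded above by 'early'.  Whenever a marked
// block is reached the candidate jumps straight to dom_lca(mid, LCA) and
// the search resumes from there, skipping intermediate levels.  The
// per-load 'mark' value doubles as the visited stamp, so the per-block
// bits never need a separate clearing pass between searches.
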
//--------------------------memory_early_block--------------------------------
// This is a variation of find_deepest_input, the heart of schedule_early.
// Find the "early" block for a load, if we considered only memory and
// address inputs, that is, if other data inputs were ignored.
//
// Because a subset of edges are considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node. We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
  Node* base;
  Node* index;
  Node* store = load->in(MemNode::Memory);
  load->as_Mach()->memory_inputs(base, index);

  assert(base != NodeSentinel && index != NodeSentinel,
         "unexpected base/index inputs");

  Node* mem_inputs[4];
  int mem_inputs_length = 0;
  if (base != NULL)  mem_inputs[mem_inputs_length++] = base;
  if (index != NULL) mem_inputs[mem_inputs_length++] = index;
  if (store != NULL) mem_inputs[mem_inputs_length++] = store;

  // In the comparison below, add one to account for the control input,
  // which may be null, but always takes up a spot in the in array.
  if (mem_inputs_length + 1 < (int) load->req()) {
    // This "load" has more inputs than just the memory, base and index inputs.
    // For purposes of checking anti-dependences, we need to start
    // from the early block of only the address portion of the instruction,
    // and ignore other blocks that may have factored into the wider
    // schedule_early calculation.
    if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);

    Block* deepb           = NULL;        // Deepest block so far
    int    deepb_dom_depth = 0;
    for (int i = 0; i < mem_inputs_length; i++) {
      Block* inb = bbs[mem_inputs[i]->_idx];
      if (deepb_dom_depth < (int) inb->_dom_depth) {
        // The new inb must be dominated by the previous deepb.
        // The various inputs must be linearly ordered in the dom
        // tree, or else there will not be a unique deepest block.
        DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
        deepb = inb;                      // Save deepest block
        deepb_dom_depth = deepb->_dom_depth;
      }
    }
    early = deepb;
  }

  return early;
}

//--------------------------insert_anti_dependences---------------------------
// A load may need to witness memory that nearby stores can overwrite.
// For each nearby store, either insert an "anti-dependence" edge
// from the load to the store, or else move LCA upward to force the
// load to (eventually) be scheduled in a block above the store.
//
// Do not add edges to stores on distinct control-flow paths;
// only add edges to stores which might interfere.
//
// Return the (updated) LCA.  There will not be any possibly interfering
// store between the load's "early block" and the updated LCA.
// Any stores in the updated LCA will have new precedence edges
// back to the load.  The caller is expected to schedule the load
// in the LCA, in which case the precedence edges will make LCM
// preserve anti-dependences.  The caller may also hoist the load
// above the LCA, if it is not the early block.
Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
  assert(load->needs_anti_dependence_check(), "must be a load of some sort");
  assert(LCA != NULL, "");
  DEBUG_ONLY(Block* LCA_orig = LCA);

  // Compute the alias index.  Loads and stores with different alias indices
  // do not need anti-dependence edges.
  uint load_alias_idx = C->get_alias_index(load->adr_type());
#ifdef ASSERT
  if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
      (PrintOpto || VerifyAliases ||
       PrintMiscellaneous && (WizardMode || Verbose))) {
    // Load nodes should not consume all of memory.
    // Reporting a bottom type indicates a bug in adlc.
    // If some particular type of node validly consumes all of memory,
    // sharpen the preceding "if" to exclude it, so we can catch bugs here.
    tty->print_cr("*** Possible Anti-Dependence Bug:  Load consumes all of memory.");
    load->dump(2);
    if (VerifyAliases)  assert(load_alias_idx != Compile::AliasIdxBot, "");
  }
#endif
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
         "String compare is only known 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals),
         "String equals is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf),
         "String indexOf is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq),
         "Arrays equals is a 'load' that does not conflict with any stores");

  if (!C->alias_type(load_alias_idx)->is_rewritable()) {
    // It is impossible to spoil this load by putting stores before it,
    // because we know that the stores will never update the value
    // which 'load' must witness.
    return LCA;
  }

  node_idx_t load_index = load->_idx;

  // Note the earliest legal placement of 'load', as determined by
  // the unique point in the dom tree where all memory effects
  // and other inputs are first available.  (Computed by schedule_early.)
  // For normal loads, 'early' is the shallowest place (dom graph wise)
  // to look for anti-deps between this load and any store.
  Block* early = _bbs[load_index];

  // If we are subsuming loads, compute an "early" block that only considers
  // memory or address inputs. This block may be different than the
  // schedule_early block in that it could be at an even shallower depth in the
  // dominator tree, and allow for a broader discovery of anti-dependences.
  if (C->subsume_loads()) {
    early = memory_early_block(load, early, _bbs);
  }

  ResourceArea *area = Thread::current()->resource_area();
  Node_List worklist_mem(area);     // prior memory state to store
  Node_List worklist_store(area);   // possible-def to explore
  Node_List worklist_visited(area); // visited mergemem nodes
  Node_List non_early_stores(area); // all relevant stores outside of early
  bool must_raise_LCA = false;

#ifdef TRACK_PHI_INPUTS
  // %%% This extra checking fails because MergeMem nodes are not GVNed.
  // Provide "phi_inputs" to check if every input to a PhiNode is from the
  // original memory state.  This indicates a PhiNode which should not
  // prevent the load from sinking.  For such a block, set_raise_LCA_mark
  // may be overly conservative.
  // Mechanism: count inputs seen for each Phi encountered in worklist_store.
  DEBUG_ONLY(GrowableArray<uint> phi_inputs(area, C->unique(),0,0));
#endif

  // 'load' uses some memory state; look for users of the same state.
  // Recurse through MergeMem nodes to the stores that use them.

  // Each of these stores is a possible definition of memory
  // that 'load' needs to use.  We need to force 'load'
  // to occur before each such store.  When the store is in
  // the same block as 'load', we insert an anti-dependence
  // edge load->store.

  // The relevant stores "nearby" the load consist of a tree rooted
  // at initial_mem, with internal nodes of type MergeMem.
  // Therefore, the branches visited by the worklist are of this form:
  //    initial_mem -> (MergeMem ->)* store
  // The anti-dependence constraints apply only to the fringe of this tree.

  Node* initial_mem = load->in(MemNode::Memory);
  worklist_store.push(initial_mem);
  worklist_visited.push(initial_mem);
  worklist_mem.push(NULL);
  while (worklist_store.size() > 0) {
    // Examine a nearby store to see if it might interfere with our load.
    Node* mem   = worklist_mem.pop();
    Node* store = worklist_store.pop();
    uint op = store->Opcode();

    // MergeMems do not directly have anti-deps.
    // Treat them as internal nodes in a forward tree of memory states,
    // the leaves of which are each a 'possible-def'.
    if (store == initial_mem   // root (exclusive) of tree we are searching
        || op == Op_MergeMem   // internal node of tree we are searching
        ) {
      mem = store;   // It's not a possibly interfering store.
      if (store == initial_mem)
        initial_mem = NULL;  // only process initial memory once

      for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
        store = mem->fast_out(i);
        if (store->is_MergeMem()) {
          // Be sure we don't get into combinatorial problems.
          // (Allow phis to be repeated; they can merge two relevant states.)
          uint j = worklist_visited.size();
          for (; j > 0; j--) {
            if (worklist_visited.at(j-1) == store)  break;
          }
          if (j > 0)  continue; // already on work list; do not repeat
          worklist_visited.push(store);
        }
        worklist_mem.push(mem);
        worklist_store.push(store);
      }
      continue;
    }

    if (op == Op_MachProj || op == Op_Catch)   continue;
    if (store->needs_anti_dependence_check())  continue;  // not really a store

    // Compute the alias index.  Loads and stores with different alias
    // indices do not need anti-dependence edges.  Wide MemBar's are
    // anti-dependent on everything (except immutable memories).
    const TypePtr* adr_type = store->adr_type();
    if (!C->can_alias(adr_type, load_alias_idx))  continue;

    // Most slow-path runtime calls do NOT modify Java memory, but
    // they can block and so write Raw memory.
    if (store->is_Mach()) {
      MachNode* mstore = store->as_Mach();
      if (load_alias_idx != Compile::AliasIdxRaw) {
        // Check for call into the runtime using the Java calling
        // convention (and from there into a wrapper); it has no
        // _method.  Can't do this optimization for Native calls because
        // they CAN write to Java memory.
        if (mstore->ideal_Opcode() == Op_CallStaticJava) {
          assert(mstore->is_MachSafePoint(), "");
          MachSafePointNode* ms = (MachSafePointNode*) mstore;
          assert(ms->is_MachCallJava(), "");
          MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
          if (mcj->_method == NULL) {
            // These runtime calls do not write to Java visible memory
            // (other than Raw) and so do not require anti-dependence edges.
            continue;
          }
        }
        // Same for SafePoints: they read/write Raw but only read otherwise.
        // This is basically a workaround for SafePoints only defining control
        // instead of control + memory.
        if (mstore->ideal_Opcode() == Op_SafePoint)
          continue;
      } else {
        // Some raw memory, such as the load of "top" at an allocation,
        // can be control dependent on the previous safepoint. See
        // comments in GraphKit::allocate_heap() about control input.
        // Inserting an anti-dep between such a safepoint and a use
        // creates a cycle, and will cause a subsequent failure in
        // local scheduling.  (BugId 4919904)
        // (%%% How can a control input be a safepoint and not a projection??)
        if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
          continue;
      }
    }
612 // Identify a block that the current load must be above, | |
613 // or else observe that 'store' is all the way up in the | |
614 // earliest legal block for 'load'. In the latter case, | |
615 // immediately insert an anti-dependence edge. | |
616 Block* store_block = _bbs[store->_idx]; | |
617 assert(store_block != NULL, "unused killing projections skipped above"); | |
618 | |
619 if (store->is_Phi()) { | |
620 // 'load' uses memory which is one (or more) of the Phi's inputs. | |
621 // It must be scheduled not before the Phi, but rather before | |
622 // each of the relevant Phi inputs. | |
623 // | |
624 // Instead of finding the LCA of all inputs to a Phi that match 'mem', | |
625 // we mark each corresponding predecessor block and do a combined | |
626 // hoisting operation later (raise_LCA_above_marks). | |
627 // | |
628 // Do not assert(store_block != early, "Phi merging memory after access") | |
629 // PhiNode may be at start of block 'early' with backedge to 'early' | |
630 DEBUG_ONLY(bool found_match = false); | |
631 for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) { | |
632 if (store->in(j) == mem) { // Found matching input? | |
633 DEBUG_ONLY(found_match = true); | |
634 Block* pred_block = _bbs[store_block->pred(j)->_idx]; | |
635 if (pred_block != early) { | |
636 // If any predecessor of the Phi matches the load's "early block", | |
637 // we do not need a precedence edge between the Phi and 'load' | |
605 | 638 // since the load will be forced into a block preceding the Phi. |
0 | 639 pred_block->set_raise_LCA_mark(load_index); |
640 assert(!LCA_orig->dominates(pred_block) || | |
641 early->dominates(pred_block), "early is high enough"); | |
642 must_raise_LCA = true; | |
788 | 643 } else { |
644 // anti-dependent upon PHI pinned below 'early', no edge needed | |
645 LCA = early; // but can not schedule below 'early' | |
0 | 646 } |
647 } | |
648 } | |
649 assert(found_match, "no worklist bug"); | |
650 #ifdef TRACK_PHI_INPUTS | |
651 #ifdef ASSERT | |
652 // This assert asks about correct handling of PhiNodes, which may not | |
653 // have all input edges directly from 'mem'. See BugId 4621264 | |
654 int num_mem_inputs = phi_inputs.at_grow(store->_idx,0) + 1; | |
655 // Increment by exactly one even if there are multiple copies of 'mem' | |
656 // coming into the phi, because we will run this block several times | |
657 // if there are several copies of 'mem'. (That's how DU iterators work.) | |
658 phi_inputs.at_put(store->_idx, num_mem_inputs); | |
659 assert(PhiNode::Input + num_mem_inputs < store->req(), | |
660 "Expect at least one phi input will not be from original memory state"); | |
661 #endif //ASSERT | |
662 #endif //TRACK_PHI_INPUTS | |
663 } else if (store_block != early) { | |
664 // 'store' is between the current LCA and earliest possible block. | |
665 // Label its block, and decide later on how to raise the LCA | |
666 // to include the effect on LCA of this store. | |
667 // If this store's block gets chosen as the raised LCA, we | |
668 // will find him on the non_early_stores list and stick him | |
669 // with a precedence edge. | |
670 // (But, don't bother if LCA is already raised all the way.) | |
671 if (LCA != early) { | |
672 store_block->set_raise_LCA_mark(load_index); | |
673 must_raise_LCA = true; | |
674 non_early_stores.push(store); | |
675 } | |
676 } else { | |
677 // Found a possibly-interfering store in the load's 'early' block. | |
678 // This means 'load' cannot sink at all in the dominator tree. | |
679 // Add an anti-dep edge, and squeeze 'load' into the highest block. | |
680 assert(store != load->in(0), "dependence cycle found"); | |
681 if (verify) { | |
682 assert(store->find_edge(load) != -1, "missing precedence edge"); | |
683 } else { | |
684 store->add_prec(load); | |
685 } | |
686 LCA = early; | |
687 // This turns off the process of gathering non_early_stores. | |
688 } | |
689 } | |
690 // (Worklist is now empty; all nearby stores have been visited.) | |
691 | |
692 // Finished if 'load' must be scheduled in its 'early' block. | |
693 // If we found any stores there, they have already been given | |
694 // precedence edges. | |
695 if (LCA == early) return LCA; | |
696 | |
697 // We get here only if there are no possibly-interfering stores | |
698 // in the load's 'early' block. Move LCA up above all predecessors | |
699 // which contain stores we have noted. | |
700 // | |
701 // The raised LCA block can be a home to such interfering stores, | |
702 // but its predecessors must not contain any such stores. | |
703 // | |
704 // The raised LCA will be a lower bound for placing the load, | |
705 // preventing the load from sinking past any block containing | |
706 // a store that may invalidate the memory state required by 'load'. | |
707 if (must_raise_LCA) | |
708 LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs); | |
709 if (LCA == early) return LCA; | |
710 | |
711 // Insert anti-dependence edges from 'load' to each store | |
712 // in the non-early LCA block. | |
713 // Mine the non_early_stores list for such stores. | |
714 if (LCA->raise_LCA_mark() == load_index) { | |
715 while (non_early_stores.size() > 0) { | |
716 Node* store = non_early_stores.pop(); | |
717 Block* store_block = _bbs[store->_idx]; | |
718 if (store_block == LCA) { | |
719 // add anti_dependence from store to load in its own block | |
720 assert(store != load->in(0), "dependence cycle found"); | |
721 if (verify) { | |
722 assert(store->find_edge(load) != -1, "missing precedence edge"); | |
723 } else { | |
724 store->add_prec(load); | |
725 } | |
726 } else { | |
727 assert(store_block->raise_LCA_mark() == load_index, "block was marked"); | |
728 // Any other stores we found must be either inside the new LCA | |
729 // or else outside the original LCA. In the latter case, they | |
730 // did not interfere with any use of 'load'. | |
731 assert(LCA->dominates(store_block) | |
732 || !LCA_orig->dominates(store_block), "no stray stores"); | |
733 } | |
734 } | |
735 } | |
736 | |
737 // Return the highest block containing stores; any stores | |
738 // within that block have been given anti-dependence edges. | |
739 return LCA; | |
740 } | |
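// Note on insert_anti_dependences above: the anti-dependence is encoded
// as a precedence edge, store->add_prec(load).  E.g. for
//   v = p.x;   // load
//   p.x = 0;   // store in the same alias class on the same path
// the precedence edge makes local scheduling (LCM) emit the load first
// if both land in one block, while the raised LCA keeps the load from
// sinking into any block where the store could already have run.
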
// This class is used to iterate backwards over the nodes in the graph.

class Node_Backward_Iterator {

private:
  Node_Backward_Iterator();

public:
  // Constructor for the iterator
  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);

  // Postincrement operator to iterate over the nodes
  Node *next();

private:
  VectorSet   &_visited;
  Node_List   &_stack;
  Block_Array &_bbs;
};

// Constructor for the Node_Backward_Iterator
Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
  : _visited(visited), _stack(stack), _bbs(bbs) {
  // The stack should contain exactly the root
  stack.clear();
  stack.push(root);

  // Clear the visited bits
  visited.Clear();
}

// Iterator for the Node_Backward_Iterator
Node *Node_Backward_Iterator::next() {

  // If the _stack is empty, then just return NULL: finished.
  if ( !_stack.size() )
    return NULL;

  // '_stack' is emulating a real _stack.  The 'visit-all-users' loop has been
  // made stateless, so I do not need to record the index 'i' on my _stack.
  // Instead I visit all users each time, scanning for unvisited users.
  // I visit unvisited not-anti-dependence users first, then anti-dependent
  // children next.
  Node *self = _stack.pop();

  // I cycle here when I am entering a deeper level of recursion.
  // The key variable 'self' was set prior to jumping here.
  while( 1 ) {

    _visited.set(self->_idx);

    // Now schedule all uses as late as possible.
    uint src     = self->is_Proj() ? self->in(0)->_idx : self->_idx;
    uint src_rpo = _bbs[src]->_rpo;

    // Schedule all nodes in a post-order visit
    Node *unvisited = NULL;  // Unvisited anti-dependent Node, if any

    // Scan for unvisited nodes
    for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
      // For all uses, schedule late
      Node* n = self->fast_out(i); // Use

      // Skip already visited children
      if ( _visited.test(n->_idx) )
        continue;

      // do not traverse backward control edges
      Node *use = n->is_Proj() ? n->in(0) : n;
      uint use_rpo = _bbs[use->_idx]->_rpo;

      if ( use_rpo < src_rpo )
        continue;

      // Phi nodes always precede uses in a basic block
      if ( use_rpo == src_rpo && use->is_Phi() )
        continue;

      unvisited = n;      // Found unvisited

      // Check for possible-anti-dependent
      if( !n->needs_anti_dependence_check() )
        break;            // Not visited, not anti-dep; schedule it NOW
    }

    // Did I find an unvisited not-anti-dependent Node?
    if ( !unvisited )
      break;                  // All done with children; post-visit 'self'

    // Visit the unvisited Node.  Contains the obvious push to
    // indicate I'm entering a deeper level of recursion.  I push the
    // old state onto the _stack and set a new state and loop (recurse).
    _stack.push(self);
    self = unvisited;
  } // End recursion loop

  return self;
}

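// Note on Node_Backward_Iterator above: next() yields nodes in a
// post-order over def->use edges, so a node with no unvisited uses is
// returned first and every use comes back before its defs.  Because each
// iteration rescans all of self's outputs instead of saving a position,
// the emulated recursion stacks only nodes, not (node, index) pairs.
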
//------------------------------ComputeLatenciesBackwards----------------------
// Compute the latency of all the instructions.
void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
#ifndef PRODUCT
  if (trace_opto_pipelining())
    tty->print("\n#---- ComputeLatenciesBackwards ----\n");
#endif

  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
  Node *n;

  // Walk over all the nodes from last to first
  while (n = iter.next()) {
    // Set the latency for the definitions of this instruction
    partial_latency_of_defs(n);
  }
} // end ComputeLatenciesBackwards

//------------------------------partial_latency_of_defs------------------------
// Compute the latency impact of this node on all defs.  This computes
// a number that increases as we approach the beginning of the routine.
void PhaseCFG::partial_latency_of_defs(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
               n->_idx, _node_latency->at_grow(n->_idx));
    dump();
  }
#endif

  if (n->is_Proj())
    n = n->in(0);

  if (n->is_Root())
    return;

  uint nlen = n->len();
  uint use_latency = _node_latency->at_grow(n->_idx);
  uint use_pre_order = _bbs[n->_idx]->_pre_order;

  for ( uint j=0; j<nlen; j++ ) {
    Node *def = n->in(j);

    if (!def || def == n)
      continue;

    // Walk backwards thru projections
    if (def->is_Proj())
      def = def->in(0);

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    in(%2d): ", j);
      def->dump();
    }
#endif

    // If the defining block is not known, assume it is ok
    Block *def_block = _bbs[def->_idx];
    uint def_pre_order = def_block ? def_block->_pre_order : 0;

    if ( (use_pre_order <  def_pre_order) ||
         (use_pre_order == def_pre_order && n->is_Phi()) )
      continue;

    uint delta_latency = n->latency(j);
    uint current_latency = delta_latency + use_latency;

    if (_node_latency->at_grow(def->_idx) < current_latency) {
      _node_latency->at_put_grow(def->_idx, current_latency);
    }

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("#     %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
                    use_latency, j, delta_latency, current_latency, def->_idx,
                    _node_latency->at_grow(def->_idx));
    }
#endif
  }
}

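// Note on partial_latency_of_defs above: driven use-first by
// ComputeLatenciesBackwards, it implements the recurrence
//   node_latency[def] = max(node_latency[def],
//                           node_latency[use] + use->latency(j))
// so a larger value means the node sits further from the end of the
// routine, i.e. it has more latency to hide below it.
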
//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
  // If self-reference, return no latency
  if (use == n || use->is_Root())
    return 0;

  uint def_pre_order = _bbs[def->_idx]->_pre_order;
  uint latency = 0;

  // If the use is not a projection, then it is simple...
  if (!use->is_Proj()) {
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    out(): ");
      use->dump();
    }
#endif

    uint use_pre_order = _bbs[use->_idx]->_pre_order;

    if (use_pre_order < def_pre_order)
      return 0;

    if (use_pre_order == def_pre_order && use->is_Phi())
      return 0;

    uint nlen = use->len();
    uint nl = _node_latency->at_grow(use->_idx);

    for ( uint j=0; j<nlen; j++ ) {
      if (use->in(j) == n) {
        // Change this if we want local latencies
        uint ul = use->latency(j);
        uint  l = ul + nl;
        if (latency < l) latency = l;
#ifndef PRODUCT
        if (trace_opto_pipelining()) {
          tty->print_cr("#     %d + edge_latency(%d) == %d -> %d, latency = %d",
                        nl, j, ul, l, latency);
        }
#endif
      }
    }
  } else {
    // This is a projection, just grab the latency of the use(s)
    for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
      uint l = latency_from_use(use, def, use->fast_out(j));
      if (latency < l) latency = l;
    }
  }

  return latency;
}

980 // Compute the latency of this instruction relative to all of it's uses. | |
981 // This computes a number that increases as we approach the beginning of the | |
982 // routine. | |
983 void PhaseCFG::latency_from_uses(Node *n) { | |
984 // Set the latency for this instruction | |
985 #ifndef PRODUCT | |
986 if (trace_opto_pipelining()) { | |
987 tty->print("# latency_from_outputs: node_latency[%d] = %d for node", | |
1685 | 988 n->_idx, _node_latency->at_grow(n->_idx)); |
0 | 989 dump(); |
990 } | |
991 #endif | |
992 uint latency=0; | |
993 const Node *def = n->is_Proj() ? n->in(0): n; | |
994 | |
995 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |
996 uint l = latency_from_use(n, def, n->fast_out(i)); | |
997 | |
998 if (latency < l) latency = l; | |
999 } | |
1000 | |
1685 | 1001 _node_latency->at_put_grow(n->_idx, latency); |
0 | 1002 } |
1003 | |
1004 //------------------------------hoist_to_cheaper_block------------------------- | |
1005 // Pick a block for node self, between early and LCA, that is a cheaper | |
1006 // alternative to LCA. | |
1007 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { | |
1008 const double delta = 1+PROB_UNLIKELY_MAG(4); | |
1009 Block* least = LCA; | |
1010 double least_freq = least->_freq; | |
1685 | 1011 uint target = _node_latency->at_grow(self->_idx); |
1012 uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx); | |
1013 uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx); | |
0 | 1014 bool in_latency = (target <= start_latency); |
1015 const Block* root_block = _bbs[_root->_idx]; | |
1016 | |
1017 // Turn off latency scheduling if scheduling is just plain off | |
1018 if (!C->do_scheduling()) | |
1019 in_latency = true; | |
1020 | |
1021 // Do not hoist (to cover latency) instructions which target a | |
1022 // single register. Hoisting stretches the live range of the | |
1023 // single register and may force spilling. | |
1024 MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL; | |
1025 if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty()) | |
1026 in_latency = true; | |
1027 | |
1028 #ifndef PRODUCT | |
1029 if (trace_opto_pipelining()) { | |
1030 tty->print("# Find cheaper block for latency %d: ", | |
1685 | 1031 _node_latency->at_grow(self->_idx)); |
0 | 1032 self->dump(); |
1033 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | |
1034 LCA->_pre_order, | |
1035 LCA->_nodes[0]->_idx, | |
1036 start_latency, | |
1037 LCA->_nodes[LCA->end_idx()]->_idx, | |
1038 end_latency, | |
1039 least_freq); | |
1040 } | |
1041 #endif | |
1042 | |
1043 // Walk up the dominator tree from LCA (Lowest common ancestor) to | |
1044 // the earliest legal location. Capture the least execution frequency. | |
1045 while (LCA != early) { | |
1046 LCA = LCA->_idom; // Follow up the dominator tree | |
1047 | |
1048 if (LCA == NULL) { | |
1049 // Bailout without retry | |
1050 C->record_method_not_compilable("late schedule failed: LCA == NULL"); | |
1051 return least; | |
1052 } | |
1053 | |
1054 // Don't hoist machine instructions to the root basic block | |
1055 if (mach && LCA == root_block) | |
1056 break; | |
1057 | |
1685 | 1058 uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx); |
0 | 1059 uint end_idx = LCA->end_idx(); |
1685 | 1060 uint end_lat = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx); |
0 | 1061 double LCA_freq = LCA->_freq; |
1062 #ifndef PRODUCT | |
1063 if (trace_opto_pipelining()) { | |
1064 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | |
1065 LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq); | |
1066 } | |
1067 #endif | |
1068 if (LCA_freq < least_freq || // Better Frequency | |
1069 ( !in_latency && // No block containing latency | |
1070 LCA_freq < least_freq * delta && // No worse frequency | |
1071 target >= end_lat && // within latency range | |
1072 !self->is_iteratively_computed() ) // But don't hoist IV increments | |
1073 // because they may end up above other uses of their phi forcing | |
1074 // their result register to be different from their input. | |
1075 ) { | |
1076 least = LCA; // Found cheaper block | |
1077 least_freq = LCA_freq; | |
1078 start_latency = start_lat; | |
1079 end_latency = end_lat; | |
1080 if (target <= start_lat) | |
1081 in_latency = true; | |
1082 } | |
1083 } | |
1084 | |
1085 #ifndef PRODUCT | |
1086 if (trace_opto_pipelining()) { | |
1087 tty->print_cr("# Choose block B%d with start latency=%d and freq=%g", | |
1088 least->_pre_order, start_latency, least_freq); | |
1089 } | |
1090 #endif | |
1091 | |
1092 // See if the latency needs to be updated | |
1093 if (target < end_latency) { | |
1094 #ifndef PRODUCT | |
1095 if (trace_opto_pipelining()) { | |
1096 tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); | |
1097 } | |
1098 #endif | |
1685 | 1099 _node_latency->at_put_grow(self->_idx, end_latency); |
0 | 1100 partial_latency_of_defs(self); |
1101 } | |
1102 | |
1103 return least; | |
1104 } | |
1105 | |
1106 | |
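// Note on hoist_to_cheaper_block above: a candidate block strictly
// cheaper in frequency always wins; a candidate within the 'delta'
// tolerance (slightly above 1.0) is taken only when the node is not yet
// latency-covered, the candidate still hides the latency
// (target >= end_lat), and the node is not iteratively computed.  So the
// walk trades at most a negligible frequency increase for latency cover,
// never hoisting merely to tie.
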
1107 //------------------------------schedule_late----------------------------------- | |
1108 // Now schedule all codes as LATE as possible. This is the LCA in the | |
1109 // dominator tree of all USES of a value. Pick the block with the least | |
1110 // loop nesting depth that is lowest in the dominator tree. | |
1111 extern const char must_clone[]; | |
1112 void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) { | |
1113 #ifndef PRODUCT | |
1114 if (trace_opto_pipelining()) | |
1115 tty->print("\n#---- schedule_late ----\n"); | |
1116 #endif | |
1117 | |
1118 Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs); | |
1119 Node *self; | |
1120 | |
1121 // Walk over all the nodes from last to first | |
1122 while ((self = iter.next()) != NULL) { |
1123 Block* early = _bbs[self->_idx]; // Earliest legal placement | |
1124 | |
1125 if (self->is_top()) { | |
1126 // Top node goes in bb #2 with other constants. | |
1127 // It must be special-cased, because it has no out edges. | |
1128 early->add_inst(self); | |
1129 continue; | |
1130 } | |
1131 | |
1132 // No uses, just terminate | |
1133 if (self->outcnt() == 0) { | |
1134 assert(self->Opcode() == Op_MachProj, "sanity"); | |
1135 continue; // Must be a dead machine projection | |
1136 } | |
1137 | |
1138 // If node is pinned in the block, then no scheduling can be done. | |
1139 if( self->pinned() ) // Pinned in block? | |
1140 continue; | |
1141 | |
1142 MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL; | |
1143 if (mach) { | |
1144 switch (mach->ideal_Opcode()) { | |
1145 case Op_CreateEx: | |
1146 // Don't move exception creation | |
1147 early->add_inst(self); | |
1148 continue; | |
1149 break; | |
1150 case Op_CheckCastPP: | |
1151 // Don't move CheckCastPP nodes away from their input, if the input | |
1152 // is a rawptr (5071820). | |
1153 Node *def = self->in(1); | |
1154 if (def != NULL && def->bottom_type()->base() == Type::RawPtr) { | |
1155 early->add_inst(self); | |
833 | 1156 #ifdef ASSERT |
1157 _raw_oops.push(def); |
1158 #endif |
0 | 1159 continue; |
1160 } | |
1161 break; | |
1162 } | |
1163 } | |
1164 | |
1165 // Gather LCA of all uses | |
1166 Block *LCA = NULL; | |
1167 { | |
1168 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { | |
1169 // For all uses, find LCA | |
1170 Node* use = self->fast_out(i); | |
1171 LCA = raise_LCA_above_use(LCA, use, self, _bbs); | |
1172 } | |
1173 } // (Hide defs of imax, i from rest of block.) | |
1174 | |
1175 // Place temps in the block of their use. This isn't a | |
1176 // requirement for correctness but it reduces useless | |
1177 // interference between temps and other nodes. | |
1178 if (mach != NULL && mach->is_MachTemp()) { | |
1179 _bbs.map(self->_idx, LCA); | |
1180 LCA->add_inst(self); | |
1181 continue; | |
1182 } | |
1183 | |
1184 // Check if 'self' could be anti-dependent on memory | |
1185 if (self->needs_anti_dependence_check()) { | |
1186 // Hoist LCA above possible-defs and insert anti-dependences to | |
1187 // defs in new LCA block. | |
1188 LCA = insert_anti_dependences(LCA, self); | |
1189 } | |
1190 | |
1191 if (early->_dom_depth > LCA->_dom_depth) { | |
1192 // Somehow the LCA has moved above the earliest legal point. | |
1193 // (One way this can happen is via memory_early_block.) | |
1194 if (C->subsume_loads() && !C->failing()) { |
1195 // Retry with subsume_loads == false | |
1196 // If this is the first failure, the sentinel string will "stick" | |
1197 // to the Compile object, and the C2Compiler will see it and retry. | |
1198 C->record_failure(C2Compiler::retry_no_subsuming_loads()); | |
1199 } else { | |
1200 // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) | |
1201 C->record_method_not_compilable("late schedule failed: incorrect graph"); | |
1202 } | |
1203 return; | |
1204 } | |
1205 | |
1206 // If there is no opportunity to hoist, then we're done. | |
1207 bool try_to_hoist = (LCA != early); | |
1208 | |
1209 // Must clone guys stay next to use; no hoisting allowed. | |
1210 // Also cannot hoist guys that alter memory or are otherwise not | |
1211 // allocatable (hoisting can make a value live longer, leading to | |
1212 // anti and output dependency problems which are normally resolved | |
1213 // by the register allocator giving everyone a different register). | |
1214 if (mach != NULL && must_clone[mach->ideal_Opcode()]) | |
1215 try_to_hoist = false; | |
1216 | |
1217 Block* late = NULL; | |
1218 if (try_to_hoist) { | |
1219 // Now find the block with the least execution frequency. | |
1220 // Start at the latest schedule and work up to the earliest schedule | |
1221 // in the dominator tree. Thus the Node will dominate all its uses. | |
1222 late = hoist_to_cheaper_block(LCA, early, self); | |
1223 } else { | |
1224 // Just use the LCA of the uses. | |
1225 late = LCA; | |
1226 } | |
1227 | |
1228 // Put the node into target block | |
1229 schedule_node_into_block(self, late); | |
1230 | |
1231 #ifdef ASSERT | |
1232 if (self->needs_anti_dependence_check()) { | |
1233 // since precedence edges are only inserted when we're sure they | |
1234 // are needed make sure that after placement in a block we don't | |
1235 // need any new precedence edges. | |
1236 verify_anti_dependences(late, self); | |
1237 } | |
1238 #endif | |
1239 } // Loop until all nodes have been visited | |
1240 | |
1241 } // end ScheduleLate | |
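// schedule_late() keeps raising a running LCA over the dominator tree as it
// visits each use. A minimal generic sketch of that LCA step, with a
// hypothetical 'SketchDomNode' standing in for Block and its _idom and
// _dom_depth fields (the real raise_LCA_above_use() layers use-specific
// handling for Phis and projections on top of this):
struct SketchDomNode { SketchDomNode* idom; int depth; };
static SketchDomNode* sketch_dom_lca(SketchDomNode* a, SketchDomNode* b) {
  if (a == NULL) return b;                    // first use seen
  if (b == NULL) return a;
  while (a->depth > b->depth) a = a->idom;    // lift the deeper side
  while (b->depth > a->depth) b = b->idom;
  while (a != b) { a = a->idom; b = b->idom; }
  return a;                                   // common dominator of the uses
}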
1242 | |
1243 //------------------------------GlobalCodeMotion------------------------------- | |
1244 void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) { | |
1245 ResourceMark rm; | |
1246 | |
1247 #ifndef PRODUCT | |
1248 if (trace_opto_pipelining()) { | |
1249 tty->print("\n---- Start GlobalCodeMotion ----\n"); | |
1250 } | |
1251 #endif | |
1252 | |
1253 // Initialize the bbs.map for things on the proj_list | |
1254 uint i; | |
1255 for( i=0; i < proj_list.size(); i++ ) | |
1256 _bbs.map(proj_list[i]->_idx, NULL); | |
1257 | |
1258 // Set the basic block for Nodes pinned into blocks | |
1259 Arena *a = Thread::current()->resource_area(); | |
1260 VectorSet visited(a); | |
1261 schedule_pinned_nodes( visited ); | |
1262 | |
1263 // Find the earliest Block any instruction can be placed in. Some | |
1264 // instructions are pinned into Blocks. Unpinned instructions can | |
1265 // appear in the last block in which all their inputs occur. |
1266 visited.Clear(); | |
1267 Node_List stack(a); | |
1268 stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list | |
1269 if (!schedule_early(visited, stack)) { | |
1270 // Bailout without retry | |
1271 C->record_method_not_compilable("early schedule failed"); | |
1272 return; | |
1273 } | |
1274 | |
1275 // Build Def-Use edges. | |
1276 proj_list.push(_root); // Add real root as another root | |
1277 proj_list.pop(); | |
1278 | |
1279 // Compute the latency information (via backwards walk) for all the | |
1280 // instructions in the graph | |
1685 | 1281 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
0 | 1282 |
1283 if( C->do_scheduling() ) | |
1284 ComputeLatenciesBackwards(visited, stack); | |
1285 | |
1286 // Now schedule all codes as LATE as possible. This is the LCA in the | |
1287 // dominator tree of all USES of a value. Pick the block with the least | |
1288 // loop nesting depth that is lowest in the dominator tree. | |
1289 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) | |
1290 schedule_late(visited, stack); | |
1291 if( C->failing() ) { | |
1292 // schedule_late fails only when graph is incorrect. | |
1293 assert(!VerifyGraphEdges, "verification should have failed"); | |
1294 return; | |
1295 } | |
1296 | |
1297 unique = C->unique(); | |
1298 | |
1299 #ifndef PRODUCT | |
1300 if (trace_opto_pipelining()) { | |
1301 tty->print("\n---- Detect implicit null checks ----\n"); | |
1302 } | |
1303 #endif | |
1304 | |
1305 // Detect implicit-null-check opportunities. Basically, find NULL checks | |
1306 // with suitable memory ops nearby. Use the memory op to do the NULL check. | |
1307 // I can generate a memory op if there is not one nearby. | |
1308 if (C->is_method_compilation()) { | |
1309 // Don't do it for natives, adapters, or runtime stubs | |
1310 int allowed_reasons = 0; | |
1311 // ...and don't do it when there have been too many traps, globally. | |
1312 for (int reason = (int)Deoptimization::Reason_none+1; | |
1313 reason < Compile::trapHistLength; reason++) { | |
1314 assert(reason < BitsPerInt, "recode bit map"); | |
1315 if (!C->too_many_traps((Deoptimization::DeoptReason) reason)) | |
1316 allowed_reasons |= nth_bit(reason); | |
1317 } | |
1318 // By reversing the loop direction we get a very minor gain on mpegaudio. | |
1319 // Feel free to revert to a forward loop for clarity. | |
1320 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { | |
1321 for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) { | |
1322 Node *proj = matcher._null_check_tests[i ]; | |
1323 Node *val = matcher._null_check_tests[i+1]; | |
1324 _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons); | |
1325 // The implicit_null_check will only perform the transformation | |
1326 // if the null branch is truly uncommon, *and* it leads to an | |
1327 // uncommon trap. Combined with the too_many_traps guards | |
1328 // above, this prevents SEGV storms reported in 6366351, | |
1329 // by recompiling offending methods without this optimization. | |
1330 } | |
1331 } | |
1332 | |
1333 #ifndef PRODUCT | |
1334 if (trace_opto_pipelining()) { | |
1335 tty->print("\n---- Start Local Scheduling ----\n"); | |
1336 } | |
1337 #endif | |
1338 | |
1339 // Schedule locally. Right now a simple topological sort. | |
1340 // Later, do a real latency aware scheduler. | |
1341 int *ready_cnt = NEW_RESOURCE_ARRAY(int,C->unique()); | |
1342 memset( ready_cnt, -1, C->unique() * sizeof(int) ); | |
1343 visited.Clear(); | |
1344 for (i = 0; i < _num_blocks; i++) { | |
1345 if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) { | |
1346 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { | |
1347 C->record_method_not_compilable("local schedule failed"); | |
1348 } | |
1349 return; | |
1350 } | |
1351 } | |
1352 | |
1353 // If we inserted any instructions between a Call and its CatchNode, |
1354 // clone the instructions on all paths below the Catch. | |
1355 for( i=0; i < _num_blocks; i++ ) | |
1356 _blocks[i]->call_catch_cleanup(_bbs); | |
1357 | |
1358 #ifndef PRODUCT | |
1359 if (trace_opto_pipelining()) { | |
1360 tty->print("\n---- After GlobalCodeMotion ----\n"); | |
1361 for (uint i = 0; i < _num_blocks; i++) { | |
1362 _blocks[i]->dump(); | |
1363 } | |
1364 } | |
1365 #endif | |
1685 | 1366 // Dead. |
1367 _node_latency = (GrowableArray<uint> *)0xdeadbeef; | |
0 | 1368 } |
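// The implicit-null-check setup above packs one bit per deoptimization
// reason into 'allowed_reasons'. A minimal sketch of that mask with a
// hypothetical predicate; nth_bit(r) behaves as 1 << r for the in-range
// reasons asserted above:
static int sketch_allowed_reasons(int num_reasons, bool (*too_many)(int)) {
  int allowed = 0;
  for (int r = 1; r < num_reasons; r++) {     // skip Reason_none at index 0
    if (!too_many(r))
      allowed |= (1 << r);                    // reason r may still use the check
  }
  return allowed;
}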
1369 | |
1370 | |
1371 //------------------------------Estimate_Block_Frequency----------------------- | |
1372 // Estimate block frequencies based on IfNode probabilities. | |
1373 void PhaseCFG::Estimate_Block_Frequency() { | |
418 | 1374 |
1375 // Force conditional branches leading to uncommon traps to be unlikely, | |
1376 // not because we get to the uncommon_trap with less relative frequency, | |
1377 // but because an uncommon_trap typically causes a deopt, so we only get | |
1378 // there once. | |
1379 if (C->do_freq_based_layout()) { | |
1380 Block_List worklist; | |
1381 Block* root_blk = _blocks[0]; | |
1382 for (uint i = 1; i < root_blk->num_preds(); i++) { | |
1383 Block *pb = _bbs[root_blk->pred(i)->_idx]; | |
1384 if (pb->has_uncommon_code()) { | |
1385 worklist.push(pb); | |
1386 } | |
1387 } | |
1388 while (worklist.size() > 0) { | |
1389 Block* uct = worklist.pop(); | |
1390 if (uct == _broot) continue; | |
1391 for (uint i = 1; i < uct->num_preds(); i++) { | |
1392 Block *pb = _bbs[uct->pred(i)->_idx]; | |
1393 if (pb->_num_succs == 1) { | |
1394 worklist.push(pb); | |
1395 } else if (pb->num_fall_throughs() == 2) { | |
1396 pb->update_uncommon_branch(uct); | |
1397 } | |
1398 } | |
1399 } | |
1400 } | |
0 | 1401 |
1402 // Create the loop tree and calculate loop depth. | |
1403 _root_loop = create_loop_tree(); | |
1404 _root_loop->compute_loop_depth(0); | |
1405 | |
1406 // Compute block frequency of each block, relative to a single loop entry. | |
1407 _root_loop->compute_freq(); | |
1408 | |
1409 // Adjust all frequencies to be relative to a single method entry | |
418 | 1410 _root_loop->_freq = 1.0; |
0 | 1411 _root_loop->scale_freq(); |
1412 | |
673 | 1413 // Save outermost loop frequency for LRG frequency threshold |
1414 _outer_loop_freq = _root_loop->outer_loop_freq(); | |
1415 | |
0 | 1416 // Force paths ending at uncommon traps to be infrequent |
418 | 1417 if (!C->do_freq_based_layout()) { |
1418 Block_List worklist; | |
1419 Block* root_blk = _blocks[0]; | |
1420 for (uint i = 1; i < root_blk->num_preds(); i++) { | |
1421 Block *pb = _bbs[root_blk->pred(i)->_idx]; | |
1422 if (pb->has_uncommon_code()) { | |
1423 worklist.push(pb); | |
1424 } | |
0 | 1425 } |
418 | 1426 while (worklist.size() > 0) { |
1427 Block* uct = worklist.pop(); | |
1428 uct->_freq = PROB_MIN; | |
1429 for (uint i = 1; i < uct->num_preds(); i++) { | |
1430 Block *pb = _bbs[uct->pred(i)->_idx]; | |
1431 if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { | |
1432 worklist.push(pb); | |
1433 } | |
0 | 1434 } |
1435 } | |
1436 } | |
1437 | |
552 | 1438 #ifdef ASSERT |
1439 for (uint i = 0; i < _num_blocks; i++ ) { |
1440 Block *b = _blocks[i]; |
605 | 1441 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); |
552 | 1442 } |
1443 #endif |
1444 |
0 | 1445 #ifndef PRODUCT |
1446 if (PrintCFGBlockFreq) { | |
1447 tty->print_cr("CFG Block Frequencies"); | |
1448 _root_loop->dump_tree(); | |
1449 if (Verbose) { | |
1450 tty->print_cr("PhaseCFG dump"); | |
1451 dump(); | |
1452 tty->print_cr("Node dump"); | |
1453 _root->dump(99999); | |
1454 } | |
1455 } | |
1456 #endif | |
1457 } | |
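// Worked example of the uncommon-trap handling above, on a hypothetical
// CFG (freq-based-layout path): if B7 ends in an uncommon trap, its
// predecessors are pushed on the worklist; a single-successor predecessor
// is walked through, while a two-way branch such as B5 is re-weighted via
// update_uncommon_branch(), so the edge B5->B7 drops to PROB_MIN and B5's
// other successor absorbs the remaining probability.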
1458 | |
1459 //----------------------------create_loop_tree-------------------------------- | |
1460 // Create a loop tree from the CFG | |
1461 CFGLoop* PhaseCFG::create_loop_tree() { | |
1462 | |
1463 #ifdef ASSERT | |
1464 assert( _blocks[0] == _broot, "" ); | |
1465 for (uint i = 0; i < _num_blocks; i++ ) { | |
1466 Block *b = _blocks[i]; | |
1467 // Check that _loop fields are clear...we could clear them if not. |
1468 assert(b->_loop == NULL, "clear _loop expected"); | |
1469 // Sanity check that the RPO numbering is reflected in the _blocks array. | |
1470 // It doesn't have to be for the loop tree to be built, but if it is not, | |
1471 // then the blocks have been reordered since dom graph building...which | |
1472 // may invalidate the RPO numbering. |
1473 assert(b->_rpo == i, "unexpected reverse post order number"); | |
1474 } | |
1475 #endif | |
1476 | |
1477 int idct = 0; | |
1478 CFGLoop* root_loop = new CFGLoop(idct++); | |
1479 | |
1480 Block_List worklist; | |
1481 | |
1482 // Assign blocks to loops | |
1483 for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block | |
1484 Block *b = _blocks[i]; | |
1485 | |
1486 if (b->head()->is_Loop()) { | |
1487 Block* loop_head = b; | |
1488 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | |
1489 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); | |
1490 Block* tail = _bbs[tail_n->_idx]; | |
1491 | |
1492 // Defensively filter out Loop nodes for non-single-entry loops. | |
1493 // For all reasonable loops, the head occurs before the tail in RPO. | |
1494 if (i <= tail->_rpo) { | |
1495 | |
1496 // The tail and (recursive) predecessors of the tail | |
1497 // are made members of a new loop. | |
1498 | |
1499 assert(worklist.size() == 0, "nonempty worklist"); | |
1500 CFGLoop* nloop = new CFGLoop(idct++); | |
1501 assert(loop_head->_loop == NULL, "just checking"); | |
1502 loop_head->_loop = nloop; | |
1503 // Add to nloop so push_pred() will skip over inner loops | |
1504 nloop->add_member(loop_head); | |
1505 nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs); | |
1506 | |
1507 while (worklist.size() > 0) { | |
1508 Block* member = worklist.pop(); | |
1509 if (member != loop_head) { | |
1510 for (uint j = 1; j < member->num_preds(); j++) { | |
1511 nloop->push_pred(member, j, worklist, _bbs); | |
1512 } | |
1513 } | |
1514 } | |
1515 } | |
1516 } | |
1517 } | |
1518 | |
1519 // Create a member list for each loop consisting | |
1520 // of both blocks and (immediate child) loops. | |
1521 for (uint i = 0; i < _num_blocks; i++) { | |
1522 Block *b = _blocks[i]; | |
1523 CFGLoop* lp = b->_loop; | |
1524 if (lp == NULL) { | |
1525 // Not assigned to a loop. Add it to the method's pseudo loop. | |
1526 b->_loop = root_loop; | |
1527 lp = root_loop; | |
1528 } | |
1529 if (lp == root_loop || b != lp->head()) { // loop heads are already members | |
1530 lp->add_member(b); | |
1531 } | |
1532 if (lp != root_loop) { | |
1533 if (lp->parent() == NULL) { | |
1534 // Not a nested loop. Make it a child of the method's pseudo loop. | |
1535 root_loop->add_nested_loop(lp); | |
1536 } | |
1537 if (b == lp->head()) { | |
1538 // Add nested loop to member list of parent loop. | |
1539 lp->parent()->add_member(lp); | |
1540 } | |
1541 } | |
1542 } | |
1543 | |
1544 return root_loop; | |
1545 } | |
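// Worked example of the RPO filter above, with hypothetical numbering: for
// a natural loop the head precedes the tail in reverse post order, e.g.
// head at _rpo == 3 and back-edge tail at _rpo == 9, so 'i <= tail->_rpo'
// holds and a CFGLoop is created. A Loop node whose "tail" appears earlier
// in RPO is not a reasonable single-entry loop and is deliberately skipped.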
1546 | |
1547 //------------------------------push_pred-------------------------------------- | |
1548 void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) { | |
1549 Node* pred_n = blk->pred(i); | |
1550 Block* pred = node_to_blk[pred_n->_idx]; | |
1551 CFGLoop *pred_loop = pred->_loop; | |
1552 if (pred_loop == NULL) { | |
1553 // Filter out blocks for non-single-entry loops. | |
1554 // For all reasonable loops, the head occurs before the tail in RPO. | |
1555 if (pred->_rpo > head()->_rpo) { | |
1556 pred->_loop = this; | |
1557 worklist.push(pred); | |
1558 } | |
1559 } else if (pred_loop != this) { | |
1560 // Nested loop. | |
1561 while (pred_loop->_parent != NULL && pred_loop->_parent != this) { | |
1562 pred_loop = pred_loop->_parent; | |
1563 } | |
1564 // Make pred's loop be a child | |
1565 if (pred_loop->_parent == NULL) { | |
1566 add_nested_loop(pred_loop); | |
1567 // Continue with loop entry predecessor. | |
1568 Block* pred_head = pred_loop->head(); | |
1569 assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | |
1570 assert(pred_head != head(), "loop head in only one loop"); | |
1571 push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk); | |
1572 } else { | |
1573 assert(pred_loop->_parent == this && _parent == NULL, "just checking"); | |
1574 } | |
1575 } | |
1576 } | |
1577 | |
1578 //------------------------------add_nested_loop-------------------------------- | |
1579 // Make cl a child of the current loop in the loop tree. | |
1580 void CFGLoop::add_nested_loop(CFGLoop* cl) { | |
1581 assert(_parent == NULL, "no parent yet"); | |
1582 assert(cl != this, "not my own parent"); | |
1583 cl->_parent = this; | |
1584 CFGLoop* ch = _child; | |
1585 if (ch == NULL) { | |
1586 _child = cl; | |
1587 } else { | |
1588 while (ch->_sibling != NULL) { ch = ch->_sibling; } | |
1589 ch->_sibling = cl; | |
1590 } | |
1591 } | |
1592 | |
1593 //------------------------------compute_loop_depth----------------------------- | |
1594 // Store the loop depth in each CFGLoop object. | |
1595 // Recursively walk the children to do the same for them. | |
1596 void CFGLoop::compute_loop_depth(int depth) { | |
1597 _depth = depth; | |
1598 CFGLoop* ch = _child; | |
1599 while (ch != NULL) { | |
1600 ch->compute_loop_depth(depth + 1); | |
1601 ch = ch->_sibling; | |
1602 } | |
1603 } | |
1604 | |
1605 //------------------------------compute_freq----------------------------------- | |
1606 // Compute the frequency of each block and loop, relative to a single entry | |
1607 // into the dominating loop head. | |
1608 void CFGLoop::compute_freq() { | |
1609 // Bottom up traversal of loop tree (visit inner loops first.) | |
1610 // Set loop head frequency to 1.0, then transitively | |
1611 // compute frequency for all successors in the loop, | |
1612 // as well as for each exit edge. Inner loops are | |
1613 // treated as single blocks with loop exit targets | |
1614 // as the successor blocks. | |
1615 | |
1616 // Nested loops first | |
1617 CFGLoop* ch = _child; | |
1618 while (ch != NULL) { | |
1619 ch->compute_freq(); | |
1620 ch = ch->_sibling; | |
1621 } | |
1622 assert (_members.length() > 0, "no empty loops"); | |
1623 Block* hd = head(); | |
1624 hd->_freq = 1.0f; | |
1625 for (int i = 0; i < _members.length(); i++) { | |
1626 CFGElement* s = _members.at(i); | |
1627 float freq = s->_freq; | |
1628 if (s->is_block()) { | |
1629 Block* b = s->as_Block(); | |
1630 for (uint j = 0; j < b->_num_succs; j++) { | |
1631 Block* sb = b->_succs[j]; | |
1632 update_succ_freq(sb, freq * b->succ_prob(j)); | |
1633 } | |
1634 } else { | |
1635 CFGLoop* lp = s->as_CFGLoop(); | |
1636 assert(lp->_parent == this, "immediate child"); | |
1637 for (int k = 0; k < lp->_exits.length(); k++) { | |
1638 Block* eb = lp->_exits.at(k).get_target(); | |
1639 float prob = lp->_exits.at(k).get_prob(); | |
1640 update_succ_freq(eb, freq * prob); | |
1641 } | |
1642 } | |
1643 } | |
1644 | |
1645 // For all loops other than the outer, "method" loop, | |
1646 // sum and normalize the exit probability. The "method" loop | |
1647 // should keep the initial exit probability of 1, so that | |
1648 // inner blocks do not get erroneously scaled. | |
1649 if (_depth != 0) { | |
1650 // Total the exit probabilities for this loop. | |
1651 float exits_sum = 0.0f; | |
1652 for (int i = 0; i < _exits.length(); i++) { | |
1653 exits_sum += _exits.at(i).get_prob(); | |
1654 } | |
1655 | |
1656 // Normalize the exit probabilities. Until now, the | |
1657 // probabilities estimate the chance of exit on any |
1658 // single loop iteration; afterward, they estimate |
1659 // the probability of exit per loop entry. | |
1660 for (int i = 0; i < _exits.length(); i++) { | |
1661 Block* et = _exits.at(i).get_target(); | |
418 | 1662 float new_prob = 0.0f; |
1663 if (_exits.at(i).get_prob() > 0.0f) { | |
1664 new_prob = _exits.at(i).get_prob() / exits_sum; | |
1665 } | |
0 | 1666 BlockProbPair bpp(et, new_prob); |
1667 _exits.at_put(i, bpp); | |
1668 } | |
1669 | |
418 | 1670 // Save the total, but guard against unreasonable probability, |
0 | 1671 // as the value is used to estimate the loop trip count. |
1672 // An infinite trip count would blur relative block | |
1673 // frequencies. | |
1674 if (exits_sum > 1.0f) exits_sum = 1.0f; |
1675 if (exits_sum < PROB_MIN) exits_sum = PROB_MIN; | |
1676 _exit_prob = exits_sum; | |
1677 } | |
1678 } | |
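// Worked example of the normalization above, with hypothetical numbers: a
// loop with two exits of per-iteration probability 0.04 and 0.01 has
// exits_sum == 0.05, so the per-entry exit probabilities become 0.8 and
// 0.2, and _exit_prob == 0.05 corresponds to an expected trip count of
// roughly 1/0.05 == 20 iterations per loop entry.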
1679 | |
1680 //------------------------------succ_prob------------------------------------- | |
1681 // Determine the probability of reaching successor 'i' from the receiver block. | |
1682 float Block::succ_prob(uint i) { | |
1683 int eidx = end_idx(); | |
1684 Node *n = _nodes[eidx]; // Get ending Node | |
308 | 1685 |
1686 int op = n->Opcode(); | |
1687 if (n->is_Mach()) { | |
1688 if (n->is_MachNullCheck()) { | |
1689 // Can only reach here if called after lcm. The original Op_If is gone, | |
1690 // so we attempt to infer the probability from one or both of the | |
1691 // successor blocks. | |
1692 assert(_num_succs == 2, "expecting 2 successors of a null check"); | |
1693 // If either successor has only one predecessor, then the | |
605 | 1694 // probability estimate can be derived using the |
308 | 1695 // relative frequency of the successor and this block. |
1696 if (_succs[i]->num_preds() == 2) { | |
1697 return _succs[i]->_freq / _freq; | |
1698 } else if (_succs[1-i]->num_preds() == 2) { | |
1699 return 1 - (_succs[1-i]->_freq / _freq); | |
1700 } else { | |
1701 // Estimate using both successor frequencies | |
1702 float freq = _succs[i]->_freq; | |
1703 return freq / (freq + _succs[1-i]->_freq); | |
1704 } | |
1705 } | |
1706 op = n->as_Mach()->ideal_Opcode(); | |
1707 } | |
1708 | |
0 | 1709 |
1710 // Switch on branch type | |
1711 switch( op ) { | |
1712 case Op_CountedLoopEnd: | |
1713 case Op_If: { | |
1714 assert (i < 2, "just checking"); | |
1715 // Conditionals pass on only part of their frequency | |
1716 float prob = n->as_MachIf()->_prob; | |
1717 assert(prob >= 0.0 && prob <= 1.0, "out of range probability"); | |
1718 // If succ[i] is the FALSE branch, invert path info | |
1719 if( _nodes[i + eidx + 1]->Opcode() == Op_IfFalse ) { | |
1720 return 1.0f - prob; // not taken | |
1721 } else { | |
1722 return prob; // taken | |
1723 } | |
1724 } | |
1725 | |
1726 case Op_Jump: | |
1727 // Divide the frequency between all successors evenly | |
1728 return 1.0f/_num_succs; | |
1729 | |
1730 case Op_Catch: { | |
1731 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1732 if (ci->_con == CatchProjNode::fall_through_index) { | |
1733 // Fall-thru path gets the lion's share. | |
1734 return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs; | |
1735 } else { | |
1736 // Presume exceptional paths are equally unlikely | |
1737 return PROB_UNLIKELY_MAG(5); | |
1738 } | |
1739 } | |
1740 | |
1741 case Op_Root: | |
1742 case Op_Goto: | |
1743 // Pass frequency straight thru to target | |
1744 return 1.0f; | |
1745 | |
1746 case Op_NeverBranch: | |
1747 return 0.0f; | |
1748 | |
1749 case Op_TailCall: | |
1750 case Op_TailJump: | |
1751 case Op_Return: | |
1752 case Op_Halt: | |
1753 case Op_Rethrow: | |
1754 // Do not push out freq to root block | |
1755 return 0.0f; | |
1756 | |
1757 default: | |
1758 ShouldNotReachHere(); | |
1759 } | |
1760 | |
1761 return 0.0f; | |
1762 } | |
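// Worked example of the MachNullCheck estimate above, with hypothetical
// frequencies: if this block has _freq == 10.0 and the non-exceptional
// successor, reached only from here, has _freq == 9.99, succ_prob()
// returns 9.99/10.0 == 0.999 for it and 1 - 0.999 for the other side.
// When neither successor has a single predecessor, the fallback splits by
// relative successor frequency, freq_i / (freq_i + freq_other).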
1763 | |
418 | 1764 //------------------------------num_fall_throughs----------------------------- |
1765 // Return the number of fall-through candidates for a block | |
1766 int Block::num_fall_throughs() { | |
1767 int eidx = end_idx(); | |
1768 Node *n = _nodes[eidx]; // Get ending Node | |
1769 | |
1770 int op = n->Opcode(); | |
1771 if (n->is_Mach()) { | |
1772 if (n->is_MachNullCheck()) { | |
1773 // In theory, either side can fall through; for simplicity's sake, |
1774 // assume only the false branch can. |
1775 return 1; | |
1776 } | |
1777 op = n->as_Mach()->ideal_Opcode(); | |
1778 } | |
1779 | |
1780 // Switch on branch type | |
1781 switch( op ) { | |
1782 case Op_CountedLoopEnd: | |
1783 case Op_If: | |
1784 return 2; | |
1785 | |
1786 case Op_Root: | |
1787 case Op_Goto: | |
1788 return 1; | |
1789 | |
1790 case Op_Catch: { | |
1791 for (uint i = 0; i < _num_succs; i++) { | |
1792 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1793 if (ci->_con == CatchProjNode::fall_through_index) { | |
1794 return 1; | |
1795 } | |
1796 } | |
1797 return 0; | |
1798 } | |
1799 | |
1800 case Op_Jump: | |
1801 case Op_NeverBranch: | |
1802 case Op_TailCall: | |
1803 case Op_TailJump: | |
1804 case Op_Return: | |
1805 case Op_Halt: | |
1806 case Op_Rethrow: | |
1807 return 0; | |
1808 | |
1809 default: | |
1810 ShouldNotReachHere(); | |
1811 } | |
1812 | |
1813 return 0; | |
1814 } | |
1815 | |
1816 //------------------------------succ_fall_through----------------------------- | |
1817 // Return true if a specific successor could be fall-through target. | |
1818 bool Block::succ_fall_through(uint i) { | |
1819 int eidx = end_idx(); | |
1820 Node *n = _nodes[eidx]; // Get ending Node | |
1821 | |
1822 int op = n->Opcode(); | |
1823 if (n->is_Mach()) { | |
1824 if (n->is_MachNullCheck()) { | |
1825 // In theory, either side can fall through; for simplicity's sake, |
1826 // assume only the false branch can. |
1827 return _nodes[i + eidx + 1]->Opcode() == Op_IfFalse; | |
1828 } | |
1829 op = n->as_Mach()->ideal_Opcode(); | |
1830 } | |
1831 | |
1832 // Switch on branch type | |
1833 switch( op ) { | |
1834 case Op_CountedLoopEnd: | |
1835 case Op_If: | |
1836 case Op_Root: | |
1837 case Op_Goto: | |
1838 return true; | |
1839 | |
1840 case Op_Catch: { | |
1841 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1842 return ci->_con == CatchProjNode::fall_through_index; | |
1843 } | |
1844 | |
1845 case Op_Jump: | |
1846 case Op_NeverBranch: | |
1847 case Op_TailCall: | |
1848 case Op_TailJump: | |
1849 case Op_Return: | |
1850 case Op_Halt: | |
1851 case Op_Rethrow: | |
1852 return false; | |
1853 | |
1854 default: | |
1855 ShouldNotReachHere(); | |
1856 } | |
1857 | |
1858 return false; | |
1859 } | |
1860 | |
1861 //------------------------------update_uncommon_branch------------------------ | |
1862 // Update the probability of a two-branch to be uncommon | |
1863 void Block::update_uncommon_branch(Block* ub) { | |
1864 int eidx = end_idx(); | |
1865 Node *n = _nodes[eidx]; // Get ending Node | |
1866 | |
1867 int op = n->as_Mach()->ideal_Opcode(); | |
1868 | |
1869 assert(op == Op_CountedLoopEnd || op == Op_If, "must be an If"); |
1870 assert(num_fall_throughs() == 2, "must be a two way branch block"); | |
1871 | |
1872 // Which successor is ub? | |
1873 uint s; | |
1874 for (s = 0; s <_num_succs; s++) { | |
1875 if (_succs[s] == ub) break; | |
1876 } | |
1877 assert(s < 2, "uncommon successor must be found"); | |
1878 | |
1879 // If ub is the true path, make the probability small; else |
1880 // ub is the false path, so make the probability large. |
1881 bool invert = (_nodes[s + eidx + 1]->Opcode() == Op_IfFalse); | |
1882 | |
1883 // Get existing probability | |
1884 float p = n->as_MachIf()->_prob; | |
1885 | |
1886 if (invert) p = 1.0f - p; |
1887 if (p > PROB_MIN) { | |
1888 p = PROB_MIN; | |
1889 } | |
1890 if (invert) p = 1.0f - p; |
1891 | |
1892 n->as_MachIf()->_prob = p; | |
1893 } | |
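// A minimal sketch of the invert/clamp step above on plain floats; the
// helper and the PROB_MIN stand-in are hypothetical. The stored _prob is
// first mapped to "probability of reaching the uncommon successor",
// clamped down to the minimum, then mapped back into the branch's
// orientation.
static float sketch_clamp_uncommon(float prob, bool invert) {
  const float kProbMin = 1e-6f;                 // illustrative PROB_MIN
  float uncommon = invert ? 1.0f - prob : prob;
  if (uncommon > kProbMin) uncommon = kProbMin; // only ever lower it
  return invert ? 1.0f - uncommon : uncommon;
}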
1894 | |
0 | 1895 //------------------------------update_succ_freq------------------------------- |
605 | 1896 // Update the appropriate frequency associated with block 'b', a successor of |
0 | 1897 // a block in this loop. |
1898 void CFGLoop::update_succ_freq(Block* b, float freq) { | |
1899 if (b->_loop == this) { | |
1900 if (b == head()) { | |
1901 // back branch within the loop | |
1902 // Do nothing now; the loop-carried frequency will be |
1903 // adjusted later in scale_freq(). |
1904 } else { | |
1905 // simple branch within the loop | |
1906 b->_freq += freq; | |
1907 } | |
1908 } else if (!in_loop_nest(b)) { | |
1909 // branch is exit from this loop | |
1910 BlockProbPair bpp(b, freq); | |
1911 _exits.append(bpp); | |
1912 } else { | |
1913 // branch into nested loop | |
1914 CFGLoop* ch = b->_loop; | |
1915 ch->_freq += freq; | |
1916 } | |
1917 } | |
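// Worked example of the dispatch above, on a hypothetical loop nest: from
// a member block of loop L contributing frequency f, an edge back to L's
// head is a loop-carried branch (deferred to scale_freq()), an edge to a
// sibling block in L adds f to that block, an edge leaving L's nest is
// recorded as an exit pair (target, f), and an edge into an inner loop
// adds f to that inner loop's entry frequency.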
1918 | |
1919 //------------------------------in_loop_nest----------------------------------- | |
1920 // Determine if block b is in the receiver's loop nest. | |
1921 bool CFGLoop::in_loop_nest(Block* b) { | |
1922 int depth = _depth; | |
1923 CFGLoop* b_loop = b->_loop; | |
1924 int b_depth = b_loop->_depth; | |
1925 if (depth == b_depth) { | |
1926 return true; | |
1927 } | |
1928 while (b_depth > depth) { | |
1929 b_loop = b_loop->_parent; | |
1930 b_depth = b_loop->_depth; | |
1931 } | |
1932 return b_loop == this; | |
1933 } | |
1934 | |
1935 //------------------------------scale_freq------------------------------------- | |
1936 // Scale frequency of loops and blocks by trip counts from outer loops | |
1937 // Do a top down traversal of loop tree (visit outer loops first.) | |
1938 void CFGLoop::scale_freq() { | |
1939 float loop_freq = _freq * trip_count(); | |
673 | 1940 _freq = loop_freq; |
0 | 1941 for (int i = 0; i < _members.length(); i++) { |
1942 CFGElement* s = _members.at(i); | |
552 | 1943 float block_freq = s->_freq * loop_freq; |
621 | 1944 if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY) |
1945 block_freq = MIN_BLOCK_FREQUENCY; | |
552 | 1946 s->_freq = block_freq; |
0 | 1947 } |
1948 CFGLoop* ch = _child; | |
1949 while (ch != NULL) { | |
1950 ch->scale_freq(); | |
1951 ch = ch->_sibling; | |
1952 } | |
1953 } | |
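// Worked example of the top-down scaling above, with hypothetical numbers:
// a loop whose normalized _exit_prob is 0.1 has an expected trip count of
// about 10, so its members' per-entry frequencies are multiplied by 10; an
// inner loop with trip count 5 then scales its own members by a further
// factor, giving 10 * 5 == 50 relative to a single method entry.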
1954 | |
673 | 1955 // Frequency of outer loop |
1956 float CFGLoop::outer_loop_freq() const { | |
1957 if (_child != NULL) { | |
1958 return _child->_freq; | |
1959 } | |
1960 return _freq; | |
1961 } | |
1962 | |
0 | 1963 #ifndef PRODUCT |
1964 //------------------------------dump_tree-------------------------------------- | |
1965 void CFGLoop::dump_tree() const { | |
1966 dump(); | |
1967 if (_child != NULL) _child->dump_tree(); | |
1968 if (_sibling != NULL) _sibling->dump_tree(); | |
1969 } | |
1970 | |
1971 //------------------------------dump------------------------------------------- | |
1972 void CFGLoop::dump() const { | |
1973 for (int i = 0; i < _depth; i++) tty->print(" "); | |
1974 tty->print("%s: %d trip_count: %6.0f freq: %6.0f\n", | |
1975 _depth == 0 ? "Method" : "Loop", _id, trip_count(), _freq); | |
1976 for (int i = 0; i < _depth; i++) tty->print(" "); | |
1977 tty->print(" members:"); |
1978 int k = 0; | |
1979 for (int i = 0; i < _members.length(); i++) { | |
1980 if (k++ >= 6) { | |
1981 tty->print("\n "); | |
1982 for (int j = 0; j < _depth+1; j++) tty->print(" "); | |
1983 k = 0; | |
1984 } | |
1985 CFGElement *s = _members.at(i); | |
1986 if (s->is_block()) { | |
1987 Block *b = s->as_Block(); | |
1988 tty->print(" B%d(%6.3f)", b->_pre_order, b->_freq); | |
1989 } else { | |
1990 CFGLoop* lp = s->as_CFGLoop(); | |
1991 tty->print(" L%d(%6.3f)", lp->_id, lp->_freq); | |
1992 } | |
1993 } | |
1994 tty->print("\n"); | |
1995 for (int i = 0; i < _depth; i++) tty->print(" "); | |
1996 tty->print(" exits: "); | |
1997 k = 0; | |
1998 for (int i = 0; i < _exits.length(); i++) { | |
1999 if (k++ >= 7) { | |
2000 tty->print("\n "); | |
2001 for (int j = 0; j < _depth+1; j++) tty->print(" "); | |
2002 k = 0; | |
2003 } | |
2004 Block *blk = _exits.at(i).get_target(); | |
2005 float prob = _exits.at(i).get_prob(); | |
2006 tty->print(" ->%d@%d%%", blk->_pre_order, (int)(prob*100)); | |
2007 } | |
2008 tty->print("\n"); | |
2009 } | |
2010 #endif |