annotate src/share/vm/opto/gcm.cpp @ 4710:41406797186b
7113012: G1: rename not-fully-young GCs as "mixed"
Summary: Renamed partially-young GCs as mixed and fully-young GCs as young. Changed all external output that includes those terms (the GC log and GC ergo log) as well as any comments, fields, methods, etc. The changeset also includes very minor code tidying (added some curly brackets).
Reviewed-by: johnc, brutisso
author    tonyp
date      Fri, 16 Dec 2011 02:14:27 -0500
parents   1bd45abaa507
children  cf407b7d3d78
/*
 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "runtime/deoptimization.hpp"
#ifdef TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_arm
# include "adfiles/ad_arm.hpp"
#endif
#ifdef TARGET_ARCH_MODEL_ppc
# include "adfiles/ad_ppc.hpp"
#endif

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style

// To avoid float value underflow
#define MIN_BLOCK_FREQUENCY 1.e-35f

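// Note (illustrative): block frequencies are products of branch
// probabilities along paths, so a deeply nested, rarely taken path can
// drive a float product toward zero; clamping at a tiny positive value
// keeps later frequency comparisons and ratios well-defined.
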
//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b. Look for projections of n and make sure they
// are in b also.
void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
  // Set basic block of n, Add n to b,
  _bbs.map(n->_idx, b);
  b->add_inst(n);

  // After Matching, nearly any old Node may have projections trailing it.
  // These are usually machine-dependent flags.  In any case, they might
  // float to another block below this one.  Move them up.
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* use = n->fast_out(i);
    if (use->is_Proj()) {
      Block* buse = _bbs[use->_idx];
      if (buse != b) {              // In wrong block?
        if (buse != NULL)
          buse->find_remove(use);   // Remove from wrong block
        _bbs.map(use->_idx, b);     // Re-insert in this block
        b->add_inst(use);
      }
    }
  }
}

//----------------------------replace_block_proj_ctrl-------------------------
// Nodes that have is_block_proj() nodes as their control need to use
// the appropriate Region for their actual block as their control since
// the projection will be in a predecessor block.
void PhaseCFG::replace_block_proj_ctrl( Node *n ) {
  const Node *in0 = n->in(0);
  assert(in0 != NULL, "Only control-dependent");
  const Node *p = in0->is_block_proj();
  if (p != NULL && p != n) {    // Control from a block projection?
    assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
    // Find trailing Region
    Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
    uint j = 0;
    if (pb->_num_succs != 1) {  // More than 1 successor?
      // Search for successor
      uint max = pb->_nodes.size();
      assert( max > 1, "" );
      uint start = max - pb->_num_succs;
      // Find which output path belongs to projection
      for (j = start; j < max; j++) {
        if( pb->_nodes[j] == in0 )
          break;
      }
      assert( j < max, "must find" );
      // Change control to match head of successor basic block
      j -= start;
    }
    n->set_req(0, pb->_succs[j]->head());
  }
}

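// Example (illustrative): if n is control-dependent on the IfTrue
// projection of an If, that projection sits at the end of the If's block,
// while n itself belongs in the successor block; the code above rewires
// n's control edge to that successor's head.
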
//------------------------------schedule_pinned_nodes--------------------------
// Set the basic block for Nodes pinned into blocks
void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
  // Allocate node stack of size C->unique()+8 to avoid frequent realloc
  GrowableArray <Node *> spstack(C->unique()+8);
  spstack.push(_root);
  while ( spstack.is_nonempty() ) {
    Node *n = spstack.pop();
    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
      if( n->pinned() && !_bbs.lookup(n->_idx) ) {  // Pinned?  Nail it down!
        assert( n->in(0), "pinned Node must have Control" );
        // Before setting block replace block_proj control edge
        replace_block_proj_ctrl(n);
        Node *input = n->in(0);
        while( !input->is_block_start() )
          input = input->in(0);
        Block *b = _bbs[input->_idx];  // Basic block of controlling input
        schedule_node_into_block(n, b);
      }
      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
        if( n->in(i) != NULL )
          spstack.push(n->in(i));
      }
    }
  }
}

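// Note: the walk above is an iterative DFS over the input graph using an
// explicit stack, so even very deep node graphs cannot overflow the native
// call stack while pinned nodes are being nailed down.
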
#ifdef ASSERT
// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that it is dominated by b1, the deepest
// input observed until b2.
static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
  if (b1 == NULL)  return;
  assert(b1->_dom_depth < b2->_dom_depth, "sanity");
  Block* tmp = b2;
  while (tmp != b1 && tmp != NULL) {
    tmp = tmp->_idom;
  }
  if (tmp != b1) {
    // Detected an unschedulable graph.  Print some nice stuff and die.
    tty->print_cr("!!! Unschedulable graph !!!");
    for (uint j=0; j<n->len(); j++) { // For all inputs
      Node* inn = n->in(j); // Get input
      if (inn == NULL)  continue;  // Ignore NULL, missing inputs
      Block* inb = bbs[inn->_idx];
      tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
                 inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
      inn->dump();
    }
    tty->print("Failing node: ");
    n->dump();
    assert(false, "unschedulable graph");
  }
}
#endif

static Block* find_deepest_input(Node* n, Block_Array &bbs) {
  // Find the last input dominated by all other inputs.
  Block* deepb           = NULL;        // Deepest block so far
  int    deepb_dom_depth = 0;
  for (uint k = 0; k < n->len(); k++) { // For all inputs
    Node* inn = n->in(k);               // Get input
    if (inn == NULL)  continue;         // Ignore NULL, missing inputs
    Block* inb = bbs[inn->_idx];
    assert(inb != NULL, "must already have scheduled this input");
    if (deepb_dom_depth < (int) inb->_dom_depth) {
      // The new inb must be dominated by the previous deepb.
      // The various inputs must be linearly ordered in the dom
      // tree, or else there will not be a unique deepest block.
      DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
      deepb = inb;                      // Save deepest block
      deepb_dom_depth = deepb->_dom_depth;
    }
  }
  assert(deepb != NULL, "must be at least one input to n");
  return deepb;
}

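// Example (illustrative): if a node's inputs live in blocks at dom depths
// 2, 4 and 7, those blocks must lie on a single dominator chain, and the
// depth-7 block is the earliest block where every input is available.
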
//------------------------------schedule_early---------------------------------
// Find the earliest Block any instruction can be placed in.  Some instructions
// are pinned into Blocks.  Unpinned instructions can appear in the last block
// in which all their inputs occur.
bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
  // Allocate stack with enough space to avoid frequent realloc
  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
  // roots.push(_root); _root will be processed among C->top() inputs
  roots.push(C->top());
  visited.set(C->top()->_idx);

  while (roots.size() != 0) {
    // Use local variables nstack_top_n & nstack_top_i to cache values
    // on stack's top.
    Node *nstack_top_n = roots.pop();
    uint  nstack_top_i = 0;
//while_nstack_nonempty:
    while (true) {
      // Get parent node and next input's index from stack's top.
      Node *n = nstack_top_n;
      uint  i = nstack_top_i;

      if (i == 0) {
        // Fixup some control.  Constants without control get attached
        // to root and nodes that use is_block_proj() nodes should be attached
        // to the region that starts their block.
        const Node *in0 = n->in(0);
        if (in0 != NULL) {              // Control-dependent?
          replace_block_proj_ctrl(n);
        } else {               // n->in(0) == NULL
          if (n->req() == 1) { // This guy is a constant with NO inputs?
            n->set_req(0, _root);
          }
        }
      }

      // First, visit all inputs and force them to get a block.  If an
      // input is already in a block we quit following inputs (to avoid
      // cycles). Instead we put that Node on a worklist to be handled
      // later (since ITS inputs may not have a block yet).
      bool done = true;       // Assume all n's inputs will be processed
      while (i < n->len()) {  // For all inputs
        Node *in = n->in(i);  // Get input
        ++i;
        if (in == NULL) continue;  // Ignore NULL, missing inputs
        int is_visited = visited.test_set(in->_idx);
        if (!_bbs.lookup(in->_idx)) { // Missing block selection?
          if (is_visited) {
            // assert( !visited.test(in->_idx), "did not schedule early" );
            return false;
          }
          nstack.push(n, i);  // Save parent node and next input's index.
          nstack_top_n = in;  // Process current input now.
          nstack_top_i = 0;
          done = false;       // Not all n's inputs processed.
          break; // continue while_nstack_nonempty;
        } else if (!is_visited) {  // Input not yet visited?
          roots.push(in);          // Visit this guy later, using worklist
        }
      }
      if (done) {
        // All of n's inputs have been processed, complete post-processing.

        // Some instructions are pinned into a block.  These include Region,
        // Phi, Start, Return, and other control-dependent instructions and
        // any projections which depend on them.
        if (!n->pinned()) {
          // Set earliest legal block.
          _bbs.map(n->_idx, find_deepest_input(n, _bbs));
        } else {
          assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
        }

        if (nstack.is_empty()) {
          // Finished all nodes on stack.
          // Process next node on the worklist 'roots'.
          break;
        }
        // Get saved parent node and next input's index.
        nstack_top_n = nstack.node();
        nstack_top_i = nstack.index();
        nstack.pop();
      } // if (done)
    } // while (true)
  } // while (roots.size() != 0)
  return true;
}

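// Example (illustrative): an AddI whose two inputs are defined in blocks
// at dom depths 3 and 5 gets the depth-5 block as its earliest legal home;
// schedule_late may still sink it further down the dominator tree.
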
//------------------------------dom_lca----------------------------------------
// Find least common ancestor in dominator tree
// LCA is a current notion of LCA, to be raised above 'this'.
// As a convenient boundary condition, return 'this' if LCA is NULL.
// Find the LCA of those two nodes.
Block* Block::dom_lca(Block* LCA) {
  if (LCA == NULL || LCA == this)  return this;

  Block* anc = this;
  while (anc->_dom_depth > LCA->_dom_depth)
    anc = anc->_idom;           // Walk up till anc is as high as LCA

  while (LCA->_dom_depth > anc->_dom_depth)
    LCA = LCA->_idom;           // Walk up till LCA is as high as anc

  while (LCA != anc) {          // Walk both up till they are the same
    LCA = LCA->_idom;
    anc = anc->_idom;
  }

  return LCA;
}

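// Worked example (illustrative): with 'this' at dom depth 6 and LCA at
// depth 4, the first loop lifts 'this' to depth 4, then both walk up in
// lockstep (e.g. B12->B7->B3 and B9->B5->B3) until they meet at B3.
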
//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use.  If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
  Block* buse = bbs[use->_idx];
  if (buse == NULL) return LCA;   // Unused killing Projs have no use block
  if (!use->is_Phi())  return buse->dom_lca(LCA);
  uint pmax = use->req();       // Number of Phi inputs
  // Why does not this loop just break after finding the matching input to
  // the Phi?  Well...it's like this.  I do not have true def-use/use-def
  // chains.  Means I cannot distinguish, from the def-use direction, which
  // of many use-defs lead from the same use to the same def.  That is, this
  // Phi might have several uses of the same def.  Each use appears in a
  // different predecessor block.  But when I enter here, I cannot distinguish
  // which use-def edge I should find the predecessor block for.  So I find
  // them all.  Means I do a little extra work if a Phi uses the same value
  // more than once.
  for (uint j=1; j<pmax; j++) { // For all inputs
    if (use->in(j) == def) {    // Found matching input?
      Block* pred = bbs[buse->pred(j)->_idx];
      LCA = pred->dom_lca(LCA);
    }
  }
  return LCA;
}

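// Example (illustrative): a def feeding only input 2 of a Phi need not
// dominate the Phi's own block, just the block feeding predecessor 2; that
// is why the LCA is raised per matching Phi input rather than to the Phi.
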
//----------------------------raise_LCA_above_marks----------------------------
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index.  Return the LCA (in the dom tree)
// of all marked blocks.  If there are none marked, return the original
// LCA.
static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
                                    Block* early, Block_Array &bbs) {
  Block_List worklist;
  worklist.push(LCA);
  while (worklist.size() > 0) {
    Block* mid = worklist.pop();
    if (mid == early)  continue;  // stop searching here

    // Test and set the visited bit.
    if (mid->raise_LCA_visited() == mark)  continue;  // already visited

    // Don't process the current LCA, otherwise the search may terminate early
    if (mid != LCA && mid->raise_LCA_mark() == mark) {
      // Raise the LCA.
      LCA = mid->dom_lca(LCA);
      if (LCA == early)  break;   // stop searching everywhere
      assert(early->dominates(LCA), "early is high enough");
      // Resume searching at that point, skipping intermediate levels.
      worklist.push(LCA);
      if (LCA == mid)
        continue; // Don't mark as visited to avoid early termination.
    } else {
      // Keep searching through this block's predecessors.
      for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
        Block* mid_parent = bbs[ mid->pred(j)->_idx ];
        worklist.push(mid_parent);
      }
    }
    mid->set_raise_LCA_visited(mark);
  }
  return LCA;
}

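// Example (illustrative): if two predecessor blocks on different paths are
// marked, the search climbs from the current LCA through predecessors,
// folding each marked block into the running dom_lca until the result
// dominates every marked block (or reaches 'early' and stops).
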
//--------------------------memory_early_block--------------------------------
// This is a variation of find_deepest_input, the heart of schedule_early.
// Find the "early" block for a load, if we considered only memory and
// address inputs, that is, if other data inputs were ignored.
//
// Because a subset of edges are considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node. We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
  Node* base;
  Node* index;
  Node* store = load->in(MemNode::Memory);
  load->as_Mach()->memory_inputs(base, index);

  assert(base != NodeSentinel && index != NodeSentinel,
         "unexpected base/index inputs");

  Node* mem_inputs[4];
  int mem_inputs_length = 0;
  if (base != NULL)  mem_inputs[mem_inputs_length++] = base;
  if (index != NULL) mem_inputs[mem_inputs_length++] = index;
  if (store != NULL) mem_inputs[mem_inputs_length++] = store;

  // In the comparison below, add one to account for the control input,
  // which may be null, but always takes up a spot in the in array.
  if (mem_inputs_length + 1 < (int) load->req()) {
    // This "load" has more inputs than just the memory, base and index inputs.
    // For purposes of checking anti-dependences, we need to start
    // from the early block of only the address portion of the instruction,
    // and ignore other blocks that may have factored into the wider
    // schedule_early calculation.
    if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);

    Block* deepb           = NULL;        // Deepest block so far
    int    deepb_dom_depth = 0;
    for (int i = 0; i < mem_inputs_length; i++) {
      Block* inb = bbs[mem_inputs[i]->_idx];
      if (deepb_dom_depth < (int) inb->_dom_depth) {
        // The new inb must be dominated by the previous deepb.
        // The various inputs must be linearly ordered in the dom
        // tree, or else there will not be a unique deepest block.
        DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
        deepb = inb;                      // Save deepest block
        deepb_dom_depth = deepb->_dom_depth;
      }
    }
    early = deepb;
  }

  return early;
}

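// Example (illustrative): if a load's memory and address inputs are ready
// at dom depth 2 but some extra data input only at depth 5, this returns
// the depth-2 block, widening the region searched for anti-dependences.
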
//--------------------------insert_anti_dependences---------------------------
// A load may need to witness memory that nearby stores can overwrite.
// For each nearby store, either insert an "anti-dependence" edge
// from the load to the store, or else move LCA upward to force the
// load to (eventually) be scheduled in a block above the store.
//
// Do not add edges to stores on distinct control-flow paths;
// only add edges to stores which might interfere.
//
// Return the (updated) LCA.  There will not be any possibly interfering
// store between the load's "early block" and the updated LCA.
// Any stores in the updated LCA will have new precedence edges
// back to the load.  The caller is expected to schedule the load
// in the LCA, in which case the precedence edges will make LCM
// preserve anti-dependences.  The caller may also hoist the load
// above the LCA, if it is not the early block.
Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
  assert(load->needs_anti_dependence_check(), "must be a load of some sort");
  assert(LCA != NULL, "");
  DEBUG_ONLY(Block* LCA_orig = LCA);

  // Compute the alias index.  Loads and stores with different alias indices
  // do not need anti-dependence edges.
  uint load_alias_idx = C->get_alias_index(load->adr_type());
#ifdef ASSERT
  if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
      (PrintOpto || VerifyAliases ||
       PrintMiscellaneous && (WizardMode || Verbose))) {
    // Load nodes should not consume all of memory.
    // Reporting a bottom type indicates a bug in adlc.
    // If some particular type of node validly consumes all of memory,
    // sharpen the preceding "if" to exclude it, so we can catch bugs here.
    tty->print_cr("*** Possible Anti-Dependence Bug:  Load consumes all of memory.");
    load->dump(2);
    if (VerifyAliases)  assert(load_alias_idx != Compile::AliasIdxBot, "");
  }
#endif
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
         "String compare is the only known 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals),
         "String equals is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf),
         "String indexOf is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq),
         "Arrays equals is a 'load' that does not conflict with any stores");

  if (!C->alias_type(load_alias_idx)->is_rewritable()) {
    // It is impossible to spoil this load by putting stores before it,
    // because we know that the stores will never update the value
    // which 'load' must witness.
    return LCA;
  }

  node_idx_t load_index = load->_idx;

  // Note the earliest legal placement of 'load', as determined by
  // the unique point in the dom tree where all memory effects
  // and other inputs are first available.  (Computed by schedule_early.)
  // For normal loads, 'early' is the shallowest place (dom graph wise)
  // to look for anti-deps between this load and any store.
  Block* early = _bbs[load_index];

  // If we are subsuming loads, compute an "early" block that only considers
  // memory or address inputs. This block may be different than the
  // schedule_early block in that it could be at an even shallower depth in the
  // dominator tree, and allow for a broader discovery of anti-dependences.
  if (C->subsume_loads()) {
    early = memory_early_block(load, early, _bbs);
  }

  ResourceArea *area = Thread::current()->resource_area();
  Node_List worklist_mem(area);     // prior memory state to store
  Node_List worklist_store(area);   // possible-def to explore
  Node_List worklist_visited(area); // visited mergemem nodes
  Node_List non_early_stores(area); // all relevant stores outside of early
  bool must_raise_LCA = false;

#ifdef TRACK_PHI_INPUTS
  // %%% This extra checking fails because MergeMem nodes are not GVNed.
  // Provide "phi_inputs" to check if every input to a PhiNode is from the
  // original memory state.  This indicates a PhiNode which should not
  // prevent the load from sinking.  For such a block, set_raise_LCA_mark
  // may be overly conservative.
  // Mechanism: count inputs seen for each Phi encountered in worklist_store.
  DEBUG_ONLY(GrowableArray<uint> phi_inputs(area, C->unique(),0,0));
#endif

  // 'load' uses some memory state; look for users of the same state.
  // Recurse through MergeMem nodes to the stores that use them.

  // Each of these stores is a possible definition of memory
  // that 'load' needs to use.  We need to force 'load'
  // to occur before each such store.  When the store is in
  // the same block as 'load', we insert an anti-dependence
  // edge load->store.

  // The relevant stores "nearby" the load consist of a tree rooted
  // at initial_mem, with internal nodes of type MergeMem.
  // Therefore, the branches visited by the worklist are of this form:
  //    initial_mem -> (MergeMem ->)* store
  // The anti-dependence constraints apply only to the fringe of this tree.

  Node* initial_mem = load->in(MemNode::Memory);
  worklist_store.push(initial_mem);
  worklist_visited.push(initial_mem);
  worklist_mem.push(NULL);
  while (worklist_store.size() > 0) {
    // Examine a nearby store to see if it might interfere with our load.
    Node* mem   = worklist_mem.pop();
    Node* store = worklist_store.pop();
    uint op = store->Opcode();

    // MergeMems do not directly have anti-deps.
    // Treat them as internal nodes in a forward tree of memory states,
    // the leaves of which are each a 'possible-def'.
    if (store == initial_mem    // root (exclusive) of tree we are searching
        || op == Op_MergeMem    // internal node of tree we are searching
        ) {
      mem = store;   // It's not a possibly interfering store.
      if (store == initial_mem)
        initial_mem = NULL;  // only process initial memory once

      for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
        store = mem->fast_out(i);
        if (store->is_MergeMem()) {
          // Be sure we don't get into combinatorial problems.
          // (Allow phis to be repeated; they can merge two relevant states.)
          uint j = worklist_visited.size();
          for (; j > 0; j--) {
            if (worklist_visited.at(j-1) == store)  break;
          }
          if (j > 0)  continue; // already on work list; do not repeat
          worklist_visited.push(store);
        }
        worklist_mem.push(mem);
        worklist_store.push(store);
      }
      continue;
    }

    if (op == Op_MachProj || op == Op_Catch)   continue;
    if (store->needs_anti_dependence_check())  continue;  // not really a store

    // Compute the alias index.  Loads and stores with different alias
    // indices do not need anti-dependence edges.  Wide MemBar's are
    // anti-dependent on everything (except immutable memories).
    const TypePtr* adr_type = store->adr_type();
    if (!C->can_alias(adr_type, load_alias_idx))  continue;

    // Most slow-path runtime calls do NOT modify Java memory, but
    // they can block and so write Raw memory.
    if (store->is_Mach()) {
      MachNode* mstore = store->as_Mach();
      if (load_alias_idx != Compile::AliasIdxRaw) {
        // Check for call into the runtime using the Java calling
        // convention (and from there into a wrapper); it has no
        // _method.  Can't do this optimization for Native calls because
        // they CAN write to Java memory.
        if (mstore->ideal_Opcode() == Op_CallStaticJava) {
          assert(mstore->is_MachSafePoint(), "");
          MachSafePointNode* ms = (MachSafePointNode*) mstore;
          assert(ms->is_MachCallJava(), "");
          MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
          if (mcj->_method == NULL) {
            // These runtime calls do not write to Java visible memory
            // (other than Raw) and so do not require anti-dependence edges.
            continue;
          }
        }
        // Same for SafePoints: they read/write Raw but only read otherwise.
        // This is basically a workaround for SafePoints only defining control
        // instead of control + memory.
        if (mstore->ideal_Opcode() == Op_SafePoint)
          continue;
      } else {
        // Some raw memory, such as the load of "top" at an allocation,
        // can be control dependent on the previous safepoint. See
        // comments in GraphKit::allocate_heap() about control input.
        // Inserting an anti-dep between such a safepoint and a use
        // creates a cycle, and will cause a subsequent failure in
        // local scheduling.  (BugId 4919904)
        // (%%% How can a control input be a safepoint and not a projection??)
        if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
          continue;
      }
    }

    // Identify a block that the current load must be above,
    // or else observe that 'store' is all the way up in the
    // earliest legal block for 'load'.  In the latter case,
    // immediately insert an anti-dependence edge.
    Block* store_block = _bbs[store->_idx];
    assert(store_block != NULL, "unused killing projections skipped above");

    if (store->is_Phi()) {
      // 'load' uses memory which is one (or more) of the Phi's inputs.
      // It must be scheduled not before the Phi, but rather before
      // each of the relevant Phi inputs.
      //
      // Instead of finding the LCA of all inputs to a Phi that match 'mem',
      // we mark each corresponding predecessor block and do a combined
      // hoisting operation later (raise_LCA_above_marks).
      //
      // Do not assert(store_block != early, "Phi merging memory after access")
      // PhiNode may be at start of block 'early' with backedge to 'early'
      DEBUG_ONLY(bool found_match = false);
      for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
        if (store->in(j) == mem) {   // Found matching input?
          DEBUG_ONLY(found_match = true);
          Block* pred_block = _bbs[store_block->pred(j)->_idx];
          if (pred_block != early) {
            // If any predecessor of the Phi matches the load's "early block",
            // we do not need a precedence edge between the Phi and 'load'
            // since the load will be forced into a block preceding the Phi.
            pred_block->set_raise_LCA_mark(load_index);
            assert(!LCA_orig->dominates(pred_block) ||
                   early->dominates(pred_block), "early is high enough");
            must_raise_LCA = true;
          } else {
            // anti-dependent upon PHI pinned below 'early', no edge needed
            LCA = early;             // but can not schedule below 'early'
          }
        }
      }
      assert(found_match, "no worklist bug");
#ifdef TRACK_PHI_INPUTS
#ifdef ASSERT
      // This assert asks about correct handling of PhiNodes, which may not
      // have all input edges directly from 'mem'. See BugId 4621264
      int num_mem_inputs = phi_inputs.at_grow(store->_idx,0) + 1;
      // Increment by exactly one even if there are multiple copies of 'mem'
      // coming into the phi, because we will run this block several times
      // if there are several copies of 'mem'.  (That's how DU iterators work.)
      phi_inputs.at_put(store->_idx, num_mem_inputs);
      assert(PhiNode::Input + num_mem_inputs < store->req(),
             "Expect at least one phi input will not be from original memory state");
#endif //ASSERT
#endif //TRACK_PHI_INPUTS
    } else if (store_block != early) {
      // 'store' is between the current LCA and earliest possible block.
      // Label its block, and decide later on how to raise the LCA
      // to include the effect on LCA of this store.
      // If this store's block gets chosen as the raised LCA, we
      // will find him on the non_early_stores list and stick him
      // with a precedence edge.
      // (But, don't bother if LCA is already raised all the way.)
      if (LCA != early) {
        store_block->set_raise_LCA_mark(load_index);
        must_raise_LCA = true;
        non_early_stores.push(store);
      }
    } else {
      // Found a possibly-interfering store in the load's 'early' block.
      // This means 'load' cannot sink at all in the dominator tree.
      // Add an anti-dep edge, and squeeze 'load' into the highest block.
      assert(store != load->in(0), "dependence cycle found");
      if (verify) {
        assert(store->find_edge(load) != -1, "missing precedence edge");
      } else {
        store->add_prec(load);
      }
      LCA = early;
      // This turns off the process of gathering non_early_stores.
    }
  }
  // (Worklist is now empty; all nearby stores have been visited.)

  // Finished if 'load' must be scheduled in its 'early' block.
  // If we found any stores there, they have already been given
  // precedence edges.
  if (LCA == early)  return LCA;

  // We get here only if there are no possibly-interfering stores
  // in the load's 'early' block.  Move LCA up above all predecessors
  // which contain stores we have noted.
  //
  // The raised LCA block can be a home to such interfering stores,
  // but its predecessors must not contain any such stores.
  //
  // The raised LCA will be a lower bound for placing the load,
  // preventing the load from sinking past any block containing
  // a store that may invalidate the memory state required by 'load'.
  if (must_raise_LCA)
    LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
  if (LCA == early)  return LCA;

  // Insert anti-dependence edges from 'load' to each store
  // in the non-early LCA block.
  // Mine the non_early_stores list for such stores.
  if (LCA->raise_LCA_mark() == load_index) {
    while (non_early_stores.size() > 0) {
      Node* store = non_early_stores.pop();
      Block* store_block = _bbs[store->_idx];
      if (store_block == LCA) {
        // add anti_dependence from store to load in its own block
        assert(store != load->in(0), "dependence cycle found");
        if (verify) {
          assert(store->find_edge(load) != -1, "missing precedence edge");
        } else {
          store->add_prec(load);
        }
      } else {
        assert(store_block->raise_LCA_mark() == load_index, "block was marked");
        // Any other stores we found must be either inside the new LCA
        // or else outside the original LCA.  In the latter case, they
        // did not interfere with any use of 'load'.
        assert(LCA->dominates(store_block)
               || !LCA_orig->dominates(store_block), "no stray stores");
      }
    }
  }

  // Return the highest block containing stores; any stores
  // within that block have been given anti-dependence edges.
  return LCA;
}

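// Example (illustrative): a LoadI and a StoreI of the same field in one
// block get a store->add_prec(load) edge so local scheduling keeps the
// load before the store; a store in a block the load might sink past
// instead forces the LCA to be raised above that block.
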
// This class is used to iterate backwards over the nodes in the graph.

class Node_Backward_Iterator {

private:
  Node_Backward_Iterator();

public:
  // Constructor for the iterator
  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);

  // Postincrement operator to iterate over the nodes
  Node *next();

private:
  VectorSet   &_visited;
  Node_List   &_stack;
  Block_Array &_bbs;
};

// Constructor for the Node_Backward_Iterator
Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
  : _visited(visited), _stack(stack), _bbs(bbs) {
  // The stack should contain exactly the root
  stack.clear();
  stack.push(root);

  // Clear the visited bits
  visited.Clear();
}

// Iterator for the Node_Backward_Iterator
Node *Node_Backward_Iterator::next() {

  // If the _stack is empty, then just return NULL: finished.
  if ( !_stack.size() )
    return NULL;

  // '_stack' is emulating a real _stack.  The 'visit-all-users' loop has been
  // made stateless, so I do not need to record the index 'i' on my _stack.
  // Instead I visit all users each time, scanning for unvisited users.
  // I visit unvisited not-anti-dependence users first, then anti-dependent
  // children next.
  Node *self = _stack.pop();

  // I cycle here when I am entering a deeper level of recursion.
  // The key variable 'self' was set prior to jumping here.
  while( 1 ) {

    _visited.set(self->_idx);

    // Now schedule all uses as late as possible.
    uint src     = self->is_Proj() ? self->in(0)->_idx : self->_idx;
    uint src_rpo = _bbs[src]->_rpo;

    // Schedule all nodes in a post-order visit
    Node *unvisited = NULL;  // Unvisited anti-dependent Node, if any

    // Scan for unvisited nodes
    for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
      // For all uses, schedule late
      Node* n = self->fast_out(i); // Use

      // Skip already visited children
      if ( _visited.test(n->_idx) )
        continue;

      // do not traverse backward control edges
      Node *use = n->is_Proj() ? n->in(0) : n;
      uint use_rpo = _bbs[use->_idx]->_rpo;

      if ( use_rpo < src_rpo )
        continue;

      // Phi nodes always precede uses in a basic block
      if ( use_rpo == src_rpo && use->is_Phi() )
        continue;

      unvisited = n;  // Found unvisited

      // Check for possible-anti-dependent
      if( !n->needs_anti_dependence_check() )
        break;  // Not visited, not anti-dep; schedule it NOW
    }

    // Did I find an unvisited not-anti-dependent Node?
    if ( !unvisited )
      break;  // All done with children; post-visit 'self'

    // Visit the unvisited Node.  Contains the obvious push to
    // indicate I'm entering a deeper level of recursion.  I push the
    // old state onto the _stack and set a new state and loop (recurse).
    _stack.push(self);
    self = unvisited;
  } // End recursion loop

  return self;
}

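// (Roughly: the iterator yields nodes in post-order over the use graph,
// returning a node only after its relevant uses, which is the order the
// backward latency passes below require.)
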
//------------------------------ComputeLatenciesBackwards----------------------
// Compute the latency of all the instructions.
void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
#ifndef PRODUCT
  if (trace_opto_pipelining())
    tty->print("\n#---- ComputeLatenciesBackwards ----\n");
#endif

  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
  Node *n;

  // Walk over all the nodes from last to first
  while (n = iter.next()) {
    // Set the latency for the definitions of this instruction
    partial_latency_of_defs(n);
  }
} // end ComputeLatenciesBackwards

//------------------------------partial_latency_of_defs------------------------
// Compute the latency impact of this node on all defs.  This computes
// a number that increases as we approach the beginning of the routine.
void PhaseCFG::partial_latency_of_defs(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
               n->_idx, _node_latency->at_grow(n->_idx));
    dump();
  }
#endif

  if (n->is_Proj())
    n = n->in(0);

  if (n->is_Root())
    return;

  uint nlen = n->len();
  uint use_latency = _node_latency->at_grow(n->_idx);
  uint use_pre_order = _bbs[n->_idx]->_pre_order;

  for ( uint j=0; j<nlen; j++ ) {
    Node *def = n->in(j);

    if (!def || def == n)
      continue;

    // Walk backwards thru projections
    if (def->is_Proj())
      def = def->in(0);

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    in(%2d): ", j);
      def->dump();
    }
#endif

    // If the defining block is not known, assume it is ok
    Block *def_block = _bbs[def->_idx];
    uint def_pre_order = def_block ? def_block->_pre_order : 0;

    if ( (use_pre_order <  def_pre_order) ||
         (use_pre_order == def_pre_order && n->is_Phi()) )
      continue;

    uint delta_latency = n->latency(j);
    uint current_latency = delta_latency + use_latency;

    if (_node_latency->at_grow(def->_idx) < current_latency) {
      _node_latency->at_put_grow(def->_idx, current_latency);
    }

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
                    use_latency, j, delta_latency, current_latency, def->_idx,
                    _node_latency->at_grow(def->_idx));
    }
#endif
  }
}

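// Worked example (illustrative): if n's latency-to-end is 10 and edge j
// into n contributes latency 3, the def on edge j is raised to at least
// 13; each def thus ends up with the maximum latency over all its uses.
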
//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
  // If self-reference, return no latency
  if (use == n || use->is_Root())
    return 0;

  uint def_pre_order = _bbs[def->_idx]->_pre_order;
  uint latency = 0;

  // If the use is not a projection, then it is simple...
  if (!use->is_Proj()) {
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    out(): ");
      use->dump();
    }
#endif

    uint use_pre_order = _bbs[use->_idx]->_pre_order;

    if (use_pre_order < def_pre_order)
      return 0;

    if (use_pre_order == def_pre_order && use->is_Phi())
      return 0;

    uint nlen = use->len();
    uint nl = _node_latency->at_grow(use->_idx);

    for ( uint j=0; j<nlen; j++ ) {
      if (use->in(j) == n) {
        // Change this if we want local latencies
        uint ul = use->latency(j);
        uint  l = ul + nl;
        if (latency < l) latency = l;
#ifndef PRODUCT
        if (trace_opto_pipelining()) {
          tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, latency = %d",
                        nl, j, ul, l, latency);
        }
#endif
      }
    }
  } else {
    // This is a projection, just grab the latency of the use(s)
    for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
      uint l = latency_from_use(use, def, use->fast_out(j));
      if (latency < l) latency = l;
    }
  }

  return latency;
}

//------------------------------latency_from_uses------------------------------
// Compute the latency of this instruction relative to all of its uses.
// This computes a number that increases as we approach the beginning of the
// routine.
void PhaseCFG::latency_from_uses(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
               n->_idx, _node_latency->at_grow(n->_idx));
    dump();
  }
#endif
  uint latency=0;
  const Node *def = n->is_Proj() ? n->in(0): n;

  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    uint l = latency_from_use(n, def, n->fast_out(i));

    if (latency < l) latency = l;
  }

  _node_latency->at_put_grow(n->_idx, latency);
}

//------------------------------hoist_to_cheaper_block-------------------------
// Pick a block for node self, between early and LCA, that is a cheaper
// alternative to LCA.
Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
  const double delta = 1+PROB_UNLIKELY_MAG(4);
  Block* least       = LCA;
  double least_freq  = least->_freq;
  uint target        = _node_latency->at_grow(self->_idx);
  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
  bool in_latency    = (target <= start_latency);
  const Block* root_block = _bbs[_root->_idx];

  // Turn off latency scheduling if scheduling is just plain off
  if (!C->do_scheduling())
    in_latency = true;

  // Do not hoist (to cover latency) instructions which target a
  // single register.  Hoisting stretches the live range of the
  // single register and may force spilling.
  MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
  if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
    in_latency = true;

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("# Find cheaper block for latency %d: ",
               _node_latency->at_grow(self->_idx));
    self->dump();
    tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
                  LCA->_pre_order,
                  LCA->_nodes[0]->_idx,
                  start_latency,
                  LCA->_nodes[LCA->end_idx()]->_idx,
                  end_latency,
                  least_freq);
  }
#endif

  // Walk up the dominator tree from LCA (Lowest common ancestor) to
  // the earliest legal location.  Capture the least execution frequency.
  while (LCA != early) {
    LCA = LCA->_idom;         // Follow up the dominator tree

    if (LCA == NULL) {
      // Bailout without retry
      C->record_method_not_compilable("late schedule failed: LCA == NULL");
      return least;
    }

    // Don't hoist machine instructions to the root basic block
    if (mach && LCA == root_block)
      break;

    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
    uint end_idx   = LCA->end_idx();
    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
    double LCA_freq = LCA->_freq;
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
                    LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
    }
#endif
    if (LCA_freq < least_freq              || // Better Frequency
        ( !in_latency                   &&    // No block containing latency
          LCA_freq < least_freq * delta &&    // No worse frequency
          target >= end_lat             &&    // within latency range
          !self->is_iteratively_computed() )  // But don't hoist IV increments
             // because they may end up above other uses of their phi forcing
             // their result register to be different from their input.
       ) {
      least = LCA;            // Found cheaper block
      least_freq = LCA_freq;
      start_latency = start_lat;
      end_latency = end_lat;
      if (target <= start_lat)
        in_latency = true;
    }
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("# Choose block B%d with start latency=%d and freq=%g",
                  least->_pre_order, start_latency, least_freq);
  }
#endif

  // See if the latency needs to be updated
  if (target < end_latency) {
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
    }
#endif
    _node_latency->at_put_grow(self->_idx, end_latency);
    partial_latency_of_defs(self);
  }

  return least;
}

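// Example (illustrative): with delta = 1 + PROB_UNLIKELY_MAG(4), a
// dominating block is chosen when it is strictly less frequent, or when it
// is at most marginally more frequent, still covers the node's latency
// target, and the node is not an iteratively computed value such as an
// induction-variable increment.
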
1113 //------------------------------schedule_late----------------------------------- | |
1114 // Now schedule all codes as LATE as possible. This is the LCA in the | |
1115 // dominator tree of all USES of a value. Pick the block with the least | |
1116 // loop nesting depth that is lowest in the dominator tree. | |
1117 extern const char must_clone[]; | |
1118 void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) { | |
1119 #ifndef PRODUCT | |
1120 if (trace_opto_pipelining()) | |
1121 tty->print("\n#---- schedule_late ----\n"); | |
1122 #endif | |
1123 | |
1124 Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs); | |
1125 Node *self; | |
1126 | |
1127 // Walk over all the nodes from last to first | |
1128 while ((self = iter.next()) != NULL) { |
1129 Block* early = _bbs[self->_idx]; // Earliest legal placement | |
1130 | |
1131 if (self->is_top()) { | |
1132 // Top node goes in bb #2 with other constants. | |
1133 // It must be special-cased, because it has no out edges. | |
1134 early->add_inst(self); | |
1135 continue; | |
1136 } | |
1137 | |
1138 // No uses, just terminate | |
1139 if (self->outcnt() == 0) { | |
3842 | 1140 assert(self->is_MachProj(), "sanity"); |
0 | 1141 continue; // Must be a dead machine projection |
1142 } | |
1143 | |
1144 // If node is pinned in the block, then no scheduling can be done. | |
1145 if( self->pinned() ) // Pinned in block? | |
1146 continue; | |
1147 | |
1148 MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL; | |
1149 if (mach) { | |
1150 switch (mach->ideal_Opcode()) { | |
1151 case Op_CreateEx: | |
1152 // Don't move exception creation | |
1153 early->add_inst(self); | |
1154 continue; | |
1155 break; // unreachable: the 'continue' above already leaves the switch |
1156 case Op_CheckCastPP: | |
1157 // Don't move CheckCastPP nodes away from their input, if the input | |
1158 // is a rawptr (5071820). | |
1159 Node *def = self->in(1); | |
1160 if (def != NULL && def->bottom_type()->base() == Type::RawPtr) { | |
1161 early->add_inst(self); | |
833 | 1162 #ifdef ASSERT
1163 _raw_oops.push(def); |
1164 #endif |
0 | 1165 continue; |
1166 } | |
1167 break; | |
1168 } | |
1169 } | |
1170 | |
1171 // Gather LCA of all uses | |
1172 Block *LCA = NULL; | |
1173 { | |
1174 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { | |
1175 // For all uses, find LCA | |
1176 Node* use = self->fast_out(i); | |
1177 LCA = raise_LCA_above_use(LCA, use, self, _bbs); | |
1178 } | |
1179 } // (Hide defs of imax, i from rest of block.) | |
1180 | |
1181 // Place temps in the block of their use. This isn't a | |
1182 // requirement for correctness but it reduces useless | |
1183 // interference between temps and other nodes. | |
1184 if (mach != NULL && mach->is_MachTemp()) { | |
1185 _bbs.map(self->_idx, LCA); | |
1186 LCA->add_inst(self); | |
1187 continue; | |
1188 } | |
1189 | |
1190 // Check if 'self' could be anti-dependent on memory | |
1191 if (self->needs_anti_dependence_check()) { | |
1192 // Hoist LCA above possible-defs and insert anti-dependences to | |
1193 // defs in new LCA block. | |
1194 LCA = insert_anti_dependences(LCA, self); | |
1195 } | |
1196 | |
1197 if (early->_dom_depth > LCA->_dom_depth) { | |
1198 // Somehow the LCA has moved above the earliest legal point. | |
1199 // (One way this can happen is via memory_early_block.) | |
1200 if (C->subsume_loads() == true && !C->failing()) { | |
1201 // Retry with subsume_loads == false | |
1202 // If this is the first failure, the sentinel string will "stick" | |
1203 // to the Compile object, and the C2Compiler will see it and retry. | |
1204 C->record_failure(C2Compiler::retry_no_subsuming_loads()); | |
1205 } else { | |
1206 // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) | |
1207 C->record_method_not_compilable("late schedule failed: incorrect graph"); | |
1208 } | |
1209 return; | |
1210 } | |
1211 | |
1212 // If there is no opportunity to hoist, then we're done. | |
1213 bool try_to_hoist = (LCA != early); | |
1214 | |
1215 // Must clone guys stay next to use; no hoisting allowed. | |
1216 // Also cannot hoist guys that alter memory or are otherwise not | |
1217 // allocatable (hoisting can make a value live longer, leading to | |
1218 // anti and output dependency problems which are normally resolved | |
1219 // by the register allocator giving everyone a different register). | |
1220 if (mach != NULL && must_clone[mach->ideal_Opcode()]) | |
1221 try_to_hoist = false; | |
1222 | |
1223 Block* late = NULL; | |
1224 if (try_to_hoist) { | |
1225 // Now find the block with the least execution frequency. | |
1226 // Start at the latest schedule and work up to the earliest schedule | |
1227 // in the dominator tree. Thus the Node will dominate all its uses. | |
1228 late = hoist_to_cheaper_block(LCA, early, self); | |
1229 } else { | |
1230 // Just use the LCA of the uses. | |
1231 late = LCA; | |
1232 } | |
1233 | |
1234 // Put the node into target block | |
1235 schedule_node_into_block(self, late); | |
1236 | |
1237 #ifdef ASSERT | |
1238 if (self->needs_anti_dependence_check()) { | |
1239 // Since precedence edges are only inserted when we're sure they |
1240 // are needed, make sure that after placement in a block we don't |
1241 // need any new precedence edges. |
1242 verify_anti_dependences(late, self); | |
1243 } | |
1244 #endif | |
1245 } // Loop until all nodes have been visited | |
1246 | |
1247 } // end ScheduleLate | |
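
schedule_late's placement is driven by the least common ancestor, in the dominator tree, of all blocks containing uses. The classic depth-equalizing LCA walk over a hypothetical block type is sketched below; raise_LCA_above_use layers projection and Phi handling on top of this basic idea:

#include <cstddef>

// Hypothetical dominator-tree block: immediate dominator plus depth.
struct DomBlock {
  DomBlock* idom;
  unsigned  dom_depth;
};

// Returns the deepest block dominating both 'a' and 'b'. Folding this over
// all uses (starting from a == NULL for the first use) yields the latest
// legal placement that still dominates every use.
static DomBlock* dom_lca(DomBlock* a, DomBlock* b) {
  if (a == NULL) return b;
  while (a->dom_depth > b->dom_depth) a = a->idom;  // equalize depths
  while (b->dom_depth > a->dom_depth) b = b->idom;
  while (a != b) { a = a->idom; b = b->idom; }      // walk up in lockstep
  return a;
}
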
1248 | |
1249 //------------------------------GlobalCodeMotion------------------------------- | |
1250 void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) { | |
1251 ResourceMark rm; | |
1252 | |
1253 #ifndef PRODUCT | |
1254 if (trace_opto_pipelining()) { | |
1255 tty->print("\n---- Start GlobalCodeMotion ----\n"); | |
1256 } | |
1257 #endif | |
1258 | |
1259 // Initialize the bbs.map for things on the proj_list | |
1260 uint i; | |
1261 for( i=0; i < proj_list.size(); i++ ) | |
1262 _bbs.map(proj_list[i]->_idx, NULL); | |
1263 | |
1264 // Set the basic block for Nodes pinned into blocks | |
1265 Arena *a = Thread::current()->resource_area(); | |
1266 VectorSet visited(a); | |
1267 schedule_pinned_nodes( visited ); | |
1268 | |
1269 // Find the earliest Block any instruction can be placed in. Some | |
1270 // instructions are pinned into Blocks. Unpinned instructions can | |
1271 // appear in the last block in which all their inputs occur. |
1272 visited.Clear(); | |
1273 Node_List stack(a); | |
1274 stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list | |
1275 if (!schedule_early(visited, stack)) { | |
1276 // Bailout without retry | |
1277 C->record_method_not_compilable("early schedule failed"); | |
1278 return; | |
1279 } | |
1280 | |
1281 // Build Def-Use edges. | |
1282 proj_list.push(_root); // Add real root as another root | |
1283 proj_list.pop(); | |
1284 | |
1285 // Compute the latency information (via backwards walk) for all the | |
1286 // instructions in the graph | |
1685 | 1287 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
0 | 1288 |
1289 if( C->do_scheduling() ) | |
1290 ComputeLatenciesBackwards(visited, stack); | |
1291 | |
1292 // Now schedule all codes as LATE as possible. This is the LCA in the | |
1293 // dominator tree of all USES of a value. Pick the block with the least | |
1294 // loop nesting depth that is lowest in the dominator tree. | |
1295 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) | |
1296 schedule_late(visited, stack); | |
1297 if( C->failing() ) { | |
1298 // schedule_late fails only when graph is incorrect. | |
1299 assert(!VerifyGraphEdges, "verification should have failed"); | |
1300 return; | |
1301 } | |
1302 | |
1303 unique = C->unique(); | |
1304 | |
1305 #ifndef PRODUCT | |
1306 if (trace_opto_pipelining()) { | |
1307 tty->print("\n---- Detect implicit null checks ----\n"); | |
1308 } | |
1309 #endif | |
1310 | |
1311 // Detect implicit-null-check opportunities. Basically, find NULL checks | |
1312 // with suitable memory ops nearby. Use the memory op to do the NULL check. | |
1313 // I can generate a memory op if there is not one nearby. | |
1314 if (C->is_method_compilation()) { | |
1315 // Don't do it for natives, adapters, or runtime stubs | |
1316 int allowed_reasons = 0; | |
1317 // ...and don't do it when there have been too many traps, globally. | |
1318 for (int reason = (int)Deoptimization::Reason_none+1; | |
1319 reason < Compile::trapHistLength; reason++) { | |
1320 assert(reason < BitsPerInt, "recode bit map"); | |
1321 if (!C->too_many_traps((Deoptimization::DeoptReason) reason)) | |
1322 allowed_reasons |= nth_bit(reason); | |
1323 } | |
1324 // By reversing the loop direction we get a very minor gain on mpegaudio. | |
1325 // Feel free to revert to a forward loop for clarity. | |
1326 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { | |
1327 for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) { | |
1328 Node *proj = matcher._null_check_tests[i ]; | |
1329 Node *val = matcher._null_check_tests[i+1]; | |
1330 _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons); | |
1331 // The implicit_null_check will only perform the transformation | |
1332 // if the null branch is truly uncommon, *and* it leads to an | |
1333 // uncommon trap. Combined with the too_many_traps guards | |
1334 // above, this prevents SEGV storms reported in 6366351, | |
1335 // by recompiling offending methods without this optimization. | |
1336 } | |
1337 } | |
1338 | |
1339 #ifndef PRODUCT | |
1340 if (trace_opto_pipelining()) { | |
1341 tty->print("\n---- Start Local Scheduling ----\n"); | |
1342 } | |
1343 #endif | |
1344 | |
1345 // Schedule locally. Right now a simple topological sort. | |
1346 // Later, do a real latency-aware scheduler. |
1347 int *ready_cnt = NEW_RESOURCE_ARRAY(int,C->unique()); | |
1348 memset( ready_cnt, -1, C->unique() * sizeof(int) ); | |
1349 visited.Clear(); | |
1350 for (i = 0; i < _num_blocks; i++) { | |
1351 if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) { | |
1352 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { | |
1353 C->record_method_not_compilable("local schedule failed"); | |
1354 } | |
1355 return; | |
1356 } | |
1357 } | |
1358 | |
1359 // If we inserted any instructions between a Call and its CatchNode, |
1360 // clone the instructions on all paths below the Catch. | |
1361 for( i=0; i < _num_blocks; i++ ) | |
1362 _blocks[i]->call_catch_cleanup(_bbs); | |
1363 | |
1364 #ifndef PRODUCT | |
1365 if (trace_opto_pipelining()) { | |
1366 tty->print("\n---- After GlobalCodeMotion ----\n"); | |
1367 for (uint i = 0; i < _num_blocks; i++) { | |
1368 _blocks[i]->dump(); | |
1369 } | |
1370 } | |
1371 #endif | |
1685 | 1372 // Dead: poison the pointer so any stray use of _node_latency faults.
1373 _node_latency = (GrowableArray<uint> *)0xdeadbeef; | |
0 | 1374 } |
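
One detail above worth isolating is the allowed_reasons encoding: each deoptimization reason that has not yet trapped too often gets one bit in an int. A self-contained sketch, with a hypothetical reason count and trap predicate standing in for Compile::trapHistLength and C->too_many_traps():

#include <climits>

enum { kReasonCount = 8 };  // hypothetical; C2 uses Compile::trapHistLength

// Hypothetical stand-in for C->too_many_traps(reason).
static bool too_many_traps(int reason) { return reason == 3; }

static int allowed_reasons_mask() {
  // Every reason must fit in one bit of an int, as the assert in the
  // real code ("recode bit map") also insists.
  static_assert(kReasonCount <= (int)(sizeof(int) * CHAR_BIT), "recode bit map");
  int allowed = 0;
  for (int reason = 1; reason < kReasonCount; reason++) {
    if (!too_many_traps(reason))
      allowed |= (1 << reason);  // nth_bit(reason) in HotSpot
  }
  return allowed;
}
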
1375 | |
1376 | |
1377 //------------------------------Estimate_Block_Frequency----------------------- | |
1378 // Estimate block frequencies based on IfNode probabilities. | |
1379 void PhaseCFG::Estimate_Block_Frequency() { | |
418 | 1380 |
1381 // Force conditional branches leading to uncommon traps to be unlikely, | |
1382 // not because we get to the uncommon_trap with less relative frequency, | |
1383 // but because an uncommon_trap typically causes a deopt, so we only get | |
1384 // there once. | |
1385 if (C->do_freq_based_layout()) { | |
1386 Block_List worklist; | |
1387 Block* root_blk = _blocks[0]; | |
1388 for (uint i = 1; i < root_blk->num_preds(); i++) { | |
1389 Block *pb = _bbs[root_blk->pred(i)->_idx]; | |
1390 if (pb->has_uncommon_code()) { | |
1391 worklist.push(pb); | |
1392 } | |
1393 } | |
1394 while (worklist.size() > 0) { | |
1395 Block* uct = worklist.pop(); | |
1396 if (uct == _broot) continue; | |
1397 for (uint i = 1; i < uct->num_preds(); i++) { | |
1398 Block *pb = _bbs[uct->pred(i)->_idx]; | |
1399 if (pb->_num_succs == 1) { | |
1400 worklist.push(pb); | |
1401 } else if (pb->num_fall_throughs() == 2) { | |
1402 pb->update_uncommon_branch(uct); | |
1403 } | |
1404 } | |
1405 } | |
1406 } | |
0 | 1407 |
1408 // Create the loop tree and calculate loop depth. | |
1409 _root_loop = create_loop_tree(); | |
1410 _root_loop->compute_loop_depth(0); | |
1411 | |
1412 // Compute block frequency of each block, relative to a single loop entry. | |
1413 _root_loop->compute_freq(); | |
1414 | |
1415 // Adjust all frequencies to be relative to a single method entry | |
418 | 1416 _root_loop->_freq = 1.0; |
0 | 1417 _root_loop->scale_freq(); |
1418 | |
673 | 1419 // Save outermost loop frequency for LRG frequency threshold
1420 _outer_loop_freq = _root_loop->outer_loop_freq(); | |
1421 | |
0 | 1422 // Force paths ending at uncommon traps to be infrequent
418 | 1423 if (!C->do_freq_based_layout()) { |
1424 Block_List worklist; | |
1425 Block* root_blk = _blocks[0]; | |
1426 for (uint i = 1; i < root_blk->num_preds(); i++) { | |
1427 Block *pb = _bbs[root_blk->pred(i)->_idx]; | |
1428 if (pb->has_uncommon_code()) { | |
1429 worklist.push(pb); | |
1430 } | |
0 | 1431 } |
418 | 1432 while (worklist.size() > 0) { |
1433 Block* uct = worklist.pop(); | |
1434 uct->_freq = PROB_MIN; | |
1435 for (uint i = 1; i < uct->num_preds(); i++) { | |
1436 Block *pb = _bbs[uct->pred(i)->_idx]; | |
1437 if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { | |
1438 worklist.push(pb); | |
1439 } | |
0 | 1440 } |
1441 } | |
1442 } | |
1443 | |
552 | 1444 #ifdef ASSERT
1445 for (uint i = 0; i < _num_blocks; i++ ) { |
1446 Block *b = _blocks[i]; |
605 | 1447 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); |
552 | 1448 }
1449 #endif |
1450 |
0 | 1451 #ifndef PRODUCT |
1452 if (PrintCFGBlockFreq) { | |
1453 tty->print_cr("CFG Block Frequencies"); | |
1454 _root_loop->dump_tree(); | |
1455 if (Verbose) { | |
1456 tty->print_cr("PhaseCFG dump"); | |
1457 dump(); | |
1458 tty->print_cr("Node dump"); | |
1459 _root->dump(99999); | |
1460 } | |
1461 } | |
1462 #endif | |
1463 } | |
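
The backward worklist walk above pins blocks that can only reach an uncommon trap to a minimal frequency. A compact sketch of the same propagation over an illustrative block type (kProbMin stands in for C2's PROB_MIN):

#include <vector>

// Illustrative CFG block for the backward walk; not the C2 Block class.
struct FreqBlock {
  std::vector<FreqBlock*> preds;
  int    num_succs;
  double freq;
};

static const double kProbMin = 1e-6;  // stand-in for C2's PROB_MIN

// Pin every block that inevitably reaches an uncommon trap to a minimal
// frequency, walking backwards from the blocks containing the traps.
static void damp_uncommon_paths(std::vector<FreqBlock*> worklist) {
  while (!worklist.empty()) {
    FreqBlock* uct = worklist.back();
    worklist.pop_back();
    uct->freq = kProbMin;
    for (FreqBlock* pb : uct->preds) {
      // A single-successor predecessor cannot avoid the trap path, so it
      // is just as infrequent; the freq test keeps the walk terminating.
      if (pb->num_succs == 1 && pb->freq > kProbMin)
        worklist.push_back(pb);
    }
  }
}
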
1464 | |
1465 //----------------------------create_loop_tree-------------------------------- | |
1466 // Create a loop tree from the CFG | |
1467 CFGLoop* PhaseCFG::create_loop_tree() { | |
1468 | |
1469 #ifdef ASSERT | |
1470 assert( _blocks[0] == _broot, "" ); | |
1471 for (uint i = 0; i < _num_blocks; i++ ) { | |
1472 Block *b = _blocks[i]; | |
1473 // Check that the _loop fields are clear...we could clear them if not. |
1474 assert(b->_loop == NULL, "clear _loop expected"); | |
1475 // Sanity check that the RPO numbering is reflected in the _blocks array. | |
1476 // It doesn't have to be for the loop tree to be built, but if it is not, | |
1477 // then the blocks have been reordered since dom graph building...which | |
1478 // may call the RPO numbering into question |
1479 assert(b->_rpo == i, "unexpected reverse post order number"); | |
1480 } | |
1481 #endif | |
1482 | |
1483 int idct = 0; | |
1484 CFGLoop* root_loop = new CFGLoop(idct++); | |
1485 | |
1486 Block_List worklist; | |
1487 | |
1488 // Assign blocks to loops | |
1489 for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block | |
1490 Block *b = _blocks[i]; | |
1491 | |
1492 if (b->head()->is_Loop()) { | |
1493 Block* loop_head = b; | |
1494 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | |
1495 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); | |
1496 Block* tail = _bbs[tail_n->_idx]; | |
1497 | |
1498 // Defensively filter out Loop nodes for non-single-entry loops. | |
1499 // For all reasonable loops, the head occurs before the tail in RPO. | |
1500 if (i <= tail->_rpo) { | |
1501 | |
1502 // The tail and (recursive) predecessors of the tail | |
1503 // are made members of a new loop. | |
1504 | |
1505 assert(worklist.size() == 0, "nonempty worklist"); | |
1506 CFGLoop* nloop = new CFGLoop(idct++); | |
1507 assert(loop_head->_loop == NULL, "just checking"); | |
1508 loop_head->_loop = nloop; | |
1509 // Add to nloop so push_pred() will skip over inner loops | |
1510 nloop->add_member(loop_head); | |
1511 nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs); | |
1512 | |
1513 while (worklist.size() > 0) { | |
1514 Block* member = worklist.pop(); | |
1515 if (member != loop_head) { | |
1516 for (uint j = 1; j < member->num_preds(); j++) { | |
1517 nloop->push_pred(member, j, worklist, _bbs); | |
1518 } | |
1519 } | |
1520 } | |
1521 } | |
1522 } | |
1523 } | |
1524 | |
1525 // Create a member list for each loop consisting | |
1526 // of both blocks and (immediate child) loops. | |
1527 for (uint i = 0; i < _num_blocks; i++) { | |
1528 Block *b = _blocks[i]; | |
1529 CFGLoop* lp = b->_loop; | |
1530 if (lp == NULL) { | |
1531 // Not assigned to a loop. Add it to the method's pseudo loop. | |
1532 b->_loop = root_loop; | |
1533 lp = root_loop; | |
1534 } | |
1535 if (lp == root_loop || b != lp->head()) { // loop heads are already members | |
1536 lp->add_member(b); | |
1537 } | |
1538 if (lp != root_loop) { | |
1539 if (lp->parent() == NULL) { | |
1540 // Not a nested loop. Make it a child of the method's pseudo loop. | |
1541 root_loop->add_nested_loop(lp); | |
1542 } | |
1543 if (b == lp->head()) { | |
1544 // Add nested loop to member list of parent loop. | |
1545 lp->parent()->add_member(lp); | |
1546 } | |
1547 } | |
1548 } | |
1549 | |
1550 return root_loop; | |
1551 } | |
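
Membership assignment hinges on one RPO invariant: in a reducible, single-entry loop the head precedes every body block, so the backward flood from the tail can stop at any block whose RPO number is not larger than the head's. A reduced sketch of that flood over illustrative types (nested-loop handling, done by push_pred below, is omitted here):

#include <cstddef>
#include <vector>

struct SimpleLoop;

// Illustrative block: RPO number, predecessors, owning loop (NULL = none).
struct SimpleBlock {
  unsigned rpo;
  std::vector<SimpleBlock*> preds;
  SimpleLoop* loop;
};

struct SimpleLoop { std::vector<SimpleBlock*> members; };

// Flood backwards from the tail; every newly reached block with an RPO
// number larger than the head's belongs to the loop body.
static void collect_loop(SimpleBlock* head, SimpleBlock* tail, SimpleLoop* nloop) {
  head->loop = nloop;
  nloop->members.push_back(head);
  std::vector<SimpleBlock*> worklist;
  if (tail->loop == NULL && tail->rpo > head->rpo) {
    tail->loop = nloop;
    worklist.push_back(tail);
  }
  while (!worklist.empty()) {
    SimpleBlock* member = worklist.back();
    worklist.pop_back();
    nloop->members.push_back(member);
    for (SimpleBlock* pred : member->preds) {
      if (pred->loop == NULL && pred->rpo > head->rpo) {
        pred->loop = nloop;
        worklist.push_back(pred);
      }
    }
  }
}
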
1552 | |
1553 //------------------------------push_pred-------------------------------------- | |
1554 void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) { | |
1555 Node* pred_n = blk->pred(i); | |
1556 Block* pred = node_to_blk[pred_n->_idx]; | |
1557 CFGLoop *pred_loop = pred->_loop; | |
1558 if (pred_loop == NULL) { | |
1559 // Filter out blocks for non-single-entry loops. | |
1560 // For all reasonable loops, the head occurs before the tail in RPO. | |
1561 if (pred->_rpo > head()->_rpo) { | |
1562 pred->_loop = this; | |
1563 worklist.push(pred); | |
1564 } | |
1565 } else if (pred_loop != this) { | |
1566 // Nested loop. | |
1567 while (pred_loop->_parent != NULL && pred_loop->_parent != this) { | |
1568 pred_loop = pred_loop->_parent; | |
1569 } | |
1570 // Make pred's loop be a child | |
1571 if (pred_loop->_parent == NULL) { | |
1572 add_nested_loop(pred_loop); | |
1573 // Continue with loop entry predecessor. | |
1574 Block* pred_head = pred_loop->head(); | |
1575 assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | |
1576 assert(pred_head != head(), "loop head in only one loop"); | |
1577 push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk); | |
1578 } else { | |
1579 assert(pred_loop->_parent == this && _parent == NULL, "just checking"); | |
1580 } | |
1581 } | |
1582 } | |
1583 | |
1584 //------------------------------add_nested_loop-------------------------------- | |
1585 // Make cl a child of the current loop in the loop tree. | |
1586 void CFGLoop::add_nested_loop(CFGLoop* cl) { | |
1587 assert(_parent == NULL, "no parent yet"); | |
1588 assert(cl != this, "not my own parent"); | |
1589 cl->_parent = this; | |
1590 CFGLoop* ch = _child; | |
1591 if (ch == NULL) { | |
1592 _child = cl; | |
1593 } else { | |
1594 while (ch->_sibling != NULL) { ch = ch->_sibling; } | |
1595 ch->_sibling = cl; | |
1596 } | |
1597 } | |
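
add_nested_loop is an append on the classic first-child/next-sibling tree encoding, which stores arbitrary fan-out with two pointers per node. The same operation in isolation, over a hypothetical node type:

#include <cstddef>

// First-child / next-sibling encoding of an n-ary tree.
struct TreeNode {
  TreeNode* parent  = NULL;
  TreeNode* child   = NULL;   // first child
  TreeNode* sibling = NULL;   // next sibling under the same parent
};

// Append 'cl' as the last child of 'self' (O(#children), like the original).
static void add_nested(TreeNode* self, TreeNode* cl) {
  cl->parent = self;
  if (self->child == NULL) {
    self->child = cl;
  } else {
    TreeNode* ch = self->child;
    while (ch->sibling != NULL) ch = ch->sibling;
    ch->sibling = cl;
  }
}
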
1598 | |
1599 //------------------------------compute_loop_depth----------------------------- | |
1600 // Store the loop depth in each CFGLoop object. | |
1601 // Recursively walk the children to do the same for them. | |
1602 void CFGLoop::compute_loop_depth(int depth) { | |
1603 _depth = depth; | |
1604 CFGLoop* ch = _child; | |
1605 while (ch != NULL) { | |
1606 ch->compute_loop_depth(depth + 1); | |
1607 ch = ch->_sibling; | |
1608 } | |
1609 } | |
1610 | |
1611 //------------------------------compute_freq----------------------------------- | |
1612 // Compute the frequency of each block and loop, relative to a single entry | |
1613 // into the dominating loop head. | |
1614 void CFGLoop::compute_freq() { | |
1615 // Bottom up traversal of loop tree (visit inner loops first.) | |
1616 // Set loop head frequency to 1.0, then transitively | |
1617 // compute frequency for all successors in the loop, | |
1618 // as well as for each exit edge. Inner loops are | |
1619 // treated as single blocks with loop exit targets | |
1620 // as the successor blocks. | |
1621 | |
1622 // Nested loops first | |
1623 CFGLoop* ch = _child; | |
1624 while (ch != NULL) { | |
1625 ch->compute_freq(); | |
1626 ch = ch->_sibling; | |
1627 } | |
1628 assert (_members.length() > 0, "no empty loops"); | |
1629 Block* hd = head(); | |
1630 hd->_freq = 1.0f; | |
1631 for (int i = 0; i < _members.length(); i++) { | |
1632 CFGElement* s = _members.at(i); | |
1633 float freq = s->_freq; | |
1634 if (s->is_block()) { | |
1635 Block* b = s->as_Block(); | |
1636 for (uint j = 0; j < b->_num_succs; j++) { | |
1637 Block* sb = b->_succs[j]; | |
1638 update_succ_freq(sb, freq * b->succ_prob(j)); | |
1639 } | |
1640 } else { | |
1641 CFGLoop* lp = s->as_CFGLoop(); | |
1642 assert(lp->_parent == this, "immediate child"); | |
1643 for (int k = 0; k < lp->_exits.length(); k++) { | |
1644 Block* eb = lp->_exits.at(k).get_target(); | |
1645 float prob = lp->_exits.at(k).get_prob(); | |
1646 update_succ_freq(eb, freq * prob); | |
1647 } | |
1648 } | |
1649 } | |
1650 | |
1651 // For all loops other than the outer, "method" loop, | |
1652 // sum and normalize the exit probability. The "method" loop | |
1653 // should keep the initial exit probability of 1, so that | |
1654 // inner blocks do not get erroneously scaled. | |
1655 if (_depth != 0) { | |
1656 // Total the exit probabilities for this loop. | |
1657 float exits_sum = 0.0f; | |
1658 for (int i = 0; i < _exits.length(); i++) { | |
1659 exits_sum += _exits.at(i).get_prob(); | |
1660 } | |
1661 | |
1662 // Normalize the exit probabilities. Until now, each |
1663 // probability estimated the chance of exit on a |
1664 // single loop iteration; afterward, it estimates |
1665 // the probability of exit per loop entry. |
1666 for (int i = 0; i < _exits.length(); i++) { | |
1667 Block* et = _exits.at(i).get_target(); | |
418 | 1668 float new_prob = 0.0f; |
1669 if (_exits.at(i).get_prob() > 0.0f) { | |
1670 new_prob = _exits.at(i).get_prob() / exits_sum; | |
1671 } | |
0 | 1672 BlockProbPair bpp(et, new_prob); |
1673 _exits.at_put(i, bpp); | |
1674 } | |
1675 | |
418 | 1676 // Save the total, but guard against unreasonable probability, |
0 | 1677 // as the value is used to estimate the loop trip count. |
1678 // An infinite trip count would blur relative block | |
1679 // frequencies. | |
1680 if (exits_sum > 1.0f) exits_sum = 1.0f; |
1681 if (exits_sum < PROB_MIN) exits_sum = PROB_MIN; | |
1682 _exit_prob = exits_sum; | |
1683 } | |
1684 } | |
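
A worked example makes the normalization concrete. With two exits whose per-iteration probabilities are 0.01 and 0.03, exits_sum is 0.04, so the per-entry exit probabilities become 0.25 and 0.75, and the saved _exit_prob of 0.04 corresponds to an expected trip count of about 1/0.04 = 25. A minimal sketch of the same arithmetic, with a hypothetical exit type and clamp constants mirroring the guards above:

#include <algorithm>
#include <vector>

struct ExitProb { double prob; };     // per-iteration exit probability
static const double kProbMin = 1e-6;  // stand-in for PROB_MIN

// Rescale per-iteration exit probabilities into per-entry probabilities
// and return the clamped total; its reciprocal estimates the trip count.
static double normalize_exits(std::vector<ExitProb>& exits) {
  double sum = 0.0;
  for (const ExitProb& e : exits) sum += e.prob;
  for (ExitProb& e : exits)
    e.prob = (e.prob > 0.0) ? e.prob / sum : 0.0;
  return std::min(1.0, std::max(kProbMin, sum));
}
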
1685 | |
1686 //------------------------------succ_prob------------------------------------- | |
1687 // Determine the probability of reaching successor 'i' from the receiver block. | |
1688 float Block::succ_prob(uint i) { | |
1689 int eidx = end_idx(); | |
1690 Node *n = _nodes[eidx]; // Get ending Node | |
308 | 1691 |
1692 int op = n->Opcode(); | |
1693 if (n->is_Mach()) { | |
1694 if (n->is_MachNullCheck()) { | |
1695 // Can only reach here if called after lcm. The original Op_If is gone, | |
1696 // so we attempt to infer the probability from one or both of the | |
1697 // successor blocks. | |
1698 assert(_num_succs == 2, "expecting 2 successors of a null check"); | |
1699 // If either successor has only one predecessor, then the | |
605 | 1700 // probability estimate can be derived using the |
308 | 1701 // relative frequency of the successor and this block. |
1702 if (_succs[i]->num_preds() == 2) { | |
1703 return _succs[i]->_freq / _freq; | |
1704 } else if (_succs[1-i]->num_preds() == 2) { | |
1705 return 1 - (_succs[1-i]->_freq / _freq); | |
1706 } else { | |
1707 // Estimate using both successor frequencies | |
1708 float freq = _succs[i]->_freq; | |
1709 return freq / (freq + _succs[1-i]->_freq); | |
1710 } | |
1711 } | |
1712 op = n->as_Mach()->ideal_Opcode(); | |
1713 } | |
1714 | |
0 | 1715 |
1716 // Switch on branch type | |
1717 switch( op ) { | |
1718 case Op_CountedLoopEnd: | |
1719 case Op_If: { | |
1720 assert (i < 2, "just checking"); | |
1721 // Conditionals pass on only part of their frequency | |
1722 float prob = n->as_MachIf()->_prob; | |
1723 assert(prob >= 0.0 && prob <= 1.0, "out of range probability"); | |
1724 // If succ[i] is the FALSE branch, invert path info | |
1725 if( _nodes[i + eidx + 1]->Opcode() == Op_IfFalse ) { | |
1726 return 1.0f - prob; // not taken | |
1727 } else { | |
1728 return prob; // taken | |
1729 } | |
1730 } | |
1731 | |
1732 case Op_Jump: | |
1733 // Divide the frequency between all successors evenly | |
1734 return 1.0f/_num_succs; | |
1735 | |
1736 case Op_Catch: { | |
1737 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1738 if (ci->_con == CatchProjNode::fall_through_index) { | |
1739 // Fall-thru path gets the lion's share. | |
1740 return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs; | |
1741 } else { | |
1742 // Presume exceptional paths are equally unlikely | |
1743 return PROB_UNLIKELY_MAG(5); | |
1744 } | |
1745 } | |
1746 | |
1747 case Op_Root: | |
1748 case Op_Goto: | |
1749 // Pass frequency straight thru to target | |
1750 return 1.0f; | |
1751 | |
1752 case Op_NeverBranch: | |
1753 return 0.0f; | |
1754 | |
1755 case Op_TailCall: | |
1756 case Op_TailJump: | |
1757 case Op_Return: | |
1758 case Op_Halt: | |
1759 case Op_Rethrow: | |
1760 // Do not push out freq to root block | |
1761 return 0.0f; | |
1762 | |
1763 default: | |
1764 ShouldNotReachHere(); | |
1765 } | |
1766 | |
1767 return 0.0f; | |
1768 } | |
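
Because the MachNullCheck replaced the original If, its branch probability has to be inferred from block frequencies: if this block executes with frequency f and successor i with frequency f_i, a successor fed only by this block gives P(i) ~ f_i / f; failing that, the two successor frequencies are split proportionally. The same fallback chain in isolation (illustrative types; 'succ_sole_pred' abstracts HotSpot's num_preds() == 2 test):

// Illustrative two-successor block; not the C2 Block class.
struct NBlock {
  double freq;
  double succ_freq[2];
  bool   succ_sole_pred[2];  // true if we are the successor's only predecessor
};

static double infer_null_check_prob(const NBlock& b, int i) {
  // A successor fed only by this block got all its frequency from us,
  // so the frequency ratio is exactly the branch probability.
  if (b.succ_sole_pred[i])     return b.succ_freq[i] / b.freq;
  if (b.succ_sole_pred[1 - i]) return 1.0 - (b.succ_freq[1 - i] / b.freq);
  // Otherwise split proportionally between the two successor frequencies.
  return b.succ_freq[i] / (b.succ_freq[i] + b.succ_freq[1 - i]);
}
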
1769 | |
418 | 1770 //------------------------------num_fall_throughs----------------------------- |
1771 // Return the number of fall-through candidates for a block | |
1772 int Block::num_fall_throughs() { | |
1773 int eidx = end_idx(); | |
1774 Node *n = _nodes[eidx]; // Get ending Node | |
1775 | |
1776 int op = n->Opcode(); | |
1777 if (n->is_Mach()) { | |
1778 if (n->is_MachNullCheck()) { | |
1779 // In theory, either side can fall thru; for simplicity's sake, |
1780 // let's say only the false branch can now. |
1781 return 1; | |
1782 } | |
1783 op = n->as_Mach()->ideal_Opcode(); | |
1784 } | |
1785 | |
1786 // Switch on branch type | |
1787 switch( op ) { | |
1788 case Op_CountedLoopEnd: | |
1789 case Op_If: | |
1790 return 2; | |
1791 | |
1792 case Op_Root: | |
1793 case Op_Goto: | |
1794 return 1; | |
1795 | |
1796 case Op_Catch: { | |
1797 for (uint i = 0; i < _num_succs; i++) { | |
1798 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1799 if (ci->_con == CatchProjNode::fall_through_index) { | |
1800 return 1; | |
1801 } | |
1802 } | |
1803 return 0; | |
1804 } | |
1805 | |
1806 case Op_Jump: | |
1807 case Op_NeverBranch: | |
1808 case Op_TailCall: | |
1809 case Op_TailJump: | |
1810 case Op_Return: | |
1811 case Op_Halt: | |
1812 case Op_Rethrow: | |
1813 return 0; | |
1814 | |
1815 default: | |
1816 ShouldNotReachHere(); | |
1817 } | |
1818 | |
1819 return 0; | |
1820 } | |
1821 | |
1822 //------------------------------succ_fall_through----------------------------- | |
1823 // Return true if a specific successor could be fall-through target. | |
1824 bool Block::succ_fall_through(uint i) { | |
1825 int eidx = end_idx(); | |
1826 Node *n = _nodes[eidx]; // Get ending Node | |
1827 | |
1828 int op = n->Opcode(); | |
1829 if (n->is_Mach()) { | |
1830 if (n->is_MachNullCheck()) { | |
1831 // In theory, either side can fall thru; for simplicity's sake, |
1832 // let's say only the false branch can now. |
1833 return _nodes[i + eidx + 1]->Opcode() == Op_IfFalse; | |
1834 } | |
1835 op = n->as_Mach()->ideal_Opcode(); | |
1836 } | |
1837 | |
1838 // Switch on branch type | |
1839 switch( op ) { | |
1840 case Op_CountedLoopEnd: | |
1841 case Op_If: | |
1842 case Op_Root: | |
1843 case Op_Goto: | |
1844 return true; | |
1845 | |
1846 case Op_Catch: { | |
1847 const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj(); | |
1848 return ci->_con == CatchProjNode::fall_through_index; | |
1849 } | |
1850 | |
1851 case Op_Jump: | |
1852 case Op_NeverBranch: | |
1853 case Op_TailCall: | |
1854 case Op_TailJump: | |
1855 case Op_Return: | |
1856 case Op_Halt: | |
1857 case Op_Rethrow: | |
1858 return false; | |
1859 | |
1860 default: | |
1861 ShouldNotReachHere(); | |
1862 } | |
1863 | |
1864 return false; | |
1865 } | |
1866 | |
1867 //------------------------------update_uncommon_branch------------------------ | |
1868 // Update the probability of a two-branch to be uncommon | |
1869 void Block::update_uncommon_branch(Block* ub) { | |
1870 int eidx = end_idx(); | |
1871 Node *n = _nodes[eidx]; // Get ending Node | |
1872 | |
1873 int op = n->as_Mach()->ideal_Opcode(); | |
1874 | |
1875 assert(op == Op_CountedLoopEnd || op == Op_If, "must be an If"); |
1876 assert(num_fall_throughs() == 2, "must be a two way branch block"); | |
1877 | |
1878 // Which successor is ub? | |
1879 uint s; | |
1880 for (s = 0; s <_num_succs; s++) { | |
1881 if (_succs[s] == ub) break; | |
1882 } | |
1883 assert(s < 2, "uncommon successor must be found"); | |
1884 | |
1885 // If ub is the true path, make the probability small, else |
1886 // ub is the false path, and make the probability large | |
1887 bool invert = (_nodes[s + eidx + 1]->Opcode() == Op_IfFalse); | |
1888 | |
1889 // Get existing probability | |
1890 float p = n->as_MachIf()->_prob; | |
1891 | |
1892 if (invert) p = 1.0 - p; | |
1893 if (p > PROB_MIN) { | |
1894 p = PROB_MIN; | |
1895 } | |
1896 if (invert) p = 1.0 - p; | |
1897 | |
1898 n->as_MachIf()->_prob = p; | |
1899 } | |
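
The invert/clamp/invert dance exists because _prob always describes the taken (true) path, while the uncommon block may hang off either projection. For example, with _prob = 0.5 and ub on the false path: p becomes 1 - 0.5 = 0.5, is clamped down to PROB_MIN, and is stored back as 1 - PROB_MIN, leaving the true path almost certain. As a small pure function (kProbMin is an illustrative stand-in):

static const double kProbMin = 1e-6;  // stand-in for C2's PROB_MIN

// Clamp the probability of reaching the uncommon successor to kProbMin;
// 'uncommon_on_false_path' plays the role of 'invert' above.
static double clamp_uncommon(double prob_taken, bool uncommon_on_false_path) {
  double p = uncommon_on_false_path ? 1.0 - prob_taken : prob_taken;
  if (p > kProbMin) p = kProbMin;
  return uncommon_on_false_path ? 1.0 - p : p;
}
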
1900 | |
0 | 1901 //------------------------------update_succ_freq------------------------------- |
605 | 1902 // Update the appropriate frequency associated with block 'b', a successor of |
0 | 1903 // a block in this loop. |
1904 void CFGLoop::update_succ_freq(Block* b, float freq) { | |
1905 if (b->_loop == this) { | |
1906 if (b == head()) { | |
1907 // back branch within the loop | |
1908 // Do nothing now, the loop-carried frequency will be |
1909 // adjusted later in scale_freq(). |
1910 } else { | |
1911 // simple branch within the loop | |
1912 b->_freq += freq; | |
1913 } | |
1914 } else if (!in_loop_nest(b)) { | |
1915 // branch is exit from this loop | |
1916 BlockProbPair bpp(b, freq); | |
1917 _exits.append(bpp); | |
1918 } else { | |
1919 // branch into nested loop | |
1920 CFGLoop* ch = b->_loop; | |
1921 ch->_freq += freq; | |
1922 } | |
1923 } | |
1924 | |
1925 //------------------------------in_loop_nest----------------------------------- | |
1926 // Determine if block b is in the receiver's loop nest. | |
1927 bool CFGLoop::in_loop_nest(Block* b) { | |
1928 int depth = _depth; | |
1929 CFGLoop* b_loop = b->_loop; | |
1930 int b_depth = b_loop->_depth; | |
1931 if (depth == b_depth) { | |
1932 return true; | |
1933 } | |
1934 while (b_depth > depth) { | |
1935 b_loop = b_loop->_parent; | |
1936 b_depth = b_loop->_depth; | |
1937 } | |
1938 return b_loop == this; | |
1939 } | |
1940 | |
1941 //------------------------------scale_freq------------------------------------- | |
1942 // Scale frequency of loops and blocks by trip counts from outer loops | |
1943 // Do a top down traversal of loop tree (visit outer loops first.) | |
1944 void CFGLoop::scale_freq() { | |
1945 float loop_freq = _freq * trip_count(); | |
673 | 1946 _freq = loop_freq; |
0 | 1947 for (int i = 0; i < _members.length(); i++) { |
1948 CFGElement* s = _members.at(i); | |
552 | 1949 float block_freq = s->_freq * loop_freq;
621 | 1950 if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY) |
1951 block_freq = MIN_BLOCK_FREQUENCY; | |
552 | 1952 s->_freq = block_freq;
0 | 1953 } |
1954 CFGLoop* ch = _child; | |
1955 while (ch != NULL) { | |
1956 ch->scale_freq(); | |
1957 ch = ch->_sibling; | |
1958 } | |
1959 } | |
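
scale_freq converts loop-relative numbers into method-relative ones in one top-down pass: a loop's absolute entry frequency is its (already absolute) _freq times its trip count, and every member, block or child loop, is multiplied by that factor. A recursive sketch over illustrative types (kMinBlockFreq stands in for MIN_BLOCK_FREQUENCY; in C2 child loops sit in the member list, which is modeled here by scaling each child's freq directly):

#include <cmath>
#include <vector>

static const double kMinBlockFreq = 1e-35;  // stand-in for MIN_BLOCK_FREQUENCY

// Illustrative loop-tree node; not the C2 CFGLoop class.
struct ScaledLoop {
  double freq;                       // relative to one entry of the parent
  double trip_count;                 // ~ 1 / per-entry exit probability
  std::vector<double> block_freqs;   // member blocks, relative to one entry
  std::vector<ScaledLoop*> children;
};

static void scale_freq(ScaledLoop* lp) {
  double loop_freq = lp->freq * lp->trip_count;  // absolute iteration frequency
  lp->freq = loop_freq;
  for (double& f : lp->block_freqs) {
    double bf = f * loop_freq;
    // NaNs and underflow are pinned, as the register allocator needs a
    // meaningful frequency for every block.
    if (std::isnan(bf) || bf < kMinBlockFreq) bf = kMinBlockFreq;
    f = bf;
  }
  for (ScaledLoop* ch : lp->children) {
    ch->freq *= loop_freq;  // child entry frequency becomes absolute
    scale_freq(ch);
  }
}
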
1960 | |
673 | 1961 // Frequency of outer loop |
1962 float CFGLoop::outer_loop_freq() const { | |
1963 if (_child != NULL) { | |
1964 return _child->_freq; | |
1965 } | |
1966 return _freq; | |
1967 } | |
1968 | |
0 | 1969 #ifndef PRODUCT |
1970 //------------------------------dump_tree-------------------------------------- | |
1971 void CFGLoop::dump_tree() const { | |
1972 dump(); | |
1973 if (_child != NULL) _child->dump_tree(); | |
1974 if (_sibling != NULL) _sibling->dump_tree(); | |
1975 } | |
1976 | |
1977 //------------------------------dump------------------------------------------- | |
1978 void CFGLoop::dump() const { | |
1979 for (int i = 0; i < _depth; i++) tty->print(" "); | |
1980 tty->print("%s: %d trip_count: %6.0f freq: %6.0f\n", | |
1981 _depth == 0 ? "Method" : "Loop", _id, trip_count(), _freq); | |
1982 for (int i = 0; i < _depth; i++) tty->print(" "); | |
1983 tty->print(" members:"); |
1984 int k = 0; | |
1985 for (int i = 0; i < _members.length(); i++) { | |
1986 if (k++ >= 6) { | |
1987 tty->print("\n "); | |
1988 for (int j = 0; j < _depth+1; j++) tty->print(" "); | |
1989 k = 0; | |
1990 } | |
1991 CFGElement *s = _members.at(i); | |
1992 if (s->is_block()) { | |
1993 Block *b = s->as_Block(); | |
1994 tty->print(" B%d(%6.3f)", b->_pre_order, b->_freq); | |
1995 } else { | |
1996 CFGLoop* lp = s->as_CFGLoop(); | |
1997 tty->print(" L%d(%6.3f)", lp->_id, lp->_freq); | |
1998 } | |
1999 } | |
2000 tty->print("\n"); | |
2001 for (int i = 0; i < _depth; i++) tty->print(" "); | |
2002 tty->print(" exits: "); | |
2003 k = 0; | |
2004 for (int i = 0; i < _exits.length(); i++) { | |
2005 if (k++ >= 7) { | |
2006 tty->print("\n "); | |
2007 for (int j = 0; j < _depth+1; j++) tty->print(" "); | |
2008 k = 0; | |
2009 } | |
2010 Block *blk = _exits.at(i).get_target(); | |
2011 float prob = _exits.at(i).get_prob(); | |
2012 tty->print(" ->%d@%d%%", blk->_pre_order, (int)(prob*100)); | |
2013 } | |
2014 tty->print("\n"); | |
2015 } | |
2016 #endif |