Mercurial > hg > truffle
annotate src/share/vm/opto/gcm.cpp @ 17716:cdb71841f4bc
6498581: ThreadInterruptTest3 produces wrong output on Windows
Summary: There is race condition between os::interrupt and os::is_interrupted on Windows. In JVM_Sleep(Thread.sleep), check if thread gets interrupted, it may see interrupted but not really interrupted so cause spurious waking up (early return from sleep). Fix by checking if interrupt event really gets set thus prevent false return. For intrinsic of _isInterrupted, on Windows, go fastpath only on bit not set.
Reviewed-by: acorn, kvn
Contributed-by: david.holmes@oracle.com, yumin.qi@oracle.com
author | minqi |
---|---|
date | Wed, 26 Feb 2014 15:20:41 -0800 |
parents | 55fb97c4c58d |
children | abec000618bf |
rev | line source |
---|---|
0 | 1 /* |
17467
55fb97c4c58d
8029233: Update copyright year to match last edit in jdk8 hotspot repository for 2013
mikael
parents:
12171
diff
changeset
|
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
833
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
833
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
833
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "libadt/vectset.hpp" | |
27 #include "memory/allocation.inline.hpp" | |
28 #include "opto/block.hpp" | |
29 #include "opto/c2compiler.hpp" | |
30 #include "opto/callnode.hpp" | |
31 #include "opto/cfgnode.hpp" | |
32 #include "opto/machnode.hpp" | |
33 #include "opto/opcodes.hpp" | |
34 #include "opto/phaseX.hpp" | |
35 #include "opto/rootnode.hpp" | |
36 #include "opto/runtime.hpp" | |
37 #include "runtime/deoptimization.hpp" | |
38 #ifdef TARGET_ARCH_MODEL_x86_32 | |
39 # include "adfiles/ad_x86_32.hpp" | |
40 #endif | |
41 #ifdef TARGET_ARCH_MODEL_x86_64 | |
42 # include "adfiles/ad_x86_64.hpp" | |
43 #endif | |
44 #ifdef TARGET_ARCH_MODEL_sparc | |
45 # include "adfiles/ad_sparc.hpp" | |
46 #endif | |
47 #ifdef TARGET_ARCH_MODEL_zero | |
48 # include "adfiles/ad_zero.hpp" | |
49 #endif | |
2192
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
2008
diff
changeset
|
50 #ifdef TARGET_ARCH_MODEL_arm |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
2008
diff
changeset
|
51 # include "adfiles/ad_arm.hpp" |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
2008
diff
changeset
|
52 #endif |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
2008
diff
changeset
|
53 #ifdef TARGET_ARCH_MODEL_ppc |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
2008
diff
changeset
|
54 # include "adfiles/ad_ppc.hpp" |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
2008
diff
changeset
|
55 #endif |
1972 | 56 |
0 | 57 // Portions of code courtesy of Clifford Click |
58 | |
59 // Optimization - Graph Style | |
60 | |
552
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
61 // To avoid float value underflow |
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
62 #define MIN_BLOCK_FREQUENCY 1.e-35f |
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
63 |
//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b. Look for projections of n and make sure they
// are in b also.
void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
  // Set basic block of n, Add n to b,
  map_node_to_block(n, b);
  b->add_inst(n);

  // After Matching, nearly any old Node may have projections trailing it.
  // These are usually machine-dependent flags.  In any case, they might
  // float to another block below this one.  Move them up.
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* use = n->fast_out(i);
    if (use->is_Proj()) {
      Block* buse = get_block_for_node(use);
      if (buse != b) {              // In wrong block?
        if (buse != NULL) {
          // Projection was already placed somewhere else; pull it out of
          // that block before re-homing it next to its parent n.
          buse->find_remove(use);   // Remove from wrong block
        }
        map_node_to_block(use, b);
        b->add_inst(use);
      }
    }
  }
}
89 | |
601
523ded093c31
6809798: SafePointScalarObject node placed into incorrect block during GCM
kvn
parents:
552
diff
changeset
|
//----------------------------replace_block_proj_ctrl-------------------------
// Nodes that have is_block_proj() nodes as their control need to use
// the appropriate Region for their actual block as their control since
// the projection will be in a predecessor block.
void PhaseCFG::replace_block_proj_ctrl( Node *n ) {
  const Node *in0 = n->in(0);
  assert(in0 != NULL, "Only control-dependent");
  const Node *p = in0->is_block_proj();
  if (p != NULL && p != n) {    // Control from a block projection?
    assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
    // Find trailing Region
    Block *pb = get_block_for_node(in0); // Block-projection already has basic block
    uint j = 0;
    if (pb->_num_succs != 1) {  // More then 1 successor?
      // Search for successor.  The successor-defining projections live in
      // the last _num_succs slots of the block's node list.
      uint max = pb->number_of_nodes();
      assert( max > 1, "" );
      uint start = max - pb->_num_succs;
      // Find which output path belongs to projection
      for (j = start; j < max; j++) {
        if( pb->get_node(j) == in0 )
          break;
      }
      assert( j < max, "must find" );
      // Change control to match head of successor basic block.
      // Convert node-list position into an index into _succs.
      j -= start;
    }
    // Retarget n's control at the head (Region/Loop) of the successor block
    // instead of the projection sitting in the predecessor block.
    n->set_req(0, pb->_succs[j]->head());
  }
}
523ded093c31
6809798: SafePointScalarObject node placed into incorrect block during GCM
kvn
parents:
552
diff
changeset
|
120 |
0 | 121 |
//------------------------------schedule_pinned_nodes--------------------------
// Set the basic block for Nodes pinned into blocks.  Iterative depth-first
// walk over the ideal graph starting at _root, using an explicit stack to
// avoid deep recursion.
void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
  // Allocate node stack of size C->unique()+8 to avoid frequent realloc
  GrowableArray <Node *> spstack(C->unique() + 8);
  spstack.push(_root);
  while (spstack.is_nonempty()) {
    Node* node = spstack.pop();
    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
      if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
        assert(node->in(0), "pinned Node must have Control");
        // Before setting block replace block_proj control edge
        replace_block_proj_ctrl(node);
        // Walk the control chain up to the nearest block-starting node
        // (e.g. a Region); that node's block is where this one belongs.
        Node* input = node->in(0);
        while (!input->is_block_start()) {
          input = input->in(0);
        }
        Block* block = get_block_for_node(input); // Basic block of controlling input
        schedule_node_into_block(node, block);
      }

      // process all inputs that are non NULL
      for (int i = node->req() - 1; i >= 0; --i) {
        if (node->in(i) != NULL) {
          spstack.push(node->in(i));
        }
      }
    }
  }
}
152 | |
#ifdef ASSERT
// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that b2 is dominated by b1, the deepest
// input observed until b2.  Debug-only helper; on failure it dumps the
// offending node's inputs and their blocks before asserting.
static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
  if (b1 == NULL)  return;
  assert(b1->_dom_depth < b2->_dom_depth, "sanity");
  // Walk b2 up the idom chain; we must reach b1 if b1 dominates b2.
  Block* tmp = b2;
  while (tmp != b1 && tmp != NULL) {
    tmp = tmp->_idom;
  }
  if (tmp != b1) {
    // Detected an unschedulable graph.  Print some nice stuff and die.
    tty->print_cr("!!! Unschedulable graph !!!");
    for (uint j=0; j<n->len(); j++) { // For all inputs
      Node* inn = n->in(j); // Get input
      if (inn == NULL)  continue;  // Ignore NULL, missing inputs
      Block* inb = cfg->get_block_for_node(inn);
      tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
                 inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
      inn->dump();
    }
    tty->print("Failing node: ");
    n->dump();
    assert(false, "unscheduable graph");
  }
}
#endif
181 | |
// Find the last input dominated by all other inputs, i.e. the deepest block
// (in the dominator tree) among the blocks of n's inputs.  All inputs must
// already have been assigned blocks.  This is the "earliest legal block"
// used by schedule_early for unpinned nodes.
static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
  // Find the last input dominated by all other inputs.
  Block* deepb           = NULL;        // Deepest block so far
  int    deepb_dom_depth = 0;
  for (uint k = 0; k < n->len(); k++) { // For all inputs
    Node* inn = n->in(k);               // Get input
    if (inn == NULL)  continue;         // Ignore NULL, missing inputs
    Block* inb = cfg->get_block_for_node(inn);
    assert(inb != NULL, "must already have scheduled this input");
    if (deepb_dom_depth < (int) inb->_dom_depth) {
      // The new inb must be dominated by the previous deepb.
      // The various inputs must be linearly ordered in the dom
      // tree, or else there will not be a unique deepest block.
      DEBUG_ONLY(assert_dom(deepb, inb, n, cfg));
      deepb = inb;                      // Save deepest block
      deepb_dom_depth = deepb->_dom_depth;
    }
  }
  assert(deepb != NULL, "must be at least one input to n");
  return deepb;
}
203 | |
204 | |
//------------------------------schedule_early---------------------------------
// Find the earliest Block any instruction can be placed in.  Some instructions
// are pinned into Blocks.  Unpinned instructions can appear in last block in
// which all their inputs occur.
// Returns false if an unschedulable cycle is detected (an input is reached a
// second time while still un-placed); returns true on success.
bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
  // Allocate stack with enough space to avoid frequent realloc
  Node_Stack nstack(roots.Size() + 8);
  // _root will be processed among C->top() inputs
  roots.push(C->top());
  visited.set(C->top()->_idx);

  while (roots.size() != 0) {
    // Use local variables nstack_top_n & nstack_top_i to cache values
    // on stack's top.
    Node* parent_node = roots.pop();
    uint  input_index = 0;

    while (true) {
      if (input_index == 0) {
        // Fixup some control.  Constants without control get attached
        // to root and nodes that use is_block_proj() nodes should be attached
        // to the region that starts their block.
        const Node* control_input = parent_node->in(0);
        if (control_input != NULL) {
          replace_block_proj_ctrl(parent_node);
        } else {
          // Is a constant with NO inputs?
          if (parent_node->req() == 1) {
            parent_node->set_req(0, _root);
          }
        }
      }

      // First, visit all inputs and force them to get a block.  If an
      // input is already in a block we quit following inputs (to avoid
      // cycles). Instead we put that Node on a worklist to be handled
      // later (since IT'S inputs may not have a block yet).

      // Assume all n's inputs will be processed
      bool done = true;

      while (input_index < parent_node->len()) {
        Node* in = parent_node->in(input_index++);
        if (in == NULL) {
          continue;
        }

        int is_visited = visited.test_set(in->_idx);
        if (!has_block(in)) {
          if (is_visited) {
            // Revisiting an un-placed node: the graph has a cycle that
            // cannot be scheduled.
            return false;
          }
          // Save parent node and next input's index.
          nstack.push(parent_node, input_index);
          // Process current input now.
          parent_node = in;
          input_index = 0;
          // Not all n's inputs processed.
          done = false;
          break;
        } else if (!is_visited) {
          // Visit this guy later, using worklist
          roots.push(in);
        }
      }

      if (done) {
        // All of n's inputs have been processed, complete post-processing.

        // Some instructions are pinned into a block.  These include Region,
        // Phi, Start, Return, and other control-dependent instructions and
        // any projections which depend on them.
        if (!parent_node->pinned()) {
          // Set earliest legal block.
          Block* earliest_block = find_deepest_input(parent_node, this);
          map_node_to_block(parent_node, earliest_block);
        } else {
          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
        }

        if (nstack.is_empty()) {
          // Finished all nodes on stack.
          // Process next node on the worklist 'roots'.
          break;
        }
        // Get saved parent node and next input's index.
        parent_node = nstack.node();
        input_index = nstack.index();
        nstack.pop();
      }
    }
  }
  return true;
}
299 | |
300 //------------------------------dom_lca---------------------------------------- | |
301 // Find least common ancestor in dominator tree | |
302 // LCA is a current notion of LCA, to be raised above 'this'. | |
303 // As a convenient boundary condition, return 'this' if LCA is NULL. | |
304 // Find the LCA of those two nodes. | |
305 Block* Block::dom_lca(Block* LCA) { | |
306 if (LCA == NULL || LCA == this) return this; | |
307 | |
308 Block* anc = this; | |
309 while (anc->_dom_depth > LCA->_dom_depth) | |
310 anc = anc->_idom; // Walk up till anc is as high as LCA | |
311 | |
312 while (LCA->_dom_depth > anc->_dom_depth) | |
313 LCA = LCA->_idom; // Walk up till LCA is as high as anc | |
314 | |
315 while (LCA != anc) { // Walk both up till they are the same | |
316 LCA = LCA->_idom; | |
317 anc = anc->_idom; | |
318 } | |
319 | |
320 return LCA; | |
321 } | |
322 | |
//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use.  If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) {
  Block* buse = cfg->get_block_for_node(use);
  if (buse == NULL)    return LCA;   // Unused killing Projs have no use block
  if (!use->is_Phi())  return buse->dom_lca(LCA);
  uint pmax = use->req();       // Number of Phi inputs
  // Why does not this loop just break after finding the matching input to
  // the Phi?  Well...it's like this.  I do not have true def-use/use-def
  // chains.  Means I cannot distinguish, from the def-use direction, which
  // of many use-defs lead from the same use to the same def.  That is, this
  // Phi might have several uses of the same def.  Each use appears in a
  // different predecessor block.  But when I enter here, I cannot distinguish
  // which use-def edge I should find the predecessor block for.  So I find
  // them all.  Means I do a little extra work if a Phi uses the same value
  // more than once.
  for (uint j=1; j<pmax; j++) { // For all inputs (slot 0 is the Phi's control)
    if (use->in(j) == def) {    // Found matching input?
      // For a Phi the def must dominate the corresponding predecessor
      // block, not the Phi's own block.
      Block* pred = cfg->get_block_for_node(buse->pred(j));
      LCA = pred->dom_lca(LCA);
    }
  }
  return LCA;
}
350 | |
//----------------------------raise_LCA_above_marks----------------------------
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index.  Return the LCA (in the dom tree)
// of all marked blocks.  If there are none marked, return the original
// LCA.
static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) {
  Block_List worklist;
  worklist.push(LCA);
  while (worklist.size() > 0) {
    Block* mid = worklist.pop();
    if (mid == early) continue;  // stop searching here

    // Test and set the visited bit.
    if (mid->raise_LCA_visited() == mark) continue;  // already visited

    // Don't process the current LCA, otherwise the search may terminate early
    if (mid != LCA && mid->raise_LCA_mark() == mark) {
      // Raise the LCA.
      LCA = mid->dom_lca(LCA);
      if (LCA == early) break;   // stop searching everywhere
      assert(early->dominates(LCA), "early is high enough");
      // Resume searching at that point, skipping intermediate levels.
      worklist.push(LCA);
      if (LCA == mid)
        continue; // Don't mark as visited to avoid early termination.
    } else {
      // Keep searching through this block's predecessors.
      // Predecessors are numbered from 1 (slot 0 is the block head).
      for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
        Block* mid_parent = cfg->get_block_for_node(mid->pred(j));
        worklist.push(mid_parent);
      }
    }
    // Mark visited only after the push above, so a re-raised LCA can still
    // be expanded later (see the 'LCA == mid' case).
    mid->set_raise_LCA_visited(mark);
  }
  return LCA;
}
388 | |
//--------------------------memory_early_block--------------------------------
// This is a variation of find_deepest_input, the heart of schedule_early.
// Find the "early" block for a load, if we considered only memory and
// address inputs, that is, if other data inputs were ignored.
//
// Because a subset of edges are considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node. We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) {
  Node* base;
  Node* index;
  Node* store = load->in(MemNode::Memory);
  load->as_Mach()->memory_inputs(base, index);

  assert(base != NodeSentinel && index != NodeSentinel,
         "unexpected base/index inputs");

  // Collect only the memory-relevant inputs: base, index, memory state.
  Node* mem_inputs[4];
  int mem_inputs_length = 0;
  if (base != NULL)  mem_inputs[mem_inputs_length++] = base;
  if (index != NULL) mem_inputs[mem_inputs_length++] = index;
  if (store != NULL) mem_inputs[mem_inputs_length++] = store;

  // In the comparison below, add one to account for the control input,
  // which may be null, but always takes up a spot in the in array.
  if (mem_inputs_length + 1 < (int) load->req()) {
    // This "load" has more inputs than just the memory, base and index inputs.
    // For purposes of checking anti-dependences, we need to start
    // from the early block of only the address portion of the instruction,
    // and ignore other blocks that may have factored into the wider
    // schedule_early calculation.
    if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);

    Block* deepb           = NULL;        // Deepest block so far
    int    deepb_dom_depth = 0;
    for (int i = 0; i < mem_inputs_length; i++) {
      Block* inb = cfg->get_block_for_node(mem_inputs[i]);
      if (deepb_dom_depth < (int) inb->_dom_depth) {
        // The new inb must be dominated by the previous deepb.
        // The various inputs must be linearly ordered in the dom
        // tree, or else there will not be a unique deepest block.
        DEBUG_ONLY(assert_dom(deepb, inb, load, cfg));
        deepb = inb;                      // Save deepest block
        deepb_dom_depth = deepb->_dom_depth;
      }
    }
    early = deepb;
  }

  return early;
}
441 | |
442 //--------------------------insert_anti_dependences--------------------------- | |
443 // A load may need to witness memory that nearby stores can overwrite. | |
444 // For each nearby store, either insert an "anti-dependence" edge | |
445 // from the load to the store, or else move LCA upward to force the | |
446 // load to (eventually) be scheduled in a block above the store. | |
447 // | |
448 // Do not add edges to stores on distinct control-flow paths; | |
449 // only add edges to stores which might interfere. | |
450 // | |
451 // Return the (updated) LCA. There will not be any possibly interfering | |
452 // store between the load's "early block" and the updated LCA. | |
453 // Any stores in the updated LCA will have new precedence edges | |
454 // back to the load. The caller is expected to schedule the load | |
455 // in the LCA, in which case the precedence edges will make LCM | |
456 // preserve anti-dependences. The caller may also hoist the load | |
457 // above the LCA, if it is not the early block. | |
458 Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) { | |
459 assert(load->needs_anti_dependence_check(), "must be a load of some sort"); | |
460 assert(LCA != NULL, ""); | |
461 DEBUG_ONLY(Block* LCA_orig = LCA); | |
462 | |
463 // Compute the alias index. Loads and stores with different alias indices | |
464 // do not need anti-dependence edges. | |
465 uint load_alias_idx = C->get_alias_index(load->adr_type()); | |
466 #ifdef ASSERT | |
467 if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 && | |
468 (PrintOpto || VerifyAliases || | |
469 PrintMiscellaneous && (WizardMode || Verbose))) { | |
470 // Load nodes should not consume all of memory. | |
471 // Reporting a bottom type indicates a bug in adlc. | |
472 // If some particular type of node validly consumes all of memory, | |
473 // sharpen the preceding "if" to exclude it, so we can catch bugs here. | |
474 tty->print_cr("*** Possible Anti-Dependence Bug: Load consumes all of memory."); | |
475 load->dump(2); | |
476 if (VerifyAliases) assert(load_alias_idx != Compile::AliasIdxBot, ""); | |
477 } | |
478 #endif | |
479 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp), | |
480 "String compare is only known 'load' that does not conflict with any stores"); | |
681 | 481 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals), |
482 "String equals is a 'load' that does not conflict with any stores"); | |
483 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf), | |
484 "String indexOf is a 'load' that does not conflict with any stores"); | |
485 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq), | |
486 "Arrays equals is a 'load' that do not conflict with any stores"); | |
0 | 487 |
488 if (!C->alias_type(load_alias_idx)->is_rewritable()) { | |
489 // It is impossible to spoil this load by putting stores before it, | |
490 // because we know that the stores will never update the value | |
491 // which 'load' must witness. | |
492 return LCA; | |
493 } | |
494 | |
495 node_idx_t load_index = load->_idx; | |
496 | |
497 // Note the earliest legal placement of 'load', as determined by | |
498 // by the unique point in the dom tree where all memory effects | |
499 // and other inputs are first available. (Computed by schedule_early.) | |
500 // For normal loads, 'early' is the shallowest place (dom graph wise) | |
501 // to look for anti-deps between this load and any store. | |
12023 | 502 Block* early = get_block_for_node(load); |
0 | 503 |
504 // If we are subsuming loads, compute an "early" block that only considers | |
505 // memory or address inputs. This block may be different than the | |
506 // schedule_early block in that it could be at an even shallower depth in the | |
507 // dominator tree, and allow for a broader discovery of anti-dependences. | |
508 if (C->subsume_loads()) { | |
12023 | 509 early = memory_early_block(load, early, this); |
0 | 510 } |
511 | |
512 ResourceArea *area = Thread::current()->resource_area(); | |
513 Node_List worklist_mem(area); // prior memory state to store | |
514 Node_List worklist_store(area); // possible-def to explore | |
31
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
515 Node_List worklist_visited(area); // visited mergemem nodes |
0 | 516 Node_List non_early_stores(area); // all relevant stores outside of early |
517 bool must_raise_LCA = false; | |
518 | |
519 #ifdef TRACK_PHI_INPUTS | |
520 // %%% This extra checking fails because MergeMem nodes are not GVNed. | |
521 // Provide "phi_inputs" to check if every input to a PhiNode is from the | |
522 // original memory state. This indicates a PhiNode for which should not | |
523 // prevent the load from sinking. For such a block, set_raise_LCA_mark | |
524 // may be overly conservative. | |
525 // Mechanism: count inputs seen for each Phi encountered in worklist_store. | |
526 DEBUG_ONLY(GrowableArray<uint> phi_inputs(area, C->unique(),0,0)); | |
527 #endif | |
528 | |
529 // 'load' uses some memory state; look for users of the same state. | |
530 // Recurse through MergeMem nodes to the stores that use them. | |
531 | |
532 // Each of these stores is a possible definition of memory | |
533 // that 'load' needs to use. We need to force 'load' | |
534 // to occur before each such store. When the store is in | |
535 // the same block as 'load', we insert an anti-dependence | |
536 // edge load->store. | |
537 | |
538 // The relevant stores "nearby" the load consist of a tree rooted | |
539 // at initial_mem, with internal nodes of type MergeMem. | |
540 // Therefore, the branches visited by the worklist are of this form: | |
541 // initial_mem -> (MergeMem ->)* store | |
542 // The anti-dependence constraints apply only to the fringe of this tree. | |
543 | |
544 Node* initial_mem = load->in(MemNode::Memory); | |
545 worklist_store.push(initial_mem); | |
31
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
546 worklist_visited.push(initial_mem); |
0 | 547 worklist_mem.push(NULL); |
548 while (worklist_store.size() > 0) { | |
549 // Examine a nearby store to see if it might interfere with our load. | |
550 Node* mem = worklist_mem.pop(); | |
551 Node* store = worklist_store.pop(); | |
552 uint op = store->Opcode(); | |
553 | |
554 // MergeMems do not directly have anti-deps. | |
555 // Treat them as internal nodes in a forward tree of memory states, | |
556 // the leaves of which are each a 'possible-def'. | |
557 if (store == initial_mem // root (exclusive) of tree we are searching | |
558 || op == Op_MergeMem // internal node of tree we are searching | |
559 ) { | |
560 mem = store; // It's not a possibly interfering store. | |
31
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
561 if (store == initial_mem) |
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
562 initial_mem = NULL; // only process initial memory once |
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
563 |
0 | 564 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { |
565 store = mem->fast_out(i); | |
566 if (store->is_MergeMem()) { | |
567 // Be sure we don't get into combinatorial problems. | |
568 // (Allow phis to be repeated; they can merge two relevant states.) | |
31
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
569 uint j = worklist_visited.size(); |
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
570 for (; j > 0; j--) { |
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
571 if (worklist_visited.at(j-1) == store) break; |
0 | 572 } |
31
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
573 if (j > 0) continue; // already on work list; do not repeat |
6152cbb08ce9
6590177: jck60019 test assert(!repeated,"do not walk merges twice")
kvn
parents:
0
diff
changeset
|
574 worklist_visited.push(store); |
0 | 575 } |
576 worklist_mem.push(mem); | |
577 worklist_store.push(store); | |
578 } | |
579 continue; | |
580 } | |
581 | |
582 if (op == Op_MachProj || op == Op_Catch) continue; | |
583 if (store->needs_anti_dependence_check()) continue; // not really a store | |
584 | |
585 // Compute the alias index. Loads and stores with different alias | |
586 // indices do not need anti-dependence edges. Wide MemBar's are | |
587 // anti-dependent on everything (except immutable memories). | |
588 const TypePtr* adr_type = store->adr_type(); | |
589 if (!C->can_alias(adr_type, load_alias_idx)) continue; | |
590 | |
591 // Most slow-path runtime calls do NOT modify Java memory, but | |
592 // they can block and so write Raw memory. | |
593 if (store->is_Mach()) { | |
594 MachNode* mstore = store->as_Mach(); | |
595 if (load_alias_idx != Compile::AliasIdxRaw) { | |
596 // Check for call into the runtime using the Java calling | |
597 // convention (and from there into a wrapper); it has no | |
598 // _method. Can't do this optimization for Native calls because | |
599 // they CAN write to Java memory. | |
600 if (mstore->ideal_Opcode() == Op_CallStaticJava) { | |
601 assert(mstore->is_MachSafePoint(), ""); | |
602 MachSafePointNode* ms = (MachSafePointNode*) mstore; | |
603 assert(ms->is_MachCallJava(), ""); | |
604 MachCallJavaNode* mcj = (MachCallJavaNode*) ms; | |
605 if (mcj->_method == NULL) { | |
606 // These runtime calls do not write to Java visible memory | |
607 // (other than Raw) and so do not require anti-dependence edges. | |
608 continue; | |
609 } | |
610 } | |
611 // Same for SafePoints: they read/write Raw but only read otherwise. | |
612 // This is basically a workaround for SafePoints only defining control | |
613 // instead of control + memory. | |
614 if (mstore->ideal_Opcode() == Op_SafePoint) | |
615 continue; | |
616 } else { | |
617 // Some raw memory, such as the load of "top" at an allocation, | |
618 // can be control dependent on the previous safepoint. See | |
619 // comments in GraphKit::allocate_heap() about control input. | |
620 // Inserting an anti-dep between such a safepoint and a use | |
621 // creates a cycle, and will cause a subsequent failure in | |
622 // local scheduling. (BugId 4919904) | |
623 // (%%% How can a control input be a safepoint and not a projection??) | |
624 if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore) | |
625 continue; | |
626 } | |
627 } | |
628 | |
629 // Identify a block that the current load must be above, | |
630 // or else observe that 'store' is all the way up in the | |
631 // earliest legal block for 'load'. In the latter case, | |
632 // immediately insert an anti-dependence edge. | |
12023 | 633 Block* store_block = get_block_for_node(store); |
0 | 634 assert(store_block != NULL, "unused killing projections skipped above"); |
635 | |
636 if (store->is_Phi()) { | |
637 // 'load' uses memory which is one (or more) of the Phi's inputs. | |
638 // It must be scheduled not before the Phi, but rather before | |
639 // each of the relevant Phi inputs. | |
640 // | |
641 // Instead of finding the LCA of all inputs to a Phi that match 'mem', | |
642 // we mark each corresponding predecessor block and do a combined | |
643 // hoisting operation later (raise_LCA_above_marks). | |
644 // | |
645 // Do not assert(store_block != early, "Phi merging memory after access") | |
646 // PhiNode may be at start of block 'early' with backedge to 'early' | |
647 DEBUG_ONLY(bool found_match = false); | |
648 for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) { | |
649 if (store->in(j) == mem) { // Found matching input? | |
650 DEBUG_ONLY(found_match = true); | |
12023 | 651 Block* pred_block = get_block_for_node(store_block->pred(j)); |
0 | 652 if (pred_block != early) { |
653 // If any predecessor of the Phi matches the load's "early block", | |
654 // we do not need a precedence edge between the Phi and 'load' | |
605 | 655 // since the load will be forced into a block preceding the Phi. |
0 | 656 pred_block->set_raise_LCA_mark(load_index); |
657 assert(!LCA_orig->dominates(pred_block) || | |
658 early->dominates(pred_block), "early is high enough"); | |
659 must_raise_LCA = true; | |
788 | 660 } else { |
661 // anti-dependent upon PHI pinned below 'early', no edge needed | |
662 LCA = early; // but can not schedule below 'early' | |
0 | 663 } |
664 } | |
665 } | |
666 assert(found_match, "no worklist bug"); | |
667 #ifdef TRACK_PHI_INPUTS | |
668 #ifdef ASSERT | |
669 // This assert asks about correct handling of PhiNodes, which may not | |
670 // have all input edges directly from 'mem'. See BugId 4621264 | |
671 int num_mem_inputs = phi_inputs.at_grow(store->_idx,0) + 1; | |
672 // Increment by exactly one even if there are multiple copies of 'mem' | |
673 // coming into the phi, because we will run this block several times | |
674 // if there are several copies of 'mem'. (That's how DU iterators work.) | |
675 phi_inputs.at_put(store->_idx, num_mem_inputs); | |
676 assert(PhiNode::Input + num_mem_inputs < store->req(), | |
677 "Expect at least one phi input will not be from original memory state"); | |
678 #endif //ASSERT | |
679 #endif //TRACK_PHI_INPUTS | |
680 } else if (store_block != early) { | |
681 // 'store' is between the current LCA and earliest possible block. | |
682 // Label its block, and decide later on how to raise the LCA | |
683 // to include the effect on LCA of this store. | |
684 // If this store's block gets chosen as the raised LCA, we | |
685 // will find him on the non_early_stores list and stick him | |
686 // with a precedence edge. | |
687 // (But, don't bother if LCA is already raised all the way.) | |
688 if (LCA != early) { | |
689 store_block->set_raise_LCA_mark(load_index); | |
690 must_raise_LCA = true; | |
691 non_early_stores.push(store); | |
692 } | |
693 } else { | |
694 // Found a possibly-interfering store in the load's 'early' block. | |
695 // This means 'load' cannot sink at all in the dominator tree. | |
696 // Add an anti-dep edge, and squeeze 'load' into the highest block. | |
697 assert(store != load->in(0), "dependence cycle found"); | |
698 if (verify) { | |
699 assert(store->find_edge(load) != -1, "missing precedence edge"); | |
700 } else { | |
701 store->add_prec(load); | |
702 } | |
703 LCA = early; | |
704 // This turns off the process of gathering non_early_stores. | |
705 } | |
706 } | |
707 // (Worklist is now empty; all nearby stores have been visited.) | |
708 | |
709 // Finished if 'load' must be scheduled in its 'early' block. | |
710 // If we found any stores there, they have already been given | |
711 // precedence edges. | |
712 if (LCA == early) return LCA; | |
713 | |
714 // We get here only if there are no possibly-interfering stores | |
715 // in the load's 'early' block. Move LCA up above all predecessors | |
716 // which contain stores we have noted. | |
717 // | |
718 // The raised LCA block can be a home to such interfering stores, | |
719 // but its predecessors must not contain any such stores. | |
720 // | |
721 // The raised LCA will be a lower bound for placing the load, | |
722 // preventing the load from sinking past any block containing | |
723 // a store that may invalidate the memory state required by 'load'. | |
724 if (must_raise_LCA) | |
12023 | 725 LCA = raise_LCA_above_marks(LCA, load->_idx, early, this); |
0 | 726 if (LCA == early) return LCA; |
727 | |
728 // Insert anti-dependence edges from 'load' to each store | |
729 // in the non-early LCA block. | |
730 // Mine the non_early_stores list for such stores. | |
731 if (LCA->raise_LCA_mark() == load_index) { | |
732 while (non_early_stores.size() > 0) { | |
733 Node* store = non_early_stores.pop(); | |
12023 | 734 Block* store_block = get_block_for_node(store); |
0 | 735 if (store_block == LCA) { |
736 // add anti_dependence from store to load in its own block | |
737 assert(store != load->in(0), "dependence cycle found"); | |
738 if (verify) { | |
739 assert(store->find_edge(load) != -1, "missing precedence edge"); | |
740 } else { | |
741 store->add_prec(load); | |
742 } | |
743 } else { | |
744 assert(store_block->raise_LCA_mark() == load_index, "block was marked"); | |
745 // Any other stores we found must be either inside the new LCA | |
746 // or else outside the original LCA. In the latter case, they | |
747 // did not interfere with any use of 'load'. | |
748 assert(LCA->dominates(store_block) | |
749 || !LCA_orig->dominates(store_block), "no stray stores"); | |
750 } | |
751 } | |
752 } | |
753 | |
754 // Return the highest block containing stores; any stores | |
755 // within that block have been given anti-dependence edges. | |
756 return LCA; | |
757 } | |
758 | |
759 // This class is used to iterate backwards over the nodes in the graph. | |
760 | |
761 class Node_Backward_Iterator { | |
762 | |
763 private: | |
764 Node_Backward_Iterator(); | |
765 | |
766 public: | |
767 // Constructor for the iterator | |
12023 | 768 Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg); |
0 | 769 |
770 // Postincrement operator to iterate over the nodes | |
771 Node *next(); | |
772 | |
773 private: | |
774 VectorSet &_visited; | |
775 Node_List &_stack; | |
12023 | 776 PhaseCFG &_cfg; |
0 | 777 }; |
778 | |
779 // Constructor for the Node_Backward_Iterator | |
12023 | 780 Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg) |
781 : _visited(visited), _stack(stack), _cfg(cfg) { | |
0 | 782 // The stack should contain exactly the root |
783 stack.clear(); | |
784 stack.push(root); | |
785 | |
786 // Clear the visited bits | |
787 visited.Clear(); | |
788 } | |
789 | |
790 // Iterator for the Node_Backward_Iterator | |
791 Node *Node_Backward_Iterator::next() { | |
792 | |
793 // If the _stack is empty, then just return NULL: finished. | |
794 if ( !_stack.size() ) | |
795 return NULL; | |
796 | |
797 // '_stack' is emulating a real _stack. The 'visit-all-users' loop has been | |
798 // made stateless, so I do not need to record the index 'i' on my _stack. | |
799 // Instead I visit all users each time, scanning for unvisited users. | |
800 // I visit unvisited not-anti-dependence users first, then anti-dependent | |
801 // children next. | |
802 Node *self = _stack.pop(); | |
803 | |
804 // I cycle here when I am entering a deeper level of recursion. | |
805 // The key variable 'self' was set prior to jumping here. | |
806 while( 1 ) { | |
807 | |
808 _visited.set(self->_idx); | |
809 | |
810 // Now schedule all uses as late as possible. | |
12023 | 811 const Node* src = self->is_Proj() ? self->in(0) : self; |
812 uint src_rpo = _cfg.get_block_for_node(src)->_rpo; | |
0 | 813 |
814 // Schedule all nodes in a post-order visit | |
815 Node *unvisited = NULL; // Unvisited anti-dependent Node, if any | |
816 | |
817 // Scan for unvisited nodes | |
818 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { | |
819 // For all uses, schedule late | |
820 Node* n = self->fast_out(i); // Use | |
821 | |
822 // Skip already visited children | |
823 if ( _visited.test(n->_idx) ) | |
824 continue; | |
825 | |
826 // do not traverse backward control edges | |
827 Node *use = n->is_Proj() ? n->in(0) : n; | |
12023 | 828 uint use_rpo = _cfg.get_block_for_node(use)->_rpo; |
0 | 829 |
830 if ( use_rpo < src_rpo ) | |
831 continue; | |
832 | |
833 // Phi nodes always precede uses in a basic block | |
834 if ( use_rpo == src_rpo && use->is_Phi() ) | |
835 continue; | |
836 | |
837 unvisited = n; // Found unvisited | |
838 | |
839 // Check for possible-anti-dependent | |
840 if( !n->needs_anti_dependence_check() ) | |
841 break; // Not visited, not anti-dep; schedule it NOW | |
842 } | |
843 | |
844 // Did I find an unvisited not-anti-dependent Node? | |
845 if ( !unvisited ) | |
846 break; // All done with children; post-visit 'self' | |
847 | |
848 // Visit the unvisited Node. Contains the obvious push to | |
849 // indicate I'm entering a deeper level of recursion. I push the | |
850 // old state onto the _stack and set a new state and loop (recurse). | |
851 _stack.push(self); | |
852 self = unvisited; | |
853 } // End recursion loop | |
854 | |
855 return self; | |
856 } | |
857 | |
858 //------------------------------ComputeLatenciesBackwards---------------------- | |
859 // Compute the latency of all the instructions. | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
860 void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) { |
0 | 861 #ifndef PRODUCT |
862 if (trace_opto_pipelining()) | |
863 tty->print("\n#---- ComputeLatenciesBackwards ----\n"); | |
864 #endif | |
865 | |
12023 | 866 Node_Backward_Iterator iter((Node *)_root, visited, stack, *this); |
0 | 867 Node *n; |
868 | |
869 // Walk over all the nodes from last to first | |
870 while (n = iter.next()) { | |
871 // Set the latency for the definitions of this instruction | |
872 partial_latency_of_defs(n); | |
873 } | |
874 } // end ComputeLatenciesBackwards | |
875 | |
876 //------------------------------partial_latency_of_defs------------------------ | |
877 // Compute the latency impact of this node on all defs. This computes | |
878 // a number that increases as we approach the beginning of the routine. | |
879 void PhaseCFG::partial_latency_of_defs(Node *n) { | |
880 // Set the latency for this instruction | |
881 #ifndef PRODUCT | |
882 if (trace_opto_pipelining()) { | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
883 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); |
0 | 884 dump(); |
885 } | |
886 #endif | |
887 | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
888 if (n->is_Proj()) { |
0 | 889 n = n->in(0); |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
890 } |
0 | 891 |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
892 if (n->is_Root()) { |
0 | 893 return; |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
894 } |
0 | 895 |
896 uint nlen = n->len(); | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
897 uint use_latency = get_latency_for_node(n); |
12023 | 898 uint use_pre_order = get_block_for_node(n)->_pre_order; |
0 | 899 |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
900 for (uint j = 0; j < nlen; j++) { |
0 | 901 Node *def = n->in(j); |
902 | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
903 if (!def || def == n) { |
0 | 904 continue; |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
905 } |
0 | 906 |
907 // Walk backwards thru projections | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
908 if (def->is_Proj()) { |
0 | 909 def = def->in(0); |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
910 } |
0 | 911 |
912 #ifndef PRODUCT | |
913 if (trace_opto_pipelining()) { | |
914 tty->print("# in(%2d): ", j); | |
915 def->dump(); | |
916 } | |
917 #endif | |
918 | |
919 // If the defining block is not known, assume it is ok | |
12023 | 920 Block *def_block = get_block_for_node(def); |
0 | 921 uint def_pre_order = def_block ? def_block->_pre_order : 0; |
922 | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
923 if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) { |
0 | 924 continue; |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
925 } |
0 | 926 |
927 uint delta_latency = n->latency(j); | |
928 uint current_latency = delta_latency + use_latency; | |
929 | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
930 if (get_latency_for_node(def) < current_latency) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
931 set_latency_for_node(def, current_latency); |
0 | 932 } |
933 | |
934 #ifndef PRODUCT | |
935 if (trace_opto_pipelining()) { | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
936 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def)); |
0 | 937 } |
938 #endif | |
939 } | |
940 } | |
941 | |
942 //------------------------------latency_from_use------------------------------- | |
943 // Compute the latency of a specific use | |
944 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) { | |
945 // If self-reference, return no latency | |
12023 | 946 if (use == n || use->is_Root()) { |
0 | 947 return 0; |
12023 | 948 } |
0 | 949 |
12023 | 950 uint def_pre_order = get_block_for_node(def)->_pre_order; |
0 | 951 uint latency = 0; |
952 | |
953 // If the use is not a projection, then it is simple... | |
954 if (!use->is_Proj()) { | |
955 #ifndef PRODUCT | |
956 if (trace_opto_pipelining()) { | |
957 tty->print("# out(): "); | |
958 use->dump(); | |
959 } | |
960 #endif | |
961 | |
12023 | 962 uint use_pre_order = get_block_for_node(use)->_pre_order; |
0 | 963 |
964 if (use_pre_order < def_pre_order) | |
965 return 0; | |
966 | |
967 if (use_pre_order == def_pre_order && use->is_Phi()) | |
968 return 0; | |
969 | |
970 uint nlen = use->len(); | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
971 uint nl = get_latency_for_node(use); |
0 | 972 |
973 for ( uint j=0; j<nlen; j++ ) { | |
974 if (use->in(j) == n) { | |
975 // Change this if we want local latencies | |
976 uint ul = use->latency(j); | |
977 uint l = ul + nl; | |
978 if (latency < l) latency = l; | |
979 #ifndef PRODUCT | |
980 if (trace_opto_pipelining()) { | |
981 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, latency = %d", | |
982 nl, j, ul, l, latency); | |
983 } | |
984 #endif | |
985 } | |
986 } | |
987 } else { | |
988 // This is a projection, just grab the latency of the use(s) | |
989 for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) { | |
990 uint l = latency_from_use(use, def, use->fast_out(j)); | |
991 if (latency < l) latency = l; | |
992 } | |
993 } | |
994 | |
995 return latency; | |
996 } | |
997 | |
998 //------------------------------latency_from_uses------------------------------ | |
999 // Compute the latency of this instruction relative to all of it's uses. | |
1000 // This computes a number that increases as we approach the beginning of the | |
1001 // routine. | |
1002 void PhaseCFG::latency_from_uses(Node *n) { | |
1003 // Set the latency for this instruction | |
1004 #ifndef PRODUCT | |
1005 if (trace_opto_pipelining()) { | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1006 tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); |
0 | 1007 dump(); |
1008 } | |
1009 #endif | |
1010 uint latency=0; | |
1011 const Node *def = n->is_Proj() ? n->in(0): n; | |
1012 | |
1013 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |
1014 uint l = latency_from_use(n, def, n->fast_out(i)); | |
1015 | |
1016 if (latency < l) latency = l; | |
1017 } | |
1018 | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1019 set_latency_for_node(n, latency); |
0 | 1020 } |
1021 | |
1022 //------------------------------hoist_to_cheaper_block------------------------- | |
1023 // Pick a block for node self, between early and LCA, that is a cheaper | |
1024 // alternative to LCA. | |
1025 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { | |
1026 const double delta = 1+PROB_UNLIKELY_MAG(4); | |
1027 Block* least = LCA; | |
1028 double least_freq = least->_freq; | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1029 uint target = get_latency_for_node(self); |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1030 uint start_latency = get_latency_for_node(LCA->head()); |
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1031 uint end_latency = get_latency_for_node(LCA->get_node(LCA->end_idx())); |
0 | 1032 bool in_latency = (target <= start_latency); |
12023 | 1033 const Block* root_block = get_block_for_node(_root); |
0 | 1034 |
1035 // Turn off latency scheduling if scheduling is just plain off | |
1036 if (!C->do_scheduling()) | |
1037 in_latency = true; | |
1038 | |
1039 // Do not hoist (to cover latency) instructions which target a | |
1040 // single register. Hoisting stretches the live range of the | |
1041 // single register and may force spilling. | |
1042 MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL; | |
1043 if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty()) | |
1044 in_latency = true; | |
1045 | |
1046 #ifndef PRODUCT | |
1047 if (trace_opto_pipelining()) { | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1048 tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self)); |
0 | 1049 self->dump(); |
1050 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | |
1051 LCA->_pre_order, | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1052 LCA->head()->_idx, |
0 | 1053 start_latency, |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1054 LCA->get_node(LCA->end_idx())->_idx, |
0 | 1055 end_latency, |
1056 least_freq); | |
1057 } | |
1058 #endif | |
1059 | |
8691
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7196
diff
changeset
|
1060 int cand_cnt = 0; // number of candidates tried |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7196
diff
changeset
|
1061 |
0 | 1062 // Walk up the dominator tree from LCA (Lowest common ancestor) to |
1063 // the earliest legal location. Capture the least execution frequency. | |
1064 while (LCA != early) { | |
1065 LCA = LCA->_idom; // Follow up the dominator tree | |
1066 | |
1067 if (LCA == NULL) { | |
1068 // Bailout without retry | |
1069 C->record_method_not_compilable("late schedule failed: LCA == NULL"); | |
1070 return least; | |
1071 } | |
1072 | |
1073 // Don't hoist machine instructions to the root basic block | |
1074 if (mach && LCA == root_block) | |
1075 break; | |
1076 | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1077 uint start_lat = get_latency_for_node(LCA->head()); |
0 | 1078 uint end_idx = LCA->end_idx(); |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1079 uint end_lat = get_latency_for_node(LCA->get_node(end_idx)); |
0 | 1080 double LCA_freq = LCA->_freq; |
1081 #ifndef PRODUCT | |
1082 if (trace_opto_pipelining()) { | |
1083 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1084 LCA->_pre_order, LCA->head()->_idx, start_lat, end_idx, end_lat, LCA_freq); |
0 | 1085 } |
1086 #endif | |
8691
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7196
diff
changeset
|
1087 cand_cnt++; |
0 | 1088 if (LCA_freq < least_freq || // Better Frequency |
8691
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7196
diff
changeset
|
1089 (StressGCM && Compile::randomized_select(cand_cnt)) || // Should be randomly accepted in stress mode |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7196
diff
changeset
|
1090 (!StressGCM && // Otherwise, choose with latency |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7196
diff
changeset
|
1091 !in_latency && // No block containing latency |
0 | 1092 LCA_freq < least_freq * delta && // No worse frequency |
1093 target >= end_lat && // within latency range | |
1094 !self->is_iteratively_computed() ) // But don't hoist IV increments | |
1095 // because they may end up above other uses of their phi forcing | |
1096 // their result register to be different from their input. | |
1097 ) { | |
1098 least = LCA; // Found cheaper block | |
1099 least_freq = LCA_freq; | |
1100 start_latency = start_lat; | |
1101 end_latency = end_lat; | |
1102 if (target <= start_lat) | |
1103 in_latency = true; | |
1104 } | |
1105 } | |
1106 | |
1107 #ifndef PRODUCT | |
1108 if (trace_opto_pipelining()) { | |
1109 tty->print_cr("# Choose block B%d with start latency=%d and freq=%g", | |
1110 least->_pre_order, start_latency, least_freq); | |
1111 } | |
1112 #endif | |
1113 | |
1114 // See if the latency needs to be updated | |
1115 if (target < end_latency) { | |
1116 #ifndef PRODUCT | |
1117 if (trace_opto_pipelining()) { | |
1118 tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); | |
1119 } | |
1120 #endif | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1121 set_latency_for_node(self, end_latency); |
0 | 1122 partial_latency_of_defs(self); |
1123 } | |
1124 | |
1125 return least; | |
1126 } | |
1127 | |
1128 | |
1129 //------------------------------schedule_late----------------------------------- | |
1130 // Now schedule all codes as LATE as possible. This is the LCA in the | |
1131 // dominator tree of all USES of a value. Pick the block with the least | |
1132 // loop nesting depth that is lowest in the dominator tree. | |
1133 extern const char must_clone[]; | |
1134 void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) { | |
1135 #ifndef PRODUCT | |
1136 if (trace_opto_pipelining()) | |
1137 tty->print("\n#---- schedule_late ----\n"); | |
1138 #endif | |
1139 | |
12023 | 1140 Node_Backward_Iterator iter((Node *)_root, visited, stack, *this); |
0 | 1141 Node *self; |
1142 | |
1143 // Walk over all the nodes from last to first | |
1144 while (self = iter.next()) { | |
12023 | 1145 Block* early = get_block_for_node(self); // Earliest legal placement |
0 | 1146 |
1147 if (self->is_top()) { | |
1148 // Top node goes in bb #2 with other constants. | |
1149 // It must be special-cased, because it has no out edges. | |
1150 early->add_inst(self); | |
1151 continue; | |
1152 } | |
1153 | |
1154 // No uses, just terminate | |
1155 if (self->outcnt() == 0) { | |
3842 | 1156 assert(self->is_MachProj(), "sanity"); |
0 | 1157 continue; // Must be a dead machine projection |
1158 } | |
1159 | |
1160 // If node is pinned in the block, then no scheduling can be done. | |
1161 if( self->pinned() ) // Pinned in block? | |
1162 continue; | |
1163 | |
1164 MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL; | |
1165 if (mach) { | |
1166 switch (mach->ideal_Opcode()) { | |
1167 case Op_CreateEx: | |
1168 // Don't move exception creation | |
1169 early->add_inst(self); | |
1170 continue; | |
1171 break; | |
1172 case Op_CheckCastPP: | |
1173 // Don't move CheckCastPP nodes away from their input, if the input | |
1174 // is a rawptr (5071820). | |
1175 Node *def = self->in(1); | |
1176 if (def != NULL && def->bottom_type()->base() == Type::RawPtr) { | |
1177 early->add_inst(self); | |
833
acba6af809c8
6840775: Multiple JVM crashes seen with 1.6.0_10 through 1.6.0_14
kvn
parents:
788
diff
changeset
|
1178 #ifdef ASSERT |
acba6af809c8
6840775: Multiple JVM crashes seen with 1.6.0_10 through 1.6.0_14
kvn
parents:
788
diff
changeset
|
1179 _raw_oops.push(def); |
acba6af809c8
6840775: Multiple JVM crashes seen with 1.6.0_10 through 1.6.0_14
kvn
parents:
788
diff
changeset
|
1180 #endif |
0 | 1181 continue; |
1182 } | |
1183 break; | |
1184 } | |
1185 } | |
1186 | |
1187 // Gather LCA of all uses | |
1188 Block *LCA = NULL; | |
1189 { | |
1190 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { | |
1191 // For all uses, find LCA | |
1192 Node* use = self->fast_out(i); | |
12023 | 1193 LCA = raise_LCA_above_use(LCA, use, self, this); |
0 | 1194 } |
1195 } // (Hide defs of imax, i from rest of block.) | |
1196 | |
1197 // Place temps in the block of their use. This isn't a | |
1198 // requirement for correctness but it reduces useless | |
1199 // interference between temps and other nodes. | |
1200 if (mach != NULL && mach->is_MachTemp()) { | |
12023 | 1201 map_node_to_block(self, LCA); |
0 | 1202 LCA->add_inst(self); |
1203 continue; | |
1204 } | |
1205 | |
1206 // Check if 'self' could be anti-dependent on memory | |
1207 if (self->needs_anti_dependence_check()) { | |
1208 // Hoist LCA above possible-defs and insert anti-dependences to | |
1209 // defs in new LCA block. | |
1210 LCA = insert_anti_dependences(LCA, self); | |
1211 } | |
1212 | |
1213 if (early->_dom_depth > LCA->_dom_depth) { | |
1214 // Somehow the LCA has moved above the earliest legal point. | |
1215 // (One way this can happen is via memory_early_block.) | |
1216 if (C->subsume_loads() == true && !C->failing()) { | |
1217 // Retry with subsume_loads == false | |
1218 // If this is the first failure, the sentinel string will "stick" | |
1219 // to the Compile object, and the C2Compiler will see it and retry. | |
1220 C->record_failure(C2Compiler::retry_no_subsuming_loads()); | |
1221 } else { | |
1222 // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) | |
1223 C->record_method_not_compilable("late schedule failed: incorrect graph"); | |
1224 } | |
1225 return; | |
1226 } | |
1227 | |
1228 // If there is no opportunity to hoist, then we're done. | |
8691
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7196
diff
changeset
|
1229 // In stress mode, try to hoist even the single operations. |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7196
diff
changeset
|
1230 bool try_to_hoist = StressGCM || (LCA != early); |
0 | 1231 |
1232 // Must clone guys stay next to use; no hoisting allowed. | |
1233 // Also cannot hoist guys that alter memory or are otherwise not | |
1234 // allocatable (hoisting can make a value live longer, leading to | |
1235 // anti and output dependency problems which are normally resolved | |
1236 // by the register allocator giving everyone a different register). | |
1237 if (mach != NULL && must_clone[mach->ideal_Opcode()]) | |
1238 try_to_hoist = false; | |
1239 | |
1240 Block* late = NULL; | |
1241 if (try_to_hoist) { | |
1242 // Now find the block with the least execution frequency. | |
1243 // Start at the latest schedule and work up to the earliest schedule | |
1244 // in the dominator tree. Thus the Node will dominate all its uses. | |
1245 late = hoist_to_cheaper_block(LCA, early, self); | |
1246 } else { | |
1247 // Just use the LCA of the uses. | |
1248 late = LCA; | |
1249 } | |
1250 | |
1251 // Put the node into target block | |
1252 schedule_node_into_block(self, late); | |
1253 | |
1254 #ifdef ASSERT | |
1255 if (self->needs_anti_dependence_check()) { | |
1256 // since precedence edges are only inserted when we're sure they | |
1257 // are needed make sure that after placement in a block we don't | |
1258 // need any new precedence edges. | |
1259 verify_anti_dependences(late, self); | |
1260 } | |
1261 #endif | |
1262 } // Loop until all nodes have been visited | |
1263 | |
1264 } // end ScheduleLate | |
1265 | |
1266 //------------------------------GlobalCodeMotion------------------------------- | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1267 void PhaseCFG::global_code_motion() { |
0 | 1268 ResourceMark rm; |
1269 | |
1270 #ifndef PRODUCT | |
1271 if (trace_opto_pipelining()) { | |
1272 tty->print("\n---- Start GlobalCodeMotion ----\n"); | |
1273 } | |
1274 #endif | |
1275 | |
12023 | 1276 // Initialize the node to block mapping for things on the proj_list |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1277 for (uint i = 0; i < _matcher.number_of_projections(); i++) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1278 unmap_node_from_block(_matcher.get_projection(i)); |
12023 | 1279 } |
0 | 1280 |
1281 // Set the basic block for Nodes pinned into blocks | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1282 Arena* arena = Thread::current()->resource_area(); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1283 VectorSet visited(arena); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1284 schedule_pinned_nodes(visited); |
0 | 1285 |
1286 // Find the earliest Block any instruction can be placed in. Some | |
1287 // instructions are pinned into Blocks. Unpinned instructions can | |
1288 // appear in last block in which all their inputs occur. | |
1289 visited.Clear(); | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1290 Node_List stack(arena); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1291 // Pre-grow the list |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1292 stack.map((C->unique() >> 1) + 16, NULL); |
0 | 1293 if (!schedule_early(visited, stack)) { |
1294 // Bailout without retry | |
1295 C->record_method_not_compilable("early schedule failed"); | |
1296 return; | |
1297 } | |
1298 | |
1299 // Build Def-Use edges. | |
1300 // Compute the latency information (via backwards walk) for all the | |
1301 // instructions in the graph | |
1685 | 1302 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
0 | 1303 |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1304 if (C->do_scheduling()) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1305 compute_latencies_backwards(visited, stack); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1306 } |
0 | 1307 |
1308 // Now schedule all codes as LATE as possible. This is the LCA in the | |
1309 // dominator tree of all USES of a value. Pick the block with the least | |
1310 // loop nesting depth that is lowest in the dominator tree. | |
1311 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) | |
1312 schedule_late(visited, stack); | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1313 if (C->failing()) { |
0 | 1314 // schedule_late fails only when graph is incorrect. |
1315 assert(!VerifyGraphEdges, "verification should have failed"); | |
1316 return; | |
1317 } | |
1318 | |
1319 #ifndef PRODUCT | |
1320 if (trace_opto_pipelining()) { | |
1321 tty->print("\n---- Detect implicit null checks ----\n"); | |
1322 } | |
1323 #endif | |
1324 | |
1325 // Detect implicit-null-check opportunities. Basically, find NULL checks | |
1326 // with suitable memory ops nearby. Use the memory op to do the NULL check. | |
1327 // I can generate a memory op if there is not one nearby. | |
1328 if (C->is_method_compilation()) { | |
1329 // Don't do it for natives, adapters, or runtime stubs | |
1330 int allowed_reasons = 0; | |
1331 // ...and don't do it when there have been too many traps, globally. | |
1332 for (int reason = (int)Deoptimization::Reason_none+1; | |
1333 reason < Compile::trapHistLength; reason++) { | |
1334 assert(reason < BitsPerInt, "recode bit map"); | |
1335 if (!C->too_many_traps((Deoptimization::DeoptReason) reason)) | |
1336 allowed_reasons |= nth_bit(reason); | |
1337 } | |
1338 // By reversing the loop direction we get a very minor gain on mpegaudio. | |
1339 // Feel free to revert to a forward loop for clarity. | |
1340 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1341 for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1342 Node* proj = _matcher._null_check_tests[i]; |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1343 Node* val = _matcher._null_check_tests[i + 1]; |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1344 Block* block = get_block_for_node(proj); |
12171
4b078f877b56
8023988: Move local scheduling of nodes to the CFG creation and code motion phase (PhaseCFG)
adlertz
parents:
12167
diff
changeset
|
1345 implicit_null_check(block, proj, val, allowed_reasons); |
0 | 1346 // The implicit_null_check will only perform the transformation |
1347 // if the null branch is truly uncommon, *and* it leads to an | |
1348 // uncommon trap. Combined with the too_many_traps guards | |
1349 // above, this prevents SEGV storms reported in 6366351, | |
1350 // by recompiling offending methods without this optimization. | |
1351 } | |
1352 } | |
1353 | |
1354 #ifndef PRODUCT | |
1355 if (trace_opto_pipelining()) { | |
1356 tty->print("\n---- Start Local Scheduling ----\n"); | |
1357 } | |
1358 #endif | |
1359 | |
1360 // Schedule locally. Right now a simple topological sort. | |
1361 // Later, do a real latency aware scheduler. | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1362 GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1); |
0 | 1363 visited.Clear(); |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1364 for (uint i = 0; i < number_of_blocks(); i++) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1365 Block* block = get_block(i); |
12171
4b078f877b56
8023988: Move local scheduling of nodes to the CFG creation and code motion phase (PhaseCFG)
adlertz
parents:
12167
diff
changeset
|
1366 if (!schedule_local(block, ready_cnt, visited)) { |
0 | 1367 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { |
1368 C->record_method_not_compilable("local schedule failed"); | |
1369 } | |
1370 return; | |
1371 } | |
1372 } | |
1373 | |
1374 // If we inserted any instructions between a Call and his CatchNode, | |
1375 // clone the instructions on all paths below the Catch. | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1376 for (uint i = 0; i < number_of_blocks(); i++) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1377 Block* block = get_block(i); |
12171
4b078f877b56
8023988: Move local scheduling of nodes to the CFG creation and code motion phase (PhaseCFG)
adlertz
parents:
12167
diff
changeset
|
1378 call_catch_cleanup(block); |
12023 | 1379 } |
0 | 1380 |
1381 #ifndef PRODUCT | |
1382 if (trace_opto_pipelining()) { | |
1383 tty->print("\n---- After GlobalCodeMotion ----\n"); | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1384 for (uint i = 0; i < number_of_blocks(); i++) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1385 Block* block = get_block(i); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1386 block->dump(); |
0 | 1387 } |
1388 } | |
1389 #endif | |
1685 | 1390 // Dead. |
1391 _node_latency = (GrowableArray<uint> *)0xdeadbeef; | |
0 | 1392 } |
1393 | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1394 bool PhaseCFG::do_global_code_motion() { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1395 |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1396 build_dominator_tree(); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1397 if (C->failing()) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1398 return false; |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1399 } |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1400 |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1401 NOT_PRODUCT( C->verify_graph_edges(); ) |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1402 |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1403 estimate_block_frequency(); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1404 |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1405 global_code_motion(); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1406 |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1407 if (C->failing()) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1408 return false; |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1409 } |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1410 |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1411 return true; |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1412 } |
0 | 1413 |
1414 //------------------------------Estimate_Block_Frequency----------------------- | |
1415 // Estimate block frequencies based on IfNode probabilities. | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1416 void PhaseCFG::estimate_block_frequency() { |
418 | 1417 |
1418 // Force conditional branches leading to uncommon traps to be unlikely, | |
1419 // not because we get to the uncommon_trap with less relative frequency, | |
1420 // but because an uncommon_trap typically causes a deopt, so we only get | |
1421 // there once. | |
1422 if (C->do_freq_based_layout()) { | |
1423 Block_List worklist; | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1424 Block* root_blk = get_block(0); |
418 | 1425 for (uint i = 1; i < root_blk->num_preds(); i++) { |
12023 | 1426 Block *pb = get_block_for_node(root_blk->pred(i)); |
418 | 1427 if (pb->has_uncommon_code()) { |
1428 worklist.push(pb); | |
1429 } | |
1430 } | |
1431 while (worklist.size() > 0) { | |
1432 Block* uct = worklist.pop(); | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1433 if (uct == get_root_block()) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1434 continue; |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1435 } |
418 | 1436 for (uint i = 1; i < uct->num_preds(); i++) { |
12023 | 1437 Block *pb = get_block_for_node(uct->pred(i)); |
418 | 1438 if (pb->_num_succs == 1) { |
1439 worklist.push(pb); | |
1440 } else if (pb->num_fall_throughs() == 2) { | |
1441 pb->update_uncommon_branch(uct); | |
1442 } | |
1443 } | |
1444 } | |
1445 } | |
0 | 1446 |
1447 // Create the loop tree and calculate loop depth. | |
1448 _root_loop = create_loop_tree(); | |
1449 _root_loop->compute_loop_depth(0); | |
1450 | |
1451 // Compute block frequency of each block, relative to a single loop entry. | |
1452 _root_loop->compute_freq(); | |
1453 | |
1454 // Adjust all frequencies to be relative to a single method entry | |
418 | 1455 _root_loop->_freq = 1.0; |
0 | 1456 _root_loop->scale_freq(); |
1457 | |
673 | 1458 // Save outmost loop frequency for LRG frequency threshold |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1459 _outer_loop_frequency = _root_loop->outer_loop_freq(); |
673 | 1460 |
0 | 1461 // force paths ending at uncommon traps to be infrequent |
418 | 1462 if (!C->do_freq_based_layout()) { |
1463 Block_List worklist; | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1464 Block* root_blk = get_block(0); |
418 | 1465 for (uint i = 1; i < root_blk->num_preds(); i++) { |
12023 | 1466 Block *pb = get_block_for_node(root_blk->pred(i)); |
418 | 1467 if (pb->has_uncommon_code()) { |
1468 worklist.push(pb); | |
1469 } | |
0 | 1470 } |
418 | 1471 while (worklist.size() > 0) { |
1472 Block* uct = worklist.pop(); | |
1473 uct->_freq = PROB_MIN; | |
1474 for (uint i = 1; i < uct->num_preds(); i++) { | |
12023 | 1475 Block *pb = get_block_for_node(uct->pred(i)); |
418 | 1476 if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { |
1477 worklist.push(pb); | |
1478 } | |
0 | 1479 } |
1480 } | |
1481 } | |
1482 | |
552
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
1483 #ifdef ASSERT |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1484 for (uint i = 0; i < number_of_blocks(); i++) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1485 Block* b = get_block(i); |
605 | 1486 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); |
552
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
1487 } |
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
1488 #endif |
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
1489 |
0 | 1490 #ifndef PRODUCT |
1491 if (PrintCFGBlockFreq) { | |
1492 tty->print_cr("CFG Block Frequencies"); | |
1493 _root_loop->dump_tree(); | |
1494 if (Verbose) { | |
1495 tty->print_cr("PhaseCFG dump"); | |
1496 dump(); | |
1497 tty->print_cr("Node dump"); | |
1498 _root->dump(99999); | |
1499 } | |
1500 } | |
1501 #endif | |
1502 } | |
1503 | |
1504 //----------------------------create_loop_tree-------------------------------- | |
1505 // Create a loop tree from the CFG | |
1506 CFGLoop* PhaseCFG::create_loop_tree() { | |
1507 | |
1508 #ifdef ASSERT | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1509 assert(get_block(0) == get_root_block(), "first block should be root block"); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1510 for (uint i = 0; i < number_of_blocks(); i++) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1511 Block* block = get_block(i); |
0 | 1512 // Check that _loop field are clear...we could clear them if not. |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1513 assert(block->_loop == NULL, "clear _loop expected"); |
0 | 1514 // Sanity check that the RPO numbering is reflected in the _blocks array. |
1515 // It doesn't have to be for the loop tree to be built, but if it is not, | |
1516 // then the blocks have been reordered since dom graph building...which | |
1517 // may question the RPO numbering | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1518 assert(block->_rpo == i, "unexpected reverse post order number"); |
0 | 1519 } |
1520 #endif | |
1521 | |
1522 int idct = 0; | |
1523 CFGLoop* root_loop = new CFGLoop(idct++); | |
1524 | |
1525 Block_List worklist; | |
1526 | |
1527 // Assign blocks to loops | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1528 for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1529 Block* block = get_block(i); |
0 | 1530 |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1531 if (block->head()->is_Loop()) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1532 Block* loop_head = block; |
0 | 1533 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); |
1534 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); | |
12023 | 1535 Block* tail = get_block_for_node(tail_n); |
0 | 1536 |
1537 // Defensively filter out Loop nodes for non-single-entry loops. | |
1538 // For all reasonable loops, the head occurs before the tail in RPO. | |
1539 if (i <= tail->_rpo) { | |
1540 | |
1541 // The tail and (recursive) predecessors of the tail | |
1542 // are made members of a new loop. | |
1543 | |
1544 assert(worklist.size() == 0, "nonempty worklist"); | |
1545 CFGLoop* nloop = new CFGLoop(idct++); | |
1546 assert(loop_head->_loop == NULL, "just checking"); | |
1547 loop_head->_loop = nloop; | |
1548 // Add to nloop so push_pred() will skip over inner loops | |
1549 nloop->add_member(loop_head); | |
12023 | 1550 nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this); |
0 | 1551 |
1552 while (worklist.size() > 0) { | |
1553 Block* member = worklist.pop(); | |
1554 if (member != loop_head) { | |
1555 for (uint j = 1; j < member->num_preds(); j++) { | |
12023 | 1556 nloop->push_pred(member, j, worklist, this); |
0 | 1557 } |
1558 } | |
1559 } | |
1560 } | |
1561 } | |
1562 } | |
1563 | |
1564 // Create a member list for each loop consisting | |
1565 // of both blocks and (immediate child) loops. | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1566 for (uint i = 0; i < number_of_blocks(); i++) { |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1567 Block* block = get_block(i); |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1568 CFGLoop* lp = block->_loop; |
0 | 1569 if (lp == NULL) { |
1570 // Not assigned to a loop. Add it to the method's pseudo loop. | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1571 block->_loop = root_loop; |
0 | 1572 lp = root_loop; |
1573 } | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1574 if (lp == root_loop || block != lp->head()) { // loop heads are already members |
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1575 lp->add_member(block); |
0 | 1576 } |
1577 if (lp != root_loop) { | |
1578 if (lp->parent() == NULL) { | |
1579 // Not a nested loop. Make it a child of the method's pseudo loop. | |
1580 root_loop->add_nested_loop(lp); | |
1581 } | |
12071
adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
adlertz
parents:
12023
diff
changeset
|
1582 if (block == lp->head()) { |
0 | 1583 // Add nested loop to member list of parent loop. |
1584 lp->parent()->add_member(lp); | |
1585 } | |
1586 } | |
1587 } | |
1588 | |
1589 return root_loop; | |
1590 } | |
1591 | |
1592 //------------------------------push_pred-------------------------------------- | |
12023 | 1593 void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) { |
0 | 1594 Node* pred_n = blk->pred(i); |
12023 | 1595 Block* pred = cfg->get_block_for_node(pred_n); |
0 | 1596 CFGLoop *pred_loop = pred->_loop; |
1597 if (pred_loop == NULL) { | |
1598 // Filter out blocks for non-single-entry loops. | |
1599 // For all reasonable loops, the head occurs before the tail in RPO. | |
1600 if (pred->_rpo > head()->_rpo) { | |
1601 pred->_loop = this; | |
1602 worklist.push(pred); | |
1603 } | |
1604 } else if (pred_loop != this) { | |
1605 // Nested loop. | |
1606 while (pred_loop->_parent != NULL && pred_loop->_parent != this) { | |
1607 pred_loop = pred_loop->_parent; | |
1608 } | |
1609 // Make pred's loop be a child | |
1610 if (pred_loop->_parent == NULL) { | |
1611 add_nested_loop(pred_loop); | |
1612 // Continue with loop entry predecessor. | |
1613 Block* pred_head = pred_loop->head(); | |
1614 assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | |
1615 assert(pred_head != head(), "loop head in only one loop"); | |
12023 | 1616 push_pred(pred_head, LoopNode::EntryControl, worklist, cfg); |
0 | 1617 } else { |
1618 assert(pred_loop->_parent == this && _parent == NULL, "just checking"); | |
1619 } | |
1620 } | |
1621 } | |
1622 | |
1623 //------------------------------add_nested_loop-------------------------------- | |
1624 // Make cl a child of the current loop in the loop tree. | |
1625 void CFGLoop::add_nested_loop(CFGLoop* cl) { | |
1626 assert(_parent == NULL, "no parent yet"); | |
1627 assert(cl != this, "not my own parent"); | |
1628 cl->_parent = this; | |
1629 CFGLoop* ch = _child; | |
1630 if (ch == NULL) { | |
1631 _child = cl; | |
1632 } else { | |
1633 while (ch->_sibling != NULL) { ch = ch->_sibling; } | |
1634 ch->_sibling = cl; | |
1635 } | |
1636 } | |
1637 | |
1638 //------------------------------compute_loop_depth----------------------------- | |
1639 // Store the loop depth in each CFGLoop object. | |
1640 // Recursively walk the children to do the same for them. | |
1641 void CFGLoop::compute_loop_depth(int depth) { | |
1642 _depth = depth; | |
1643 CFGLoop* ch = _child; | |
1644 while (ch != NULL) { | |
1645 ch->compute_loop_depth(depth + 1); | |
1646 ch = ch->_sibling; | |
1647 } | |
1648 } | |
1649 | |
1650 //------------------------------compute_freq----------------------------------- | |
1651 // Compute the frequency of each block and loop, relative to a single entry | |
1652 // into the dominating loop head. | |
1653 void CFGLoop::compute_freq() { | |
1654 // Bottom up traversal of loop tree (visit inner loops first.) | |
1655 // Set loop head frequency to 1.0, then transitively | |
1656 // compute frequency for all successors in the loop, | |
1657 // as well as for each exit edge. Inner loops are | |
1658 // treated as single blocks with loop exit targets | |
1659 // as the successor blocks. | |
1660 | |
1661 // Nested loops first | |
1662 CFGLoop* ch = _child; | |
1663 while (ch != NULL) { | |
1664 ch->compute_freq(); | |
1665 ch = ch->_sibling; | |
1666 } | |
1667 assert (_members.length() > 0, "no empty loops"); | |
1668 Block* hd = head(); | |
1669 hd->_freq = 1.0f; | |
1670 for (int i = 0; i < _members.length(); i++) { | |
1671 CFGElement* s = _members.at(i); | |
1672 float freq = s->_freq; | |
1673 if (s->is_block()) { | |
1674 Block* b = s->as_Block(); | |
1675 for (uint j = 0; j < b->_num_succs; j++) { | |
1676 Block* sb = b->_succs[j]; | |
1677 update_succ_freq(sb, freq * b->succ_prob(j)); | |
1678 } | |
1679 } else { | |
1680 CFGLoop* lp = s->as_CFGLoop(); | |
1681 assert(lp->_parent == this, "immediate child"); | |
1682 for (int k = 0; k < lp->_exits.length(); k++) { | |
1683 Block* eb = lp->_exits.at(k).get_target(); | |
1684 float prob = lp->_exits.at(k).get_prob(); | |
1685 update_succ_freq(eb, freq * prob); | |
1686 } | |
1687 } | |
1688 } | |
1689 | |
1690 // For all loops other than the outer, "method" loop, | |
1691 // sum and normalize the exit probability. The "method" loop | |
1692 // should keep the initial exit probability of 1, so that | |
1693 // inner blocks do not get erroneously scaled. | |
1694 if (_depth != 0) { | |
1695 // Total the exit probabilities for this loop. | |
1696 float exits_sum = 0.0f; | |
1697 for (int i = 0; i < _exits.length(); i++) { | |
1698 exits_sum += _exits.at(i).get_prob(); | |
1699 } | |
1700 | |
1701 // Normalize the exit probabilities. Until now, the | |
1702 // probabilities estimate the possibility of exit per | |
1703 // a single loop iteration; afterward, they estimate | |
1704 // the probability of exit per loop entry. | |
1705 for (int i = 0; i < _exits.length(); i++) { | |
1706 Block* et = _exits.at(i).get_target(); | |
418 | 1707 float new_prob = 0.0f; |
1708 if (_exits.at(i).get_prob() > 0.0f) { | |
1709 new_prob = _exits.at(i).get_prob() / exits_sum; | |
1710 } | |
0 | 1711 BlockProbPair bpp(et, new_prob); |
1712 _exits.at_put(i, bpp); | |
1713 } | |
1714 | |
418 | 1715 // Save the total, but guard against unreasonable probability, |
0 | 1716 // as the value is used to estimate the loop trip count. |
1717 // An infinite trip count would blur relative block | |
1718 // frequencies. | |
1719 if (exits_sum > 1.0f) exits_sum = 1.0; | |
1720 if (exits_sum < PROB_MIN) exits_sum = PROB_MIN; | |
1721 _exit_prob = exits_sum; | |
1722 } | |
1723 } | |
1724 | |
//------------------------------succ_prob-------------------------------------
// Determine the probability of reaching successor 'i' from the receiver block.
// Dispatches on the (ideal) opcode of the block-ending node.
float Block::succ_prob(uint i) {
  int eidx = end_idx();
  Node *n = get_node(eidx); // Get ending Node

  int op = n->Opcode();
  if (n->is_Mach()) {
    if (n->is_MachNullCheck()) {
      // Can only reach here if called after lcm. The original Op_If is gone,
      // so we attempt to infer the probability from one or both of the
      // successor blocks.
      assert(_num_succs == 2, "expecting 2 successors of a null check");
      // If either successor has only one predecessor, then the
      // probability estimate can be derived using the
      // relative frequency of the successor and this block.
      if (_succs[i]->num_preds() == 2) {
        return _succs[i]->_freq / _freq;
      } else if (_succs[1-i]->num_preds() == 2) {
        // Derive from the other successor and take the complement.
        return 1 - (_succs[1-i]->_freq / _freq);
      } else {
        // Estimate using both successor frequencies
        float freq = _succs[i]->_freq;
        return freq / (freq + _succs[1-i]->_freq);
      }
    }
    // For other Mach nodes, dispatch on the ideal opcode below.
    op = n->as_Mach()->ideal_Opcode();
  }


  // Switch on branch type
  switch( op ) {
  case Op_CountedLoopEnd:
  case Op_If: {
    assert (i < 2, "just checking");
    // Conditionals pass on only part of their frequency
    float prob = n->as_MachIf()->_prob;
    assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
    // If succ[i] is the FALSE branch, invert path info
    // (the projection nodes follow the branch node in this block).
    if( get_node(i + eidx + 1)->Opcode() == Op_IfFalse ) {
      return 1.0f - prob; // not taken
    } else {
      return prob; // taken
    }
  }

  case Op_Jump:
    // Divide the frequency between all successors evenly
    return 1.0f/_num_succs;

  case Op_Catch: {
    const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
    if (ci->_con == CatchProjNode::fall_through_index) {
      // Fall-thru path gets the lion's share.
      return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
    } else {
      // Presume exceptional paths are equally unlikely
      return PROB_UNLIKELY_MAG(5);
    }
  }

  case Op_Root:
  case Op_Goto:
    // Pass frequency straight thru to target
    return 1.0f;

  case Op_NeverBranch:
    // Presumably an infinite-loop terminator: no frequency flows out.
    return 0.0f;

  case Op_TailCall:
  case Op_TailJump:
  case Op_Return:
  case Op_Halt:
  case Op_Rethrow:
    // Do not push out freq to root block
    return 0.0f;

  default:
    ShouldNotReachHere();
  }

  return 0.0f;
}
1808 | |
418 | 1809 //------------------------------num_fall_throughs----------------------------- |
1810 // Return the number of fall-through candidates for a block | |
1811 int Block::num_fall_throughs() { | |
1812 int eidx = end_idx(); | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1813 Node *n = get_node(eidx); // Get ending Node |
418 | 1814 |
1815 int op = n->Opcode(); | |
1816 if (n->is_Mach()) { | |
1817 if (n->is_MachNullCheck()) { | |
1818 // In theory, either side can fall-thru, for simplicity sake, | |
1819 // let's say only the false branch can now. | |
1820 return 1; | |
1821 } | |
1822 op = n->as_Mach()->ideal_Opcode(); | |
1823 } | |
1824 | |
1825 // Switch on branch type | |
1826 switch( op ) { | |
1827 case Op_CountedLoopEnd: | |
1828 case Op_If: | |
1829 return 2; | |
1830 | |
1831 case Op_Root: | |
1832 case Op_Goto: | |
1833 return 1; | |
1834 | |
1835 case Op_Catch: { | |
1836 for (uint i = 0; i < _num_succs; i++) { | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1837 const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj(); |
418 | 1838 if (ci->_con == CatchProjNode::fall_through_index) { |
1839 return 1; | |
1840 } | |
1841 } | |
1842 return 0; | |
1843 } | |
1844 | |
1845 case Op_Jump: | |
1846 case Op_NeverBranch: | |
1847 case Op_TailCall: | |
1848 case Op_TailJump: | |
1849 case Op_Return: | |
1850 case Op_Halt: | |
1851 case Op_Rethrow: | |
1852 return 0; | |
1853 | |
1854 default: | |
1855 ShouldNotReachHere(); | |
1856 } | |
1857 | |
1858 return 0; | |
1859 } | |
1860 | |
1861 //------------------------------succ_fall_through----------------------------- | |
1862 // Return true if a specific successor could be fall-through target. | |
1863 bool Block::succ_fall_through(uint i) { | |
1864 int eidx = end_idx(); | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1865 Node *n = get_node(eidx); // Get ending Node |
418 | 1866 |
1867 int op = n->Opcode(); | |
1868 if (n->is_Mach()) { | |
1869 if (n->is_MachNullCheck()) { | |
1870 // In theory, either side can fall-thru, for simplicity sake, | |
1871 // let's say only the false branch can now. | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1872 return get_node(i + eidx + 1)->Opcode() == Op_IfFalse; |
418 | 1873 } |
1874 op = n->as_Mach()->ideal_Opcode(); | |
1875 } | |
1876 | |
1877 // Switch on branch type | |
1878 switch( op ) { | |
1879 case Op_CountedLoopEnd: | |
1880 case Op_If: | |
1881 case Op_Root: | |
1882 case Op_Goto: | |
1883 return true; | |
1884 | |
1885 case Op_Catch: { | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1886 const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj(); |
418 | 1887 return ci->_con == CatchProjNode::fall_through_index; |
1888 } | |
1889 | |
1890 case Op_Jump: | |
1891 case Op_NeverBranch: | |
1892 case Op_TailCall: | |
1893 case Op_TailJump: | |
1894 case Op_Return: | |
1895 case Op_Halt: | |
1896 case Op_Rethrow: | |
1897 return false; | |
1898 | |
1899 default: | |
1900 ShouldNotReachHere(); | |
1901 } | |
1902 | |
1903 return false; | |
1904 } | |
1905 | |
1906 //------------------------------update_uncommon_branch------------------------ | |
1907 // Update the probability of a two-branch to be uncommon | |
1908 void Block::update_uncommon_branch(Block* ub) { | |
1909 int eidx = end_idx(); | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1910 Node *n = get_node(eidx); // Get ending Node |
418 | 1911 |
1912 int op = n->as_Mach()->ideal_Opcode(); | |
1913 | |
1914 assert(op == Op_CountedLoopEnd || op == Op_If, "must be a If"); | |
1915 assert(num_fall_throughs() == 2, "must be a two way branch block"); | |
1916 | |
1917 // Which successor is ub? | |
1918 uint s; | |
1919 for (s = 0; s <_num_succs; s++) { | |
1920 if (_succs[s] == ub) break; | |
1921 } | |
1922 assert(s < 2, "uncommon successor must be found"); | |
1923 | |
1924 // If ub is the true path, make the proability small, else | |
1925 // ub is the false path, and make the probability large | |
12167
650868c062a9
8023691: Create interface for nodes in class Block
adlertz
parents:
12071
diff
changeset
|
1926 bool invert = (get_node(s + eidx + 1)->Opcode() == Op_IfFalse); |
418 | 1927 |
1928 // Get existing probability | |
1929 float p = n->as_MachIf()->_prob; | |
1930 | |
1931 if (invert) p = 1.0 - p; | |
1932 if (p > PROB_MIN) { | |
1933 p = PROB_MIN; | |
1934 } | |
1935 if (invert) p = 1.0 - p; | |
1936 | |
1937 n->as_MachIf()->_prob = p; | |
1938 } | |
1939 | |
0 | 1940 //------------------------------update_succ_freq------------------------------- |
605 | 1941 // Update the appropriate frequency associated with block 'b', a successor of |
0 | 1942 // a block in this loop. |
1943 void CFGLoop::update_succ_freq(Block* b, float freq) { | |
1944 if (b->_loop == this) { | |
1945 if (b == head()) { | |
1946 // back branch within the loop | |
1947 // Do nothing now, the loop carried frequency will be | |
1948 // adjust later in scale_freq(). | |
1949 } else { | |
1950 // simple branch within the loop | |
1951 b->_freq += freq; | |
1952 } | |
1953 } else if (!in_loop_nest(b)) { | |
1954 // branch is exit from this loop | |
1955 BlockProbPair bpp(b, freq); | |
1956 _exits.append(bpp); | |
1957 } else { | |
1958 // branch into nested loop | |
1959 CFGLoop* ch = b->_loop; | |
1960 ch->_freq += freq; | |
1961 } | |
1962 } | |
1963 | |
1964 //------------------------------in_loop_nest----------------------------------- | |
1965 // Determine if block b is in the receiver's loop nest. | |
1966 bool CFGLoop::in_loop_nest(Block* b) { | |
1967 int depth = _depth; | |
1968 CFGLoop* b_loop = b->_loop; | |
1969 int b_depth = b_loop->_depth; | |
1970 if (depth == b_depth) { | |
1971 return true; | |
1972 } | |
1973 while (b_depth > depth) { | |
1974 b_loop = b_loop->_parent; | |
1975 b_depth = b_loop->_depth; | |
1976 } | |
1977 return b_loop == this; | |
1978 } | |
1979 | |
1980 //------------------------------scale_freq------------------------------------- | |
1981 // Scale frequency of loops and blocks by trip counts from outer loops | |
1982 // Do a top down traversal of loop tree (visit outer loops first.) | |
1983 void CFGLoop::scale_freq() { | |
1984 float loop_freq = _freq * trip_count(); | |
673 | 1985 _freq = loop_freq; |
0 | 1986 for (int i = 0; i < _members.length(); i++) { |
1987 CFGElement* s = _members.at(i); | |
552
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
1988 float block_freq = s->_freq * loop_freq; |
621 | 1989 if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY) |
1990 block_freq = MIN_BLOCK_FREQUENCY; | |
552
011517bbcd7b
6784930: server jvm fails with assert(!n->is_SpillCopy(),"")
kvn
parents:
418
diff
changeset
|
1991 s->_freq = block_freq; |
0 | 1992 } |
1993 CFGLoop* ch = _child; | |
1994 while (ch != NULL) { | |
1995 ch->scale_freq(); | |
1996 ch = ch->_sibling; | |
1997 } | |
1998 } | |
1999 | |
673 | 2000 // Frequency of outer loop |
2001 float CFGLoop::outer_loop_freq() const { | |
2002 if (_child != NULL) { | |
2003 return _child->_freq; | |
2004 } | |
2005 return _freq; | |
2006 } | |
2007 | |
0 | 2008 #ifndef PRODUCT |
2009 //------------------------------dump_tree-------------------------------------- | |
2010 void CFGLoop::dump_tree() const { | |
2011 dump(); | |
2012 if (_child != NULL) _child->dump_tree(); | |
2013 if (_sibling != NULL) _sibling->dump_tree(); | |
2014 } | |
2015 | |
2016 //------------------------------dump------------------------------------------- | |
2017 void CFGLoop::dump() const { | |
2018 for (int i = 0; i < _depth; i++) tty->print(" "); | |
2019 tty->print("%s: %d trip_count: %6.0f freq: %6.0f\n", | |
2020 _depth == 0 ? "Method" : "Loop", _id, trip_count(), _freq); | |
2021 for (int i = 0; i < _depth; i++) tty->print(" "); | |
2022 tty->print(" members:", _id); | |
2023 int k = 0; | |
2024 for (int i = 0; i < _members.length(); i++) { | |
2025 if (k++ >= 6) { | |
2026 tty->print("\n "); | |
2027 for (int j = 0; j < _depth+1; j++) tty->print(" "); | |
2028 k = 0; | |
2029 } | |
2030 CFGElement *s = _members.at(i); | |
2031 if (s->is_block()) { | |
2032 Block *b = s->as_Block(); | |
2033 tty->print(" B%d(%6.3f)", b->_pre_order, b->_freq); | |
2034 } else { | |
2035 CFGLoop* lp = s->as_CFGLoop(); | |
2036 tty->print(" L%d(%6.3f)", lp->_id, lp->_freq); | |
2037 } | |
2038 } | |
2039 tty->print("\n"); | |
2040 for (int i = 0; i < _depth; i++) tty->print(" "); | |
2041 tty->print(" exits: "); | |
2042 k = 0; | |
2043 for (int i = 0; i < _exits.length(); i++) { | |
2044 if (k++ >= 7) { | |
2045 tty->print("\n "); | |
2046 for (int j = 0; j < _depth+1; j++) tty->print(" "); | |
2047 k = 0; | |
2048 } | |
2049 Block *blk = _exits.at(i).get_target(); | |
2050 float prob = _exits.at(i).get_prob(); | |
2051 tty->print(" ->%d@%d%%", blk->_pre_order, (int)(prob*100)); | |
2052 } | |
2053 tty->print("\n"); | |
2054 } | |
2055 #endif |