Mercurial > hg > graal-compiler
comparison src/share/vm/opto/gcm.cpp @ 14412:e2722a66aba7
Merge
author | kvn |
---|---|
date | Thu, 05 Sep 2013 11:04:39 -0700 |
parents | d2907f74462e adb9a7d94cb5 |
children | 2b8e28fdf503 |
comparison
equal
deleted
inserted
replaced
14411:bdd155477289 | 14412:e2722a66aba7 |
---|---|
68 //----------------------------schedule_node_into_block------------------------- | 68 //----------------------------schedule_node_into_block------------------------- |
69 // Insert node n into block b. Look for projections of n and make sure they | 69 // Insert node n into block b. Look for projections of n and make sure they |
70 // are in b also. | 70 // are in b also. |
71 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) { | 71 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) { |
72 // Set basic block of n, Add n to b, | 72 // Set basic block of n, Add n to b, |
73 _bbs.map(n->_idx, b); | 73 map_node_to_block(n, b); |
74 b->add_inst(n); | 74 b->add_inst(n); |
75 | 75 |
76 // After Matching, nearly any old Node may have projections trailing it. | 76 // After Matching, nearly any old Node may have projections trailing it. |
77 // These are usually machine-dependent flags. In any case, they might | 77 // These are usually machine-dependent flags. In any case, they might |
78 // float to another block below this one. Move them up. | 78 // float to another block below this one. Move them up. |
79 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | 79 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
80 Node* use = n->fast_out(i); | 80 Node* use = n->fast_out(i); |
81 if (use->is_Proj()) { | 81 if (use->is_Proj()) { |
82 Block* buse = _bbs[use->_idx]; | 82 Block* buse = get_block_for_node(use); |
83 if (buse != b) { // In wrong block? | 83 if (buse != b) { // In wrong block? |
84 if (buse != NULL) | 84 if (buse != NULL) { |
85 buse->find_remove(use); // Remove from wrong block | 85 buse->find_remove(use); // Remove from wrong block |
86 _bbs.map(use->_idx, b); // Re-insert in this block | 86 } |
87 map_node_to_block(use, b); | |
87 b->add_inst(use); | 88 b->add_inst(use); |
88 } | 89 } |
89 } | 90 } |
90 } | 91 } |
91 } | 92 } |
99 assert(in0 != NULL, "Only control-dependent"); | 100 assert(in0 != NULL, "Only control-dependent"); |
100 const Node *p = in0->is_block_proj(); | 101 const Node *p = in0->is_block_proj(); |
101 if (p != NULL && p != n) { // Control from a block projection? | 102 if (p != NULL && p != n) { // Control from a block projection? |
102 assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here"); | 103 assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here"); |
103 // Find trailing Region | 104 // Find trailing Region |
104 Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block | 105 Block *pb = get_block_for_node(in0); // Block-projection already has basic block |
105 uint j = 0; | 106 uint j = 0; |
106 if (pb->_num_succs != 1) { // More then 1 successor? | 107 if (pb->_num_succs != 1) { // More then 1 successor? |
107 // Search for successor | 108 // Search for successor |
108 uint max = pb->_nodes.size(); | 109 uint max = pb->_nodes.size(); |
109 assert( max > 1, "" ); | 110 assert( max > 1, "" ); |
122 } | 123 } |
123 | 124 |
124 | 125 |
125 //------------------------------schedule_pinned_nodes-------------------------- | 126 //------------------------------schedule_pinned_nodes-------------------------- |
126 // Set the basic block for Nodes pinned into blocks | 127 // Set the basic block for Nodes pinned into blocks |
127 void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) { | 128 void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) { |
128 // Allocate node stack of size C->unique()+8 to avoid frequent realloc | 129 // Allocate node stack of size C->unique()+8 to avoid frequent realloc |
129 GrowableArray <Node *> spstack(C->unique()+8); | 130 GrowableArray <Node *> spstack(C->unique() + 8); |
130 spstack.push(_root); | 131 spstack.push(_root); |
131 while ( spstack.is_nonempty() ) { | 132 while (spstack.is_nonempty()) { |
132 Node *n = spstack.pop(); | 133 Node* node = spstack.pop(); |
133 if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited | 134 if (!visited.test_set(node->_idx)) { // Test node and flag it as visited |
134 if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down! | 135 if (node->pinned() && !has_block(node)) { // Pinned? Nail it down! |
135 assert( n->in(0), "pinned Node must have Control" ); | 136 assert(node->in(0), "pinned Node must have Control"); |
136 // Before setting block replace block_proj control edge | 137 // Before setting block replace block_proj control edge |
137 replace_block_proj_ctrl(n); | 138 replace_block_proj_ctrl(node); |
138 Node *input = n->in(0); | 139 Node* input = node->in(0); |
139 while( !input->is_block_start() ) | 140 while (!input->is_block_start()) { |
140 input = input->in(0); | 141 input = input->in(0); |
141 Block *b = _bbs[input->_idx]; // Basic block of controlling input | 142 } |
142 schedule_node_into_block(n, b); | 143 Block* block = get_block_for_node(input); // Basic block of controlling input |
143 } | 144 schedule_node_into_block(node, block); |
144 for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs | 145 } |
145 if( n->in(i) != NULL ) | 146 |
146 spstack.push(n->in(i)); | 147 // process all inputs that are non NULL |
148 for (int i = node->req() - 1; i >= 0; --i) { | |
149 if (node->in(i) != NULL) { | |
150 spstack.push(node->in(i)); | |
151 } | |
147 } | 152 } |
148 } | 153 } |
149 } | 154 } |
150 } | 155 } |
151 | 156 |
152 #ifdef ASSERT | 157 #ifdef ASSERT |
153 // Assert that new input b2 is dominated by all previous inputs. | 158 // Assert that new input b2 is dominated by all previous inputs. |
154 // Check this by by seeing that it is dominated by b1, the deepest | 159 // Check this by by seeing that it is dominated by b1, the deepest |
155 // input observed until b2. | 160 // input observed until b2. |
156 static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) { | 161 static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) { |
157 if (b1 == NULL) return; | 162 if (b1 == NULL) return; |
158 assert(b1->_dom_depth < b2->_dom_depth, "sanity"); | 163 assert(b1->_dom_depth < b2->_dom_depth, "sanity"); |
159 Block* tmp = b2; | 164 Block* tmp = b2; |
160 while (tmp != b1 && tmp != NULL) { | 165 while (tmp != b1 && tmp != NULL) { |
161 tmp = tmp->_idom; | 166 tmp = tmp->_idom; |
164 // Detected an unschedulable graph. Print some nice stuff and die. | 169 // Detected an unschedulable graph. Print some nice stuff and die. |
165 tty->print_cr("!!! Unschedulable graph !!!"); | 170 tty->print_cr("!!! Unschedulable graph !!!"); |
166 for (uint j=0; j<n->len(); j++) { // For all inputs | 171 for (uint j=0; j<n->len(); j++) { // For all inputs |
167 Node* inn = n->in(j); // Get input | 172 Node* inn = n->in(j); // Get input |
168 if (inn == NULL) continue; // Ignore NULL, missing inputs | 173 if (inn == NULL) continue; // Ignore NULL, missing inputs |
169 Block* inb = bbs[inn->_idx]; | 174 Block* inb = cfg->get_block_for_node(inn); |
170 tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order, | 175 tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order, |
171 inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth); | 176 inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth); |
172 inn->dump(); | 177 inn->dump(); |
173 } | 178 } |
174 tty->print("Failing node: "); | 179 tty->print("Failing node: "); |
176 assert(false, "unscheduable graph"); | 181 assert(false, "unscheduable graph"); |
177 } | 182 } |
178 } | 183 } |
179 #endif | 184 #endif |
180 | 185 |
181 static Block* find_deepest_input(Node* n, Block_Array &bbs) { | 186 static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) { |
182 // Find the last input dominated by all other inputs. | 187 // Find the last input dominated by all other inputs. |
183 Block* deepb = NULL; // Deepest block so far | 188 Block* deepb = NULL; // Deepest block so far |
184 int deepb_dom_depth = 0; | 189 int deepb_dom_depth = 0; |
185 for (uint k = 0; k < n->len(); k++) { // For all inputs | 190 for (uint k = 0; k < n->len(); k++) { // For all inputs |
186 Node* inn = n->in(k); // Get input | 191 Node* inn = n->in(k); // Get input |
187 if (inn == NULL) continue; // Ignore NULL, missing inputs | 192 if (inn == NULL) continue; // Ignore NULL, missing inputs |
188 Block* inb = bbs[inn->_idx]; | 193 Block* inb = cfg->get_block_for_node(inn); |
189 assert(inb != NULL, "must already have scheduled this input"); | 194 assert(inb != NULL, "must already have scheduled this input"); |
190 if (deepb_dom_depth < (int) inb->_dom_depth) { | 195 if (deepb_dom_depth < (int) inb->_dom_depth) { |
191 // The new inb must be dominated by the previous deepb. | 196 // The new inb must be dominated by the previous deepb. |
192 // The various inputs must be linearly ordered in the dom | 197 // The various inputs must be linearly ordered in the dom |
193 // tree, or else there will not be a unique deepest block. | 198 // tree, or else there will not be a unique deepest block. |
194 DEBUG_ONLY(assert_dom(deepb, inb, n, bbs)); | 199 DEBUG_ONLY(assert_dom(deepb, inb, n, cfg)); |
195 deepb = inb; // Save deepest block | 200 deepb = inb; // Save deepest block |
196 deepb_dom_depth = deepb->_dom_depth; | 201 deepb_dom_depth = deepb->_dom_depth; |
197 } | 202 } |
198 } | 203 } |
199 assert(deepb != NULL, "must be at least one input to n"); | 204 assert(deepb != NULL, "must be at least one input to n"); |
205 // Find the earliest Block any instruction can be placed in. Some instructions | 210 // Find the earliest Block any instruction can be placed in. Some instructions |
206 // are pinned into Blocks. Unpinned instructions can appear in last block in | 211 // are pinned into Blocks. Unpinned instructions can appear in last block in |
207 // which all their inputs occur. | 212 // which all their inputs occur. |
208 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) { | 213 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) { |
209 // Allocate stack with enough space to avoid frequent realloc | 214 // Allocate stack with enough space to avoid frequent realloc |
210 Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats | 215 Node_Stack nstack(roots.Size() + 8); |
211 // roots.push(_root); _root will be processed among C->top() inputs | 216 // _root will be processed among C->top() inputs |
212 roots.push(C->top()); | 217 roots.push(C->top()); |
213 visited.set(C->top()->_idx); | 218 visited.set(C->top()->_idx); |
214 | 219 |
215 while (roots.size() != 0) { | 220 while (roots.size() != 0) { |
216 // Use local variables nstack_top_n & nstack_top_i to cache values | 221 // Use local variables nstack_top_n & nstack_top_i to cache values |
217 // on stack's top. | 222 // on stack's top. |
218 Node *nstack_top_n = roots.pop(); | 223 Node* parent_node = roots.pop(); |
219 uint nstack_top_i = 0; | 224 uint input_index = 0; |
220 //while_nstack_nonempty: | 225 |
221 while (true) { | 226 while (true) { |
222 // Get parent node and next input's index from stack's top. | 227 if (input_index == 0) { |
223 Node *n = nstack_top_n; | |
224 uint i = nstack_top_i; | |
225 | |
226 if (i == 0) { | |
227 // Fixup some control. Constants without control get attached | 228 // Fixup some control. Constants without control get attached |
228 // to root and nodes that use is_block_proj() nodes should be attached | 229 // to root and nodes that use is_block_proj() nodes should be attached |
229 // to the region that starts their block. | 230 // to the region that starts their block. |
230 const Node *in0 = n->in(0); | 231 const Node* control_input = parent_node->in(0); |
231 if (in0 != NULL) { // Control-dependent? | 232 if (control_input != NULL) { |
232 replace_block_proj_ctrl(n); | 233 replace_block_proj_ctrl(parent_node); |
233 } else { // n->in(0) == NULL | 234 } else { |
234 if (n->req() == 1) { // This guy is a constant with NO inputs? | 235 // Is a constant with NO inputs? |
235 n->set_req(0, _root); | 236 if (parent_node->req() == 1) { |
237 parent_node->set_req(0, _root); | |
236 } | 238 } |
237 } | 239 } |
238 } | 240 } |
239 | 241 |
240 // First, visit all inputs and force them to get a block. If an | 242 // First, visit all inputs and force them to get a block. If an |
241 // input is already in a block we quit following inputs (to avoid | 243 // input is already in a block we quit following inputs (to avoid |
242 // cycles). Instead we put that Node on a worklist to be handled | 244 // cycles). Instead we put that Node on a worklist to be handled |
243 // later (since IT'S inputs may not have a block yet). | 245 // later (since IT'S inputs may not have a block yet). |
244 bool done = true; // Assume all n's inputs will be processed | 246 |
245 while (i < n->len()) { // For all inputs | 247 // Assume all n's inputs will be processed |
246 Node *in = n->in(i); // Get input | 248 bool done = true; |
247 ++i; | 249 |
248 if (in == NULL) continue; // Ignore NULL, missing inputs | 250 while (input_index < parent_node->len()) { |
251 Node* in = parent_node->in(input_index++); | |
252 if (in == NULL) { | |
253 continue; | |
254 } | |
255 | |
249 int is_visited = visited.test_set(in->_idx); | 256 int is_visited = visited.test_set(in->_idx); |
250 if (!_bbs.lookup(in->_idx)) { // Missing block selection? | 257 if (!has_block(in)) { |
251 if (is_visited) { | 258 if (is_visited) { |
252 // assert( !visited.test(in->_idx), "did not schedule early" ); | |
253 return false; | 259 return false; |
254 } | 260 } |
255 nstack.push(n, i); // Save parent node and next input's index. | 261 // Save parent node and next input's index. |
256 nstack_top_n = in; // Process current input now. | 262 nstack.push(parent_node, input_index); |
257 nstack_top_i = 0; | 263 // Process current input now. |
258 done = false; // Not all n's inputs processed. | 264 parent_node = in; |
259 break; // continue while_nstack_nonempty; | 265 input_index = 0; |
260 } else if (!is_visited) { // Input not yet visited? | 266 // Not all n's inputs processed. |
261 roots.push(in); // Visit this guy later, using worklist | 267 done = false; |
268 break; | |
269 } else if (!is_visited) { | |
270 // Visit this guy later, using worklist | |
271 roots.push(in); | |
262 } | 272 } |
263 } | 273 } |
274 | |
264 if (done) { | 275 if (done) { |
265 // All of n's inputs have been processed, complete post-processing. | 276 // All of n's inputs have been processed, complete post-processing. |
266 | 277 |
267 // Some instructions are pinned into a block. These include Region, | 278 // Some instructions are pinned into a block. These include Region, |
268 // Phi, Start, Return, and other control-dependent instructions and | 279 // Phi, Start, Return, and other control-dependent instructions and |
269 // any projections which depend on them. | 280 // any projections which depend on them. |
270 if (!n->pinned()) { | 281 if (!parent_node->pinned()) { |
271 // Set earliest legal block. | 282 // Set earliest legal block. |
272 _bbs.map(n->_idx, find_deepest_input(n, _bbs)); | 283 Block* earliest_block = find_deepest_input(parent_node, this); |
284 map_node_to_block(parent_node, earliest_block); | |
273 } else { | 285 } else { |
274 assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge"); | 286 assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge"); |
275 } | 287 } |
276 | 288 |
277 if (nstack.is_empty()) { | 289 if (nstack.is_empty()) { |
278 // Finished all nodes on stack. | 290 // Finished all nodes on stack. |
279 // Process next node on the worklist 'roots'. | 291 // Process next node on the worklist 'roots'. |
280 break; | 292 break; |
281 } | 293 } |
282 // Get saved parent node and next input's index. | 294 // Get saved parent node and next input's index. |
283 nstack_top_n = nstack.node(); | 295 parent_node = nstack.node(); |
284 nstack_top_i = nstack.index(); | 296 input_index = nstack.index(); |
285 nstack.pop(); | 297 nstack.pop(); |
286 } // if (done) | 298 } |
287 } // while (true) | 299 } |
288 } // while (roots.size() != 0) | 300 } |
289 return true; | 301 return true; |
290 } | 302 } |
291 | 303 |
292 //------------------------------dom_lca---------------------------------------- | 304 //------------------------------dom_lca---------------------------------------- |
293 // Find least common ancestor in dominator tree | 305 // Find least common ancestor in dominator tree |
315 //--------------------------raise_LCA_above_use-------------------------------- | 327 //--------------------------raise_LCA_above_use-------------------------------- |
316 // We are placing a definition, and have been given a def->use edge. | 328 // We are placing a definition, and have been given a def->use edge. |
317 // The definition must dominate the use, so move the LCA upward in the | 329 // The definition must dominate the use, so move the LCA upward in the |
318 // dominator tree to dominate the use. If the use is a phi, adjust | 330 // dominator tree to dominate the use. If the use is a phi, adjust |
319 // the LCA only with the phi input paths which actually use this def. | 331 // the LCA only with the phi input paths which actually use this def. |
320 static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) { | 332 static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) { |
321 Block* buse = bbs[use->_idx]; | 333 Block* buse = cfg->get_block_for_node(use); |
322 if (buse == NULL) return LCA; // Unused killing Projs have no use block | 334 if (buse == NULL) return LCA; // Unused killing Projs have no use block |
323 if (!use->is_Phi()) return buse->dom_lca(LCA); | 335 if (!use->is_Phi()) return buse->dom_lca(LCA); |
324 uint pmax = use->req(); // Number of Phi inputs | 336 uint pmax = use->req(); // Number of Phi inputs |
325 // Why does not this loop just break after finding the matching input to | 337 // Why does not this loop just break after finding the matching input to |
326 // the Phi? Well...it's like this. I do not have true def-use/use-def | 338 // the Phi? Well...it's like this. I do not have true def-use/use-def |
331 // which use-def edge I should find the predecessor block for. So I find | 343 // which use-def edge I should find the predecessor block for. So I find |
332 // them all. Means I do a little extra work if a Phi uses the same value | 344 // them all. Means I do a little extra work if a Phi uses the same value |
333 // more than once. | 345 // more than once. |
334 for (uint j=1; j<pmax; j++) { // For all inputs | 346 for (uint j=1; j<pmax; j++) { // For all inputs |
335 if (use->in(j) == def) { // Found matching input? | 347 if (use->in(j) == def) { // Found matching input? |
336 Block* pred = bbs[buse->pred(j)->_idx]; | 348 Block* pred = cfg->get_block_for_node(buse->pred(j)); |
337 LCA = pred->dom_lca(LCA); | 349 LCA = pred->dom_lca(LCA); |
338 } | 350 } |
339 } | 351 } |
340 return LCA; | 352 return LCA; |
341 } | 353 } |
344 // Return a new LCA that dominates LCA and any of its marked predecessors. | 356 // Return a new LCA that dominates LCA and any of its marked predecessors. |
345 // Search all my parents up to 'early' (exclusive), looking for predecessors | 357 // Search all my parents up to 'early' (exclusive), looking for predecessors |
346 // which are marked with the given index. Return the LCA (in the dom tree) | 358 // which are marked with the given index. Return the LCA (in the dom tree) |
347 // of all marked blocks. If there are none marked, return the original | 359 // of all marked blocks. If there are none marked, return the original |
348 // LCA. | 360 // LCA. |
349 static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, | 361 static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) { |
350 Block* early, Block_Array &bbs) { | |
351 Block_List worklist; | 362 Block_List worklist; |
352 worklist.push(LCA); | 363 worklist.push(LCA); |
353 while (worklist.size() > 0) { | 364 while (worklist.size() > 0) { |
354 Block* mid = worklist.pop(); | 365 Block* mid = worklist.pop(); |
355 if (mid == early) continue; // stop searching here | 366 if (mid == early) continue; // stop searching here |
368 if (LCA == mid) | 379 if (LCA == mid) |
369 continue; // Don't mark as visited to avoid early termination. | 380 continue; // Don't mark as visited to avoid early termination. |
370 } else { | 381 } else { |
371 // Keep searching through this block's predecessors. | 382 // Keep searching through this block's predecessors. |
372 for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) { | 383 for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) { |
373 Block* mid_parent = bbs[ mid->pred(j)->_idx ]; | 384 Block* mid_parent = cfg->get_block_for_node(mid->pred(j)); |
374 worklist.push(mid_parent); | 385 worklist.push(mid_parent); |
375 } | 386 } |
376 } | 387 } |
377 mid->set_raise_LCA_visited(mark); | 388 mid->set_raise_LCA_visited(mark); |
378 } | 389 } |
386 // | 397 // |
387 // Because a subset of edges are considered, the resulting block will | 398 // Because a subset of edges are considered, the resulting block will |
388 // be earlier (at a shallower dom_depth) than the true schedule_early | 399 // be earlier (at a shallower dom_depth) than the true schedule_early |
389 // point of the node. We compute this earlier block as a more permissive | 400 // point of the node. We compute this earlier block as a more permissive |
390 // site for anti-dependency insertion, but only if subsume_loads is enabled. | 401 // site for anti-dependency insertion, but only if subsume_loads is enabled. |
391 static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) { | 402 static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) { |
392 Node* base; | 403 Node* base; |
393 Node* index; | 404 Node* index; |
394 Node* store = load->in(MemNode::Memory); | 405 Node* store = load->in(MemNode::Memory); |
395 load->as_Mach()->memory_inputs(base, index); | 406 load->as_Mach()->memory_inputs(base, index); |
396 | 407 |
414 if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0); | 425 if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0); |
415 | 426 |
416 Block* deepb = NULL; // Deepest block so far | 427 Block* deepb = NULL; // Deepest block so far |
417 int deepb_dom_depth = 0; | 428 int deepb_dom_depth = 0; |
418 for (int i = 0; i < mem_inputs_length; i++) { | 429 for (int i = 0; i < mem_inputs_length; i++) { |
419 Block* inb = bbs[mem_inputs[i]->_idx]; | 430 Block* inb = cfg->get_block_for_node(mem_inputs[i]); |
420 if (deepb_dom_depth < (int) inb->_dom_depth) { | 431 if (deepb_dom_depth < (int) inb->_dom_depth) { |
421 // The new inb must be dominated by the previous deepb. | 432 // The new inb must be dominated by the previous deepb. |
422 // The various inputs must be linearly ordered in the dom | 433 // The various inputs must be linearly ordered in the dom |
423 // tree, or else there will not be a unique deepest block. | 434 // tree, or else there will not be a unique deepest block. |
424 DEBUG_ONLY(assert_dom(deepb, inb, load, bbs)); | 435 DEBUG_ONLY(assert_dom(deepb, inb, load, cfg)); |
425 deepb = inb; // Save deepest block | 436 deepb = inb; // Save deepest block |
426 deepb_dom_depth = deepb->_dom_depth; | 437 deepb_dom_depth = deepb->_dom_depth; |
427 } | 438 } |
428 } | 439 } |
429 early = deepb; | 440 early = deepb; |
490 // Note the earliest legal placement of 'load', as determined by | 501 // Note the earliest legal placement of 'load', as determined by |
491 // by the unique point in the dom tree where all memory effects | 502 // by the unique point in the dom tree where all memory effects |
492 // and other inputs are first available. (Computed by schedule_early.) | 503 // and other inputs are first available. (Computed by schedule_early.) |
493 // For normal loads, 'early' is the shallowest place (dom graph wise) | 504 // For normal loads, 'early' is the shallowest place (dom graph wise) |
494 // to look for anti-deps between this load and any store. | 505 // to look for anti-deps between this load and any store. |
495 Block* early = _bbs[load_index]; | 506 Block* early = get_block_for_node(load); |
496 | 507 |
497 // If we are subsuming loads, compute an "early" block that only considers | 508 // If we are subsuming loads, compute an "early" block that only considers |
498 // memory or address inputs. This block may be different than the | 509 // memory or address inputs. This block may be different than the |
499 // schedule_early block in that it could be at an even shallower depth in the | 510 // schedule_early block in that it could be at an even shallower depth in the |
500 // dominator tree, and allow for a broader discovery of anti-dependences. | 511 // dominator tree, and allow for a broader discovery of anti-dependences. |
501 if (C->subsume_loads()) { | 512 if (C->subsume_loads()) { |
502 early = memory_early_block(load, early, _bbs); | 513 early = memory_early_block(load, early, this); |
503 } | 514 } |
504 | 515 |
505 ResourceArea *area = Thread::current()->resource_area(); | 516 ResourceArea *area = Thread::current()->resource_area(); |
506 Node_List worklist_mem(area); // prior memory state to store | 517 Node_List worklist_mem(area); // prior memory state to store |
507 Node_List worklist_store(area); // possible-def to explore | 518 Node_List worklist_store(area); // possible-def to explore |
621 | 632 |
622 // Identify a block that the current load must be above, | 633 // Identify a block that the current load must be above, |
623 // or else observe that 'store' is all the way up in the | 634 // or else observe that 'store' is all the way up in the |
624 // earliest legal block for 'load'. In the latter case, | 635 // earliest legal block for 'load'. In the latter case, |
625 // immediately insert an anti-dependence edge. | 636 // immediately insert an anti-dependence edge. |
626 Block* store_block = _bbs[store->_idx]; | 637 Block* store_block = get_block_for_node(store); |
627 assert(store_block != NULL, "unused killing projections skipped above"); | 638 assert(store_block != NULL, "unused killing projections skipped above"); |
628 | 639 |
629 if (store->is_Phi()) { | 640 if (store->is_Phi()) { |
630 // 'load' uses memory which is one (or more) of the Phi's inputs. | 641 // 'load' uses memory which is one (or more) of the Phi's inputs. |
631 // It must be scheduled not before the Phi, but rather before | 642 // It must be scheduled not before the Phi, but rather before |
639 // PhiNode may be at start of block 'early' with backedge to 'early' | 650 // PhiNode may be at start of block 'early' with backedge to 'early' |
640 DEBUG_ONLY(bool found_match = false); | 651 DEBUG_ONLY(bool found_match = false); |
641 for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) { | 652 for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) { |
642 if (store->in(j) == mem) { // Found matching input? | 653 if (store->in(j) == mem) { // Found matching input? |
643 DEBUG_ONLY(found_match = true); | 654 DEBUG_ONLY(found_match = true); |
644 Block* pred_block = _bbs[store_block->pred(j)->_idx]; | 655 Block* pred_block = get_block_for_node(store_block->pred(j)); |
645 if (pred_block != early) { | 656 if (pred_block != early) { |
646 // If any predecessor of the Phi matches the load's "early block", | 657 // If any predecessor of the Phi matches the load's "early block", |
647 // we do not need a precedence edge between the Phi and 'load' | 658 // we do not need a precedence edge between the Phi and 'load' |
648 // since the load will be forced into a block preceding the Phi. | 659 // since the load will be forced into a block preceding the Phi. |
649 pred_block->set_raise_LCA_mark(load_index); | 660 pred_block->set_raise_LCA_mark(load_index); |
713 // | 724 // |
714 // The raised LCA will be a lower bound for placing the load, | 725 // The raised LCA will be a lower bound for placing the load, |
715 // preventing the load from sinking past any block containing | 726 // preventing the load from sinking past any block containing |
716 // a store that may invalidate the memory state required by 'load'. | 727 // a store that may invalidate the memory state required by 'load'. |
717 if (must_raise_LCA) | 728 if (must_raise_LCA) |
718 LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs); | 729 LCA = raise_LCA_above_marks(LCA, load->_idx, early, this); |
719 if (LCA == early) return LCA; | 730 if (LCA == early) return LCA; |
720 | 731 |
721 // Insert anti-dependence edges from 'load' to each store | 732 // Insert anti-dependence edges from 'load' to each store |
722 // in the non-early LCA block. | 733 // in the non-early LCA block. |
723 // Mine the non_early_stores list for such stores. | 734 // Mine the non_early_stores list for such stores. |
724 if (LCA->raise_LCA_mark() == load_index) { | 735 if (LCA->raise_LCA_mark() == load_index) { |
725 while (non_early_stores.size() > 0) { | 736 while (non_early_stores.size() > 0) { |
726 Node* store = non_early_stores.pop(); | 737 Node* store = non_early_stores.pop(); |
727 Block* store_block = _bbs[store->_idx]; | 738 Block* store_block = get_block_for_node(store); |
728 if (store_block == LCA) { | 739 if (store_block == LCA) { |
729 // add anti_dependence from store to load in its own block | 740 // add anti_dependence from store to load in its own block |
730 assert(store != load->in(0), "dependence cycle found"); | 741 assert(store != load->in(0), "dependence cycle found"); |
731 if (verify) { | 742 if (verify) { |
732 assert(store->find_edge(load) != -1, "missing precedence edge"); | 743 assert(store->find_edge(load) != -1, "missing precedence edge"); |
756 private: | 767 private: |
757 Node_Backward_Iterator(); | 768 Node_Backward_Iterator(); |
758 | 769 |
759 public: | 770 public: |
760 // Constructor for the iterator | 771 // Constructor for the iterator |
761 Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs); | 772 Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg); |
762 | 773 |
763 // Postincrement operator to iterate over the nodes | 774 // Postincrement operator to iterate over the nodes |
764 Node *next(); | 775 Node *next(); |
765 | 776 |
766 private: | 777 private: |
767 VectorSet &_visited; | 778 VectorSet &_visited; |
768 Node_List &_stack; | 779 Node_List &_stack; |
769 Block_Array &_bbs; | 780 PhaseCFG &_cfg; |
770 }; | 781 }; |
771 | 782 |
772 // Constructor for the Node_Backward_Iterator | 783 // Constructor for the Node_Backward_Iterator |
773 Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs ) | 784 Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg) |
774 : _visited(visited), _stack(stack), _bbs(bbs) { | 785 : _visited(visited), _stack(stack), _cfg(cfg) { |
775 // The stack should contain exactly the root | 786 // The stack should contain exactly the root |
776 stack.clear(); | 787 stack.clear(); |
777 stack.push(root); | 788 stack.push(root); |
778 | 789 |
779 // Clear the visited bits | 790 // Clear the visited bits |
799 while( 1 ) { | 810 while( 1 ) { |
800 | 811 |
801 _visited.set(self->_idx); | 812 _visited.set(self->_idx); |
802 | 813 |
803 // Now schedule all uses as late as possible. | 814 // Now schedule all uses as late as possible. |
804 uint src = self->is_Proj() ? self->in(0)->_idx : self->_idx; | 815 const Node* src = self->is_Proj() ? self->in(0) : self; |
805 uint src_rpo = _bbs[src]->_rpo; | 816 uint src_rpo = _cfg.get_block_for_node(src)->_rpo; |
806 | 817 |
807 // Schedule all nodes in a post-order visit | 818 // Schedule all nodes in a post-order visit |
808 Node *unvisited = NULL; // Unvisited anti-dependent Node, if any | 819 Node *unvisited = NULL; // Unvisited anti-dependent Node, if any |
809 | 820 |
810 // Scan for unvisited nodes | 821 // Scan for unvisited nodes |
816 if ( _visited.test(n->_idx) ) | 827 if ( _visited.test(n->_idx) ) |
817 continue; | 828 continue; |
818 | 829 |
819 // do not traverse backward control edges | 830 // do not traverse backward control edges |
820 Node *use = n->is_Proj() ? n->in(0) : n; | 831 Node *use = n->is_Proj() ? n->in(0) : n; |
821 uint use_rpo = _bbs[use->_idx]->_rpo; | 832 uint use_rpo = _cfg.get_block_for_node(use)->_rpo; |
822 | 833 |
823 if ( use_rpo < src_rpo ) | 834 if ( use_rpo < src_rpo ) |
824 continue; | 835 continue; |
825 | 836 |
826 // Phi nodes always precede uses in a basic block | 837 // Phi nodes always precede uses in a basic block |
848 return self; | 859 return self; |
849 } | 860 } |
850 | 861 |
851 //------------------------------ComputeLatenciesBackwards---------------------- | 862 //------------------------------ComputeLatenciesBackwards---------------------- |
852 // Compute the latency of all the instructions. | 863 // Compute the latency of all the instructions. |
853 void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) { | 864 void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) { |
854 #ifndef PRODUCT | 865 #ifndef PRODUCT |
855 if (trace_opto_pipelining()) | 866 if (trace_opto_pipelining()) |
856 tty->print("\n#---- ComputeLatenciesBackwards ----\n"); | 867 tty->print("\n#---- ComputeLatenciesBackwards ----\n"); |
857 #endif | 868 #endif |
858 | 869 |
859 Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs); | 870 Node_Backward_Iterator iter((Node *)_root, visited, stack, *this); |
860 Node *n; | 871 Node *n; |
861 | 872 |
862 // Walk over all the nodes from last to first | 873 // Walk over all the nodes from last to first |
863 while (n = iter.next()) { | 874 while (n = iter.next()) { |
864 // Set the latency for the definitions of this instruction | 875 // Set the latency for the definitions of this instruction |
871 // a number that increases as we approach the beginning of the routine. | 882 // a number that increases as we approach the beginning of the routine. |
872 void PhaseCFG::partial_latency_of_defs(Node *n) { | 883 void PhaseCFG::partial_latency_of_defs(Node *n) { |
873 // Set the latency for this instruction | 884 // Set the latency for this instruction |
874 #ifndef PRODUCT | 885 #ifndef PRODUCT |
875 if (trace_opto_pipelining()) { | 886 if (trace_opto_pipelining()) { |
876 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", | 887 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); |
877 n->_idx, _node_latency->at_grow(n->_idx)); | |
878 dump(); | 888 dump(); |
879 } | 889 } |
880 #endif | 890 #endif |
881 | 891 |
882 if (n->is_Proj()) | 892 if (n->is_Proj()) { |
883 n = n->in(0); | 893 n = n->in(0); |
884 | 894 } |
885 if (n->is_Root()) | 895 |
896 if (n->is_Root()) { | |
886 return; | 897 return; |
898 } | |
887 | 899 |
888 uint nlen = n->len(); | 900 uint nlen = n->len(); |
889 uint use_latency = _node_latency->at_grow(n->_idx); | 901 uint use_latency = get_latency_for_node(n); |
890 uint use_pre_order = _bbs[n->_idx]->_pre_order; | 902 uint use_pre_order = get_block_for_node(n)->_pre_order; |
891 | 903 |
892 for ( uint j=0; j<nlen; j++ ) { | 904 for (uint j = 0; j < nlen; j++) { |
893 Node *def = n->in(j); | 905 Node *def = n->in(j); |
894 | 906 |
895 if (!def || def == n) | 907 if (!def || def == n) { |
896 continue; | 908 continue; |
909 } | |
897 | 910 |
898 // Walk backwards thru projections | 911 // Walk backwards thru projections |
899 if (def->is_Proj()) | 912 if (def->is_Proj()) { |
900 def = def->in(0); | 913 def = def->in(0); |
914 } | |
901 | 915 |
902 #ifndef PRODUCT | 916 #ifndef PRODUCT |
903 if (trace_opto_pipelining()) { | 917 if (trace_opto_pipelining()) { |
904 tty->print("# in(%2d): ", j); | 918 tty->print("# in(%2d): ", j); |
905 def->dump(); | 919 def->dump(); |
906 } | 920 } |
907 #endif | 921 #endif |
908 | 922 |
909 // If the defining block is not known, assume it is ok | 923 // If the defining block is not known, assume it is ok |
910 Block *def_block = _bbs[def->_idx]; | 924 Block *def_block = get_block_for_node(def); |
911 uint def_pre_order = def_block ? def_block->_pre_order : 0; | 925 uint def_pre_order = def_block ? def_block->_pre_order : 0; |
912 | 926 |
913 if ( (use_pre_order < def_pre_order) || | 927 if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) { |
914 (use_pre_order == def_pre_order && n->is_Phi()) ) | |
915 continue; | 928 continue; |
929 } | |
916 | 930 |
917 uint delta_latency = n->latency(j); | 931 uint delta_latency = n->latency(j); |
918 uint current_latency = delta_latency + use_latency; | 932 uint current_latency = delta_latency + use_latency; |
919 | 933 |
920 if (_node_latency->at_grow(def->_idx) < current_latency) { | 934 if (get_latency_for_node(def) < current_latency) { |
921 _node_latency->at_put_grow(def->_idx, current_latency); | 935 set_latency_for_node(def, current_latency); |
922 } | 936 } |
923 | 937 |
924 #ifndef PRODUCT | 938 #ifndef PRODUCT |
925 if (trace_opto_pipelining()) { | 939 if (trace_opto_pipelining()) { |
926 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", | 940 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def)); |
927 use_latency, j, delta_latency, current_latency, def->_idx, | |
928 _node_latency->at_grow(def->_idx)); | |
929 } | 941 } |
930 #endif | 942 #endif |
931 } | 943 } |
932 } | 944 } |
933 | 945 |
934 //------------------------------latency_from_use------------------------------- | 946 //------------------------------latency_from_use------------------------------- |
935 // Compute the latency of a specific use | 947 // Compute the latency of a specific use |
936 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) { | 948 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) { |
937 // If self-reference, return no latency | 949 // If self-reference, return no latency |
938 if (use == n || use->is_Root()) | 950 if (use == n || use->is_Root()) { |
939 return 0; | 951 return 0; |
940 | 952 } |
941 uint def_pre_order = _bbs[def->_idx]->_pre_order; | 953 |
954 uint def_pre_order = get_block_for_node(def)->_pre_order; | |
942 uint latency = 0; | 955 uint latency = 0; |
943 | 956 |
944 // If the use is not a projection, then it is simple... | 957 // If the use is not a projection, then it is simple... |
945 if (!use->is_Proj()) { | 958 if (!use->is_Proj()) { |
946 #ifndef PRODUCT | 959 #ifndef PRODUCT |
948 tty->print("# out(): "); | 961 tty->print("# out(): "); |
949 use->dump(); | 962 use->dump(); |
950 } | 963 } |
951 #endif | 964 #endif |
952 | 965 |
953 uint use_pre_order = _bbs[use->_idx]->_pre_order; | 966 uint use_pre_order = get_block_for_node(use)->_pre_order; |
954 | 967 |
955 if (use_pre_order < def_pre_order) | 968 if (use_pre_order < def_pre_order) |
956 return 0; | 969 return 0; |
957 | 970 |
958 if (use_pre_order == def_pre_order && use->is_Phi()) | 971 if (use_pre_order == def_pre_order && use->is_Phi()) |
959 return 0; | 972 return 0; |
960 | 973 |
961 uint nlen = use->len(); | 974 uint nlen = use->len(); |
962 uint nl = _node_latency->at_grow(use->_idx); | 975 uint nl = get_latency_for_node(use); |
963 | 976 |
964 for ( uint j=0; j<nlen; j++ ) { | 977 for ( uint j=0; j<nlen; j++ ) { |
965 if (use->in(j) == n) { | 978 if (use->in(j) == n) { |
966 // Change this if we want local latencies | 979 // Change this if we want local latencies |
967 uint ul = use->latency(j); | 980 uint ul = use->latency(j); |
992 // routine. | 1005 // routine. |
993 void PhaseCFG::latency_from_uses(Node *n) { | 1006 void PhaseCFG::latency_from_uses(Node *n) { |
994 // Set the latency for this instruction | 1007 // Set the latency for this instruction |
995 #ifndef PRODUCT | 1008 #ifndef PRODUCT |
996 if (trace_opto_pipelining()) { | 1009 if (trace_opto_pipelining()) { |
997 tty->print("# latency_from_outputs: node_latency[%d] = %d for node", | 1010 tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); |
998 n->_idx, _node_latency->at_grow(n->_idx)); | |
999 dump(); | 1011 dump(); |
1000 } | 1012 } |
1001 #endif | 1013 #endif |
1002 uint latency=0; | 1014 uint latency=0; |
1003 const Node *def = n->is_Proj() ? n->in(0): n; | 1015 const Node *def = n->is_Proj() ? n->in(0): n; |
1006 uint l = latency_from_use(n, def, n->fast_out(i)); | 1018 uint l = latency_from_use(n, def, n->fast_out(i)); |
1007 | 1019 |
1008 if (latency < l) latency = l; | 1020 if (latency < l) latency = l; |
1009 } | 1021 } |
1010 | 1022 |
1011 _node_latency->at_put_grow(n->_idx, latency); | 1023 set_latency_for_node(n, latency); |
1012 } | 1024 } |
1013 | 1025 |
1014 //------------------------------hoist_to_cheaper_block------------------------- | 1026 //------------------------------hoist_to_cheaper_block------------------------- |
1015 // Pick a block for node self, between early and LCA, that is a cheaper | 1027 // Pick a block for node self, between early and LCA, that is a cheaper |
1016 // alternative to LCA. | 1028 // alternative to LCA. |
1017 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { | 1029 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { |
1018 const double delta = 1+PROB_UNLIKELY_MAG(4); | 1030 const double delta = 1+PROB_UNLIKELY_MAG(4); |
1019 Block* least = LCA; | 1031 Block* least = LCA; |
1020 double least_freq = least->_freq; | 1032 double least_freq = least->_freq; |
1021 uint target = _node_latency->at_grow(self->_idx); | 1033 uint target = get_latency_for_node(self); |
1022 uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx); | 1034 uint start_latency = get_latency_for_node(LCA->_nodes[0]); |
1023 uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx); | 1035 uint end_latency = get_latency_for_node(LCA->_nodes[LCA->end_idx()]); |
1024 bool in_latency = (target <= start_latency); | 1036 bool in_latency = (target <= start_latency); |
1025 const Block* root_block = _bbs[_root->_idx]; | 1037 const Block* root_block = get_block_for_node(_root); |
1026 | 1038 |
1027 // Turn off latency scheduling if scheduling is just plain off | 1039 // Turn off latency scheduling if scheduling is just plain off |
1028 if (!C->do_scheduling()) | 1040 if (!C->do_scheduling()) |
1029 in_latency = true; | 1041 in_latency = true; |
1030 | 1042 |
1035 if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty()) | 1047 if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty()) |
1036 in_latency = true; | 1048 in_latency = true; |
1037 | 1049 |
1038 #ifndef PRODUCT | 1050 #ifndef PRODUCT |
1039 if (trace_opto_pipelining()) { | 1051 if (trace_opto_pipelining()) { |
1040 tty->print("# Find cheaper block for latency %d: ", | 1052 tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self)); |
1041 _node_latency->at_grow(self->_idx)); | |
1042 self->dump(); | 1053 self->dump(); |
1043 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | 1054 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", |
1044 LCA->_pre_order, | 1055 LCA->_pre_order, |
1045 LCA->_nodes[0]->_idx, | 1056 LCA->_nodes[0]->_idx, |
1046 start_latency, | 1057 start_latency, |
1065 | 1076 |
1066 // Don't hoist machine instructions to the root basic block | 1077 // Don't hoist machine instructions to the root basic block |
1067 if (mach && LCA == root_block) | 1078 if (mach && LCA == root_block) |
1068 break; | 1079 break; |
1069 | 1080 |
1070 uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx); | 1081 uint start_lat = get_latency_for_node(LCA->_nodes[0]); |
1071 uint end_idx = LCA->end_idx(); | 1082 uint end_idx = LCA->end_idx(); |
1072 uint end_lat = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx); | 1083 uint end_lat = get_latency_for_node(LCA->_nodes[end_idx]); |
1073 double LCA_freq = LCA->_freq; | 1084 double LCA_freq = LCA->_freq; |
1074 #ifndef PRODUCT | 1085 #ifndef PRODUCT |
1075 if (trace_opto_pipelining()) { | 1086 if (trace_opto_pipelining()) { |
1076 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | 1087 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", |
1077 LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq); | 1088 LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq); |
1109 #ifndef PRODUCT | 1120 #ifndef PRODUCT |
1110 if (trace_opto_pipelining()) { | 1121 if (trace_opto_pipelining()) { |
1111 tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); | 1122 tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); |
1112 } | 1123 } |
1113 #endif | 1124 #endif |
1114 _node_latency->at_put_grow(self->_idx, end_latency); | 1125 set_latency_for_node(self, end_latency); |
1115 partial_latency_of_defs(self); | 1126 partial_latency_of_defs(self); |
1116 } | 1127 } |
1117 | 1128 |
1118 return least; | 1129 return least; |
1119 } | 1130 } |
1128 #ifndef PRODUCT | 1139 #ifndef PRODUCT |
1129 if (trace_opto_pipelining()) | 1140 if (trace_opto_pipelining()) |
1130 tty->print("\n#---- schedule_late ----\n"); | 1141 tty->print("\n#---- schedule_late ----\n"); |
1131 #endif | 1142 #endif |
1132 | 1143 |
1133 Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs); | 1144 Node_Backward_Iterator iter((Node *)_root, visited, stack, *this); |
1134 Node *self; | 1145 Node *self; |
1135 | 1146 |
1136 // Walk over all the nodes from last to first | 1147 // Walk over all the nodes from last to first |
1137 while (self = iter.next()) { | 1148 while (self = iter.next()) { |
1138 Block* early = _bbs[self->_idx]; // Earliest legal placement | 1149 Block* early = get_block_for_node(self); // Earliest legal placement |
1139 | 1150 |
1140 if (self->is_top()) { | 1151 if (self->is_top()) { |
1141 // Top node goes in bb #2 with other constants. | 1152 // Top node goes in bb #2 with other constants. |
1142 // It must be special-cased, because it has no out edges. | 1153 // It must be special-cased, because it has no out edges. |
1143 early->add_inst(self); | 1154 early->add_inst(self); |
1181 Block *LCA = NULL; | 1192 Block *LCA = NULL; |
1182 { | 1193 { |
1183 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { | 1194 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { |
1184 // For all uses, find LCA | 1195 // For all uses, find LCA |
1185 Node* use = self->fast_out(i); | 1196 Node* use = self->fast_out(i); |
1186 LCA = raise_LCA_above_use(LCA, use, self, _bbs); | 1197 LCA = raise_LCA_above_use(LCA, use, self, this); |
1187 } | 1198 } |
1188 } // (Hide defs of imax, i from rest of block.) | 1199 } // (Hide defs of imax, i from rest of block.) |
1189 | 1200 |
1190 // Place temps in the block of their use. This isn't a | 1201 // Place temps in the block of their use. This isn't a |
1191 // requirement for correctness but it reduces useless | 1202 // requirement for correctness but it reduces useless |
1192 // interference between temps and other nodes. | 1203 // interference between temps and other nodes. |
1193 if (mach != NULL && mach->is_MachTemp()) { | 1204 if (mach != NULL && mach->is_MachTemp()) { |
1194 _bbs.map(self->_idx, LCA); | 1205 map_node_to_block(self, LCA); |
1195 LCA->add_inst(self); | 1206 LCA->add_inst(self); |
1196 continue; | 1207 continue; |
1197 } | 1208 } |
1198 | 1209 |
1199 // Check if 'self' could be anti-dependent on memory | 1210 // Check if 'self' could be anti-dependent on memory |
1255 } // Loop until all nodes have been visited | 1266 } // Loop until all nodes have been visited |
1256 | 1267 |
1257 } // end ScheduleLate | 1268 } // end ScheduleLate |
1258 | 1269 |
1259 //------------------------------GlobalCodeMotion------------------------------- | 1270 //------------------------------GlobalCodeMotion------------------------------- |
1260 void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) { | 1271 void PhaseCFG::global_code_motion() { |
1261 ResourceMark rm; | 1272 ResourceMark rm; |
1262 | 1273 |
1263 #ifndef PRODUCT | 1274 #ifndef PRODUCT |
1264 if (trace_opto_pipelining()) { | 1275 if (trace_opto_pipelining()) { |
1265 tty->print("\n---- Start GlobalCodeMotion ----\n"); | 1276 tty->print("\n---- Start GlobalCodeMotion ----\n"); |
1266 } | 1277 } |
1267 #endif | 1278 #endif |
1268 | 1279 |
1269 // Initialize the bbs.map for things on the proj_list | 1280 // Initialize the node to block mapping for things on the proj_list |
1270 uint i; | 1281 for (uint i = 0; i < _matcher.number_of_projections(); i++) { |
1271 for( i=0; i < proj_list.size(); i++ ) | 1282 unmap_node_from_block(_matcher.get_projection(i)); |
1272 _bbs.map(proj_list[i]->_idx, NULL); | 1283 } |
1273 | 1284 |
1274 // Set the basic block for Nodes pinned into blocks | 1285 // Set the basic block for Nodes pinned into blocks |
1275 Arena *a = Thread::current()->resource_area(); | 1286 Arena* arena = Thread::current()->resource_area(); |
1276 VectorSet visited(a); | 1287 VectorSet visited(arena); |
1277 schedule_pinned_nodes( visited ); | 1288 schedule_pinned_nodes(visited); |
1278 | 1289 |
1279 // Find the earliest Block any instruction can be placed in. Some | 1290 // Find the earliest Block any instruction can be placed in. Some |
1280 // instructions are pinned into Blocks. Unpinned instructions can | 1291 // instructions are pinned into Blocks. Unpinned instructions can |
1281 // appear in last block in which all their inputs occur. | 1292 // appear in last block in which all their inputs occur. |
1282 visited.Clear(); | 1293 visited.Clear(); |
1283 Node_List stack(a); | 1294 Node_List stack(arena); |
1284 stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list | 1295 // Pre-grow the list |
1296 stack.map((C->unique() >> 1) + 16, NULL); | |
1285 if (!schedule_early(visited, stack)) { | 1297 if (!schedule_early(visited, stack)) { |
1286 // Bailout without retry | 1298 // Bailout without retry |
1287 C->record_method_not_compilable("early schedule failed"); | 1299 C->record_method_not_compilable("early schedule failed"); |
1288 return; | 1300 return; |
1289 } | 1301 } |
1290 | 1302 |
1291 // Build Def-Use edges. | 1303 // Build Def-Use edges. |
1292 proj_list.push(_root); // Add real root as another root | |
1293 proj_list.pop(); | |
1294 | |
1295 // Compute the latency information (via backwards walk) for all the | 1304 // Compute the latency information (via backwards walk) for all the |
1296 // instructions in the graph | 1305 // instructions in the graph |
1297 _node_latency = new GrowableArray<uint>(); // resource_area allocation | 1306 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
1298 | 1307 |
1299 if( C->do_scheduling() ) | 1308 if (C->do_scheduling()) { |
1300 ComputeLatenciesBackwards(visited, stack); | 1309 compute_latencies_backwards(visited, stack); |
1310 } | |
1301 | 1311 |
1302 // Now schedule all codes as LATE as possible. This is the LCA in the | 1312 // Now schedule all codes as LATE as possible. This is the LCA in the |
1303 // dominator tree of all USES of a value. Pick the block with the least | 1313 // dominator tree of all USES of a value. Pick the block with the least |
1304 // loop nesting depth that is lowest in the dominator tree. | 1314 // loop nesting depth that is lowest in the dominator tree. |
1305 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) | 1315 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) |
1306 schedule_late(visited, stack); | 1316 schedule_late(visited, stack); |
1307 if( C->failing() ) { | 1317 if (C->failing()) { |
1308 // schedule_late fails only when graph is incorrect. | 1318 // schedule_late fails only when graph is incorrect. |
1309 assert(!VerifyGraphEdges, "verification should have failed"); | 1319 assert(!VerifyGraphEdges, "verification should have failed"); |
1310 return; | 1320 return; |
1311 } | 1321 } |
1312 | |
1313 unique = C->unique(); | |
1314 | 1322 |
1315 #ifndef PRODUCT | 1323 #ifndef PRODUCT |
1316 if (trace_opto_pipelining()) { | 1324 if (trace_opto_pipelining()) { |
1317 tty->print("\n---- Detect implicit null checks ----\n"); | 1325 tty->print("\n---- Detect implicit null checks ----\n"); |
1318 } | 1326 } |
1332 allowed_reasons |= nth_bit(reason); | 1340 allowed_reasons |= nth_bit(reason); |
1333 } | 1341 } |
1334 // By reversing the loop direction we get a very minor gain on mpegaudio. | 1342 // By reversing the loop direction we get a very minor gain on mpegaudio. |
1335 // Feel free to revert to a forward loop for clarity. | 1343 // Feel free to revert to a forward loop for clarity. |
1336 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { | 1344 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { |
1337 for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) { | 1345 for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) { |
1338 Node *proj = matcher._null_check_tests[i ]; | 1346 Node* proj = _matcher._null_check_tests[i]; |
1339 Node *val = matcher._null_check_tests[i+1]; | 1347 Node* val = _matcher._null_check_tests[i + 1]; |
1340 _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons); | 1348 Block* block = get_block_for_node(proj); |
1349 block->implicit_null_check(this, proj, val, allowed_reasons); | |
1341 // The implicit_null_check will only perform the transformation | 1350 // The implicit_null_check will only perform the transformation |
1342 // if the null branch is truly uncommon, *and* it leads to an | 1351 // if the null branch is truly uncommon, *and* it leads to an |
1343 // uncommon trap. Combined with the too_many_traps guards | 1352 // uncommon trap. Combined with the too_many_traps guards |
1344 // above, this prevents SEGV storms reported in 6366351, | 1353 // above, this prevents SEGV storms reported in 6366351, |
1345 // by recompiling offending methods without this optimization. | 1354 // by recompiling offending methods without this optimization. |
1352 } | 1361 } |
1353 #endif | 1362 #endif |
1354 | 1363 |
1355 // Schedule locally. Right now a simple topological sort. | 1364 // Schedule locally. Right now a simple topological sort. |
1356 // Later, do a real latency aware scheduler. | 1365 // Later, do a real latency aware scheduler. |
1357 uint max_idx = C->unique(); | 1366 GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1); |
1358 GrowableArray<int> ready_cnt(max_idx, max_idx, -1); | |
1359 visited.Clear(); | 1367 visited.Clear(); |
1360 for (i = 0; i < _num_blocks; i++) { | 1368 for (uint i = 0; i < number_of_blocks(); i++) { |
1361 if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) { | 1369 Block* block = get_block(i); |
1370 if (!block->schedule_local(this, _matcher, ready_cnt, visited)) { | |
1362 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { | 1371 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { |
1363 C->record_method_not_compilable("local schedule failed"); | 1372 C->record_method_not_compilable("local schedule failed"); |
1364 } | 1373 } |
1365 return; | 1374 return; |
1366 } | 1375 } |
1367 } | 1376 } |
1368 | 1377 |
1369 // If we inserted any instructions between a Call and his CatchNode, | 1378 // If we inserted any instructions between a Call and his CatchNode, |
1370 // clone the instructions on all paths below the Catch. | 1379 // clone the instructions on all paths below the Catch. |
1371 for( i=0; i < _num_blocks; i++ ) | 1380 for (uint i = 0; i < number_of_blocks(); i++) { |
1372 _blocks[i]->call_catch_cleanup(_bbs, C); | 1381 Block* block = get_block(i); |
1382 block->call_catch_cleanup(this, C); | |
1383 } | |
1373 | 1384 |
1374 #ifndef PRODUCT | 1385 #ifndef PRODUCT |
1375 if (trace_opto_pipelining()) { | 1386 if (trace_opto_pipelining()) { |
1376 tty->print("\n---- After GlobalCodeMotion ----\n"); | 1387 tty->print("\n---- After GlobalCodeMotion ----\n"); |
1377 for (uint i = 0; i < _num_blocks; i++) { | 1388 for (uint i = 0; i < number_of_blocks(); i++) { |
1378 _blocks[i]->dump(); | 1389 Block* block = get_block(i); |
1390 block->dump(); | |
1379 } | 1391 } |
1380 } | 1392 } |
1381 #endif | 1393 #endif |
1382 // Dead. | 1394 // Dead. |
1383 _node_latency = (GrowableArray<uint> *)0xdeadbeef; | 1395 _node_latency = (GrowableArray<uint> *)0xdeadbeef; |
1384 } | 1396 } |
1385 | 1397 |
1398 bool PhaseCFG::do_global_code_motion() { | |
1399 | |
1400 build_dominator_tree(); | |
1401 if (C->failing()) { | |
1402 return false; | |
1403 } | |
1404 | |
1405 NOT_PRODUCT( C->verify_graph_edges(); ) | |
1406 | |
1407 estimate_block_frequency(); | |
1408 | |
1409 global_code_motion(); | |
1410 | |
1411 if (C->failing()) { | |
1412 return false; | |
1413 } | |
1414 | |
1415 return true; | |
1416 } | |
1386 | 1417 |
1387 //------------------------------Estimate_Block_Frequency----------------------- | 1418 //------------------------------Estimate_Block_Frequency----------------------- |
1388 // Estimate block frequencies based on IfNode probabilities. | 1419 // Estimate block frequencies based on IfNode probabilities. |
1389 void PhaseCFG::Estimate_Block_Frequency() { | 1420 void PhaseCFG::estimate_block_frequency() { |
1390 | 1421 |
1391 // Force conditional branches leading to uncommon traps to be unlikely, | 1422 // Force conditional branches leading to uncommon traps to be unlikely, |
1392 // not because we get to the uncommon_trap with less relative frequency, | 1423 // not because we get to the uncommon_trap with less relative frequency, |
1393 // but because an uncommon_trap typically causes a deopt, so we only get | 1424 // but because an uncommon_trap typically causes a deopt, so we only get |
1394 // there once. | 1425 // there once. |
1395 if (C->do_freq_based_layout()) { | 1426 if (C->do_freq_based_layout()) { |
1396 Block_List worklist; | 1427 Block_List worklist; |
1397 Block* root_blk = _blocks[0]; | 1428 Block* root_blk = get_block(0); |
1398 for (uint i = 1; i < root_blk->num_preds(); i++) { | 1429 for (uint i = 1; i < root_blk->num_preds(); i++) { |
1399 Block *pb = _bbs[root_blk->pred(i)->_idx]; | 1430 Block *pb = get_block_for_node(root_blk->pred(i)); |
1400 if (pb->has_uncommon_code()) { | 1431 if (pb->has_uncommon_code()) { |
1401 worklist.push(pb); | 1432 worklist.push(pb); |
1402 } | 1433 } |
1403 } | 1434 } |
1404 while (worklist.size() > 0) { | 1435 while (worklist.size() > 0) { |
1405 Block* uct = worklist.pop(); | 1436 Block* uct = worklist.pop(); |
1406 if (uct == _broot) continue; | 1437 if (uct == get_root_block()) { |
1438 continue; | |
1439 } | |
1407 for (uint i = 1; i < uct->num_preds(); i++) { | 1440 for (uint i = 1; i < uct->num_preds(); i++) { |
1408 Block *pb = _bbs[uct->pred(i)->_idx]; | 1441 Block *pb = get_block_for_node(uct->pred(i)); |
1409 if (pb->_num_succs == 1) { | 1442 if (pb->_num_succs == 1) { |
1410 worklist.push(pb); | 1443 worklist.push(pb); |
1411 } else if (pb->num_fall_throughs() == 2) { | 1444 } else if (pb->num_fall_throughs() == 2) { |
1412 pb->update_uncommon_branch(uct); | 1445 pb->update_uncommon_branch(uct); |
1413 } | 1446 } |
1425 // Adjust all frequencies to be relative to a single method entry | 1458 // Adjust all frequencies to be relative to a single method entry |
1426 _root_loop->_freq = 1.0; | 1459 _root_loop->_freq = 1.0; |
1427 _root_loop->scale_freq(); | 1460 _root_loop->scale_freq(); |
1428 | 1461 |
1429 // Save outmost loop frequency for LRG frequency threshold | 1462 // Save outmost loop frequency for LRG frequency threshold |
1430 _outer_loop_freq = _root_loop->outer_loop_freq(); | 1463 _outer_loop_frequency = _root_loop->outer_loop_freq(); |
1431 | 1464 |
1432 // force paths ending at uncommon traps to be infrequent | 1465 // force paths ending at uncommon traps to be infrequent |
1433 if (!C->do_freq_based_layout()) { | 1466 if (!C->do_freq_based_layout()) { |
1434 Block_List worklist; | 1467 Block_List worklist; |
1435 Block* root_blk = _blocks[0]; | 1468 Block* root_blk = get_block(0); |
1436 for (uint i = 1; i < root_blk->num_preds(); i++) { | 1469 for (uint i = 1; i < root_blk->num_preds(); i++) { |
1437 Block *pb = _bbs[root_blk->pred(i)->_idx]; | 1470 Block *pb = get_block_for_node(root_blk->pred(i)); |
1438 if (pb->has_uncommon_code()) { | 1471 if (pb->has_uncommon_code()) { |
1439 worklist.push(pb); | 1472 worklist.push(pb); |
1440 } | 1473 } |
1441 } | 1474 } |
1442 while (worklist.size() > 0) { | 1475 while (worklist.size() > 0) { |
1443 Block* uct = worklist.pop(); | 1476 Block* uct = worklist.pop(); |
1444 uct->_freq = PROB_MIN; | 1477 uct->_freq = PROB_MIN; |
1445 for (uint i = 1; i < uct->num_preds(); i++) { | 1478 for (uint i = 1; i < uct->num_preds(); i++) { |
1446 Block *pb = _bbs[uct->pred(i)->_idx]; | 1479 Block *pb = get_block_for_node(uct->pred(i)); |
1447 if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { | 1480 if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { |
1448 worklist.push(pb); | 1481 worklist.push(pb); |
1449 } | 1482 } |
1450 } | 1483 } |
1451 } | 1484 } |
1452 } | 1485 } |
1453 | 1486 |
1454 #ifdef ASSERT | 1487 #ifdef ASSERT |
1455 for (uint i = 0; i < _num_blocks; i++ ) { | 1488 for (uint i = 0; i < number_of_blocks(); i++) { |
1456 Block *b = _blocks[i]; | 1489 Block* b = get_block(i); |
1457 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); | 1490 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); |
1458 } | 1491 } |
1459 #endif | 1492 #endif |
1460 | 1493 |
1461 #ifndef PRODUCT | 1494 #ifndef PRODUCT |
1475 //----------------------------create_loop_tree-------------------------------- | 1508 //----------------------------create_loop_tree-------------------------------- |
1476 // Create a loop tree from the CFG | 1509 // Create a loop tree from the CFG |
1477 CFGLoop* PhaseCFG::create_loop_tree() { | 1510 CFGLoop* PhaseCFG::create_loop_tree() { |
1478 | 1511 |
1479 #ifdef ASSERT | 1512 #ifdef ASSERT |
1480 assert( _blocks[0] == _broot, "" ); | 1513 assert(get_block(0) == get_root_block(), "first block should be root block"); |
1481 for (uint i = 0; i < _num_blocks; i++ ) { | 1514 for (uint i = 0; i < number_of_blocks(); i++) { |
1482 Block *b = _blocks[i]; | 1515 Block* block = get_block(i); |
1483 // Check that _loop field are clear...we could clear them if not. | 1516 // Check that _loop field are clear...we could clear them if not. |
1484 assert(b->_loop == NULL, "clear _loop expected"); | 1517 assert(block->_loop == NULL, "clear _loop expected"); |
1485 // Sanity check that the RPO numbering is reflected in the _blocks array. | 1518 // Sanity check that the RPO numbering is reflected in the _blocks array. |
1486 // It doesn't have to be for the loop tree to be built, but if it is not, | 1519 // It doesn't have to be for the loop tree to be built, but if it is not, |
1487 // then the blocks have been reordered since dom graph building...which | 1520 // then the blocks have been reordered since dom graph building...which |
1488 // may question the RPO numbering | 1521 // may question the RPO numbering |
1489 assert(b->_rpo == i, "unexpected reverse post order number"); | 1522 assert(block->_rpo == i, "unexpected reverse post order number"); |
1490 } | 1523 } |
1491 #endif | 1524 #endif |
1492 | 1525 |
1493 int idct = 0; | 1526 int idct = 0; |
1494 CFGLoop* root_loop = new CFGLoop(idct++); | 1527 CFGLoop* root_loop = new CFGLoop(idct++); |
1495 | 1528 |
1496 Block_List worklist; | 1529 Block_List worklist; |
1497 | 1530 |
1498 // Assign blocks to loops | 1531 // Assign blocks to loops |
1499 for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block | 1532 for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block |
1500 Block *b = _blocks[i]; | 1533 Block* block = get_block(i); |
1501 | 1534 |
1502 if (b->head()->is_Loop()) { | 1535 if (block->head()->is_Loop()) { |
1503 Block* loop_head = b; | 1536 Block* loop_head = block; |
1504 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | 1537 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); |
1505 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); | 1538 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); |
1506 Block* tail = _bbs[tail_n->_idx]; | 1539 Block* tail = get_block_for_node(tail_n); |
1507 | 1540 |
1508 // Defensively filter out Loop nodes for non-single-entry loops. | 1541 // Defensively filter out Loop nodes for non-single-entry loops. |
1509 // For all reasonable loops, the head occurs before the tail in RPO. | 1542 // For all reasonable loops, the head occurs before the tail in RPO. |
1510 if (i <= tail->_rpo) { | 1543 if (i <= tail->_rpo) { |
1511 | 1544 |
1516 CFGLoop* nloop = new CFGLoop(idct++); | 1549 CFGLoop* nloop = new CFGLoop(idct++); |
1517 assert(loop_head->_loop == NULL, "just checking"); | 1550 assert(loop_head->_loop == NULL, "just checking"); |
1518 loop_head->_loop = nloop; | 1551 loop_head->_loop = nloop; |
1519 // Add to nloop so push_pred() will skip over inner loops | 1552 // Add to nloop so push_pred() will skip over inner loops |
1520 nloop->add_member(loop_head); | 1553 nloop->add_member(loop_head); |
1521 nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs); | 1554 nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this); |
1522 | 1555 |
1523 while (worklist.size() > 0) { | 1556 while (worklist.size() > 0) { |
1524 Block* member = worklist.pop(); | 1557 Block* member = worklist.pop(); |
1525 if (member != loop_head) { | 1558 if (member != loop_head) { |
1526 for (uint j = 1; j < member->num_preds(); j++) { | 1559 for (uint j = 1; j < member->num_preds(); j++) { |
1527 nloop->push_pred(member, j, worklist, _bbs); | 1560 nloop->push_pred(member, j, worklist, this); |
1528 } | 1561 } |
1529 } | 1562 } |
1530 } | 1563 } |
1531 } | 1564 } |
1532 } | 1565 } |
1533 } | 1566 } |
1534 | 1567 |
1535 // Create a member list for each loop consisting | 1568 // Create a member list for each loop consisting |
1536 // of both blocks and (immediate child) loops. | 1569 // of both blocks and (immediate child) loops. |
1537 for (uint i = 0; i < _num_blocks; i++) { | 1570 for (uint i = 0; i < number_of_blocks(); i++) { |
1538 Block *b = _blocks[i]; | 1571 Block* block = get_block(i); |
1539 CFGLoop* lp = b->_loop; | 1572 CFGLoop* lp = block->_loop; |
1540 if (lp == NULL) { | 1573 if (lp == NULL) { |
1541 // Not assigned to a loop. Add it to the method's pseudo loop. | 1574 // Not assigned to a loop. Add it to the method's pseudo loop. |
1542 b->_loop = root_loop; | 1575 block->_loop = root_loop; |
1543 lp = root_loop; | 1576 lp = root_loop; |
1544 } | 1577 } |
1545 if (lp == root_loop || b != lp->head()) { // loop heads are already members | 1578 if (lp == root_loop || block != lp->head()) { // loop heads are already members |
1546 lp->add_member(b); | 1579 lp->add_member(block); |
1547 } | 1580 } |
1548 if (lp != root_loop) { | 1581 if (lp != root_loop) { |
1549 if (lp->parent() == NULL) { | 1582 if (lp->parent() == NULL) { |
1550 // Not a nested loop. Make it a child of the method's pseudo loop. | 1583 // Not a nested loop. Make it a child of the method's pseudo loop. |
1551 root_loop->add_nested_loop(lp); | 1584 root_loop->add_nested_loop(lp); |
1552 } | 1585 } |
1553 if (b == lp->head()) { | 1586 if (block == lp->head()) { |
1554 // Add nested loop to member list of parent loop. | 1587 // Add nested loop to member list of parent loop. |
1555 lp->parent()->add_member(lp); | 1588 lp->parent()->add_member(lp); |
1556 } | 1589 } |
1557 } | 1590 } |
1558 } | 1591 } |
1559 | 1592 |
1560 return root_loop; | 1593 return root_loop; |
1561 } | 1594 } |
1562 | 1595 |
1563 //------------------------------push_pred-------------------------------------- | 1596 //------------------------------push_pred-------------------------------------- |
1564 void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) { | 1597 void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) { |
1565 Node* pred_n = blk->pred(i); | 1598 Node* pred_n = blk->pred(i); |
1566 Block* pred = node_to_blk[pred_n->_idx]; | 1599 Block* pred = cfg->get_block_for_node(pred_n); |
1567 CFGLoop *pred_loop = pred->_loop; | 1600 CFGLoop *pred_loop = pred->_loop; |
1568 if (pred_loop == NULL) { | 1601 if (pred_loop == NULL) { |
1569 // Filter out blocks for non-single-entry loops. | 1602 // Filter out blocks for non-single-entry loops. |
1570 // For all reasonable loops, the head occurs before the tail in RPO. | 1603 // For all reasonable loops, the head occurs before the tail in RPO. |
1571 if (pred->_rpo > head()->_rpo) { | 1604 if (pred->_rpo > head()->_rpo) { |
1582 add_nested_loop(pred_loop); | 1615 add_nested_loop(pred_loop); |
1583 // Continue with loop entry predecessor. | 1616 // Continue with loop entry predecessor. |
1584 Block* pred_head = pred_loop->head(); | 1617 Block* pred_head = pred_loop->head(); |
1585 assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | 1618 assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); |
1586 assert(pred_head != head(), "loop head in only one loop"); | 1619 assert(pred_head != head(), "loop head in only one loop"); |
1587 push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk); | 1620 push_pred(pred_head, LoopNode::EntryControl, worklist, cfg); |
1588 } else { | 1621 } else { |
1589 assert(pred_loop->_parent == this && _parent == NULL, "just checking"); | 1622 assert(pred_loop->_parent == this && _parent == NULL, "just checking"); |
1590 } | 1623 } |
1591 } | 1624 } |
1592 } | 1625 } |