Mercurial > hg > graal-compiler
annotate src/share/vm/opto/lcm.cpp @ 1941:79d04223b8a5
Added caching for resolved types and resolved fields.
This is crucial, because the local load elimination will lead to wrong results, if field equality (of two RiField objects with the same object and the same RiType) is not given. The caching makes sure that the default equals implementation is sufficient.
author | Thomas Wuerthinger <wuerthinger@ssw.jku.at> |
---|---|
date | Tue, 28 Dec 2010 18:33:26 +0100 |
parents | 3e8fbc61cee8 |
children | f95d63e2154a |
rev | line source |
---|---|
0 | 1 /* |
1748 | 2 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
25 // Optimization - Graph Style | |
26 | |
27 #include "incls/_precompiled.incl" | |
28 #include "incls/_lcm.cpp.incl" | |
29 | |
30 //------------------------------implicit_null_check---------------------------- | |
31 // Detect implicit-null-check opportunities. Basically, find NULL checks | |
32 // with suitable memory ops nearby. Use the memory op to do the NULL check. | |
33 // I can generate a memory op if there is not one nearby. | |
34 // The proj is the control projection for the not-null case. | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
35 // The val is the pointer being checked for nullness or |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
36 // decodeHeapOop_not_null node if it did not fold into address. |
0 | 37 void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) { |
38 // Assume if null check need for 0 offset then always needed | |
39 // Intel solaris doesn't support any null checks yet and no | |
40 // mechanism exists (yet) to set the switches at an os_cpu level | |
41 if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; | |
42 | |
43 // Make sure the ptr-is-null path appears to be uncommon! | |
44 float f = end()->as_MachIf()->_prob; | |
45 if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; | |
46 if( f > PROB_UNLIKELY_MAG(4) ) return; | |
47 | |
48 uint bidx = 0; // Capture index of value into memop | |
49 bool was_store; // Memory op is a store op | |
50 | |
51 // Get the successor block for if the test ptr is non-null | |
52 Block* not_null_block; // this one goes with the proj | |
53 Block* null_block; | |
54 if (_nodes[_nodes.size()-1] == proj) { | |
55 null_block = _succs[0]; | |
56 not_null_block = _succs[1]; | |
57 } else { | |
58 assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other"); | |
59 not_null_block = _succs[0]; | |
60 null_block = _succs[1]; | |
61 } | |
332 | 62 while (null_block->is_Empty() == Block::empty_with_goto) { |
63 null_block = null_block->_succs[0]; | |
64 } | |
0 | 65 |
66 // Search the exception block for an uncommon trap. | |
67 // (See Parse::do_if and Parse::do_ifnull for the reason | |
68 // we need an uncommon trap. Briefly, we need a way to | |
69 // detect failure of this optimization, as in 6366351.) | |
70 { | |
71 bool found_trap = false; | |
72 for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) { | |
73 Node* nn = null_block->_nodes[i1]; | |
74 if (nn->is_MachCall() && | |
1748 | 75 nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) { |
0 | 76 const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type(); |
77 if (trtype->isa_int() && trtype->is_int()->is_con()) { | |
78 jint tr_con = trtype->is_int()->get_con(); | |
79 Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con); | |
80 Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con); | |
81 assert((int)reason < (int)BitsPerInt, "recode bit map"); | |
82 if (is_set_nth_bit(allowed_reasons, (int) reason) | |
83 && action != Deoptimization::Action_none) { | |
84 // This uncommon trap is sure to recompile, eventually. | |
85 // When that happens, C->too_many_traps will prevent | |
86 // this transformation from happening again. | |
87 found_trap = true; | |
88 } | |
89 } | |
90 break; | |
91 } | |
92 } | |
93 if (!found_trap) { | |
94 // We did not find an uncommon trap. | |
95 return; | |
96 } | |
97 } | |
98 | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
99 // Check for decodeHeapOop_not_null node which did not fold into address |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
100 bool is_decoden = ((intptr_t)val) & 1; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
101 val = (Node*)(((intptr_t)val) & ~1); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
102 |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
103 assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() && |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
104 (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity"); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
105 |
0 | 106 // Search the successor block for a load or store who's base value is also |
107 // the tested value. There may be several. | |
108 Node_List *out = new Node_List(Thread::current()->resource_area()); | |
109 MachNode *best = NULL; // Best found so far | |
110 for (DUIterator i = val->outs(); val->has_out(i); i++) { | |
111 Node *m = val->out(i); | |
112 if( !m->is_Mach() ) continue; | |
113 MachNode *mach = m->as_Mach(); | |
114 was_store = false; | |
1693
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
115 int iop = mach->ideal_Opcode(); |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
116 switch( iop ) { |
0 | 117 case Op_LoadB: |
558
3b5ac9e7e6ea
6796746: rename LoadC (char) opcode class to LoadUS (unsigned short)
twisti
parents:
365
diff
changeset
|
118 case Op_LoadUS: |
0 | 119 case Op_LoadD: |
120 case Op_LoadF: | |
121 case Op_LoadI: | |
122 case Op_LoadL: | |
123 case Op_LoadP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
124 case Op_LoadN: |
0 | 125 case Op_LoadS: |
126 case Op_LoadKlass: | |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
125
diff
changeset
|
127 case Op_LoadNKlass: |
0 | 128 case Op_LoadRange: |
129 case Op_LoadD_unaligned: | |
130 case Op_LoadL_unaligned: | |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
131 assert(mach->in(2) == val, "should be address"); |
0 | 132 break; |
133 case Op_StoreB: | |
134 case Op_StoreC: | |
135 case Op_StoreCM: | |
136 case Op_StoreD: | |
137 case Op_StoreF: | |
138 case Op_StoreI: | |
139 case Op_StoreL: | |
140 case Op_StoreP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
141 case Op_StoreN: |
0 | 142 was_store = true; // Memory op is a store op |
143 // Stores will have their address in slot 2 (memory in slot 1). | |
144 // If the value being nul-checked is in another slot, it means we | |
145 // are storing the checked value, which does NOT check the value! | |
146 if( mach->in(2) != val ) continue; | |
147 break; // Found a memory op? | |
148 case Op_StrComp: | |
681 | 149 case Op_StrEquals: |
150 case Op_StrIndexOf: | |
169
9148c65abefc
6695049: (coll) Create an x86 intrinsic for Arrays.equals
rasbold
parents:
164
diff
changeset
|
151 case Op_AryEq: |
0 | 152 // Not a legit memory op for implicit null check regardless of |
153 // embedded loads | |
154 continue; | |
155 default: // Also check for embedded loads | |
156 if( !mach->needs_anti_dependence_check() ) | |
157 continue; // Not an memory op; skip it | |
1693
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
158 if( must_clone[iop] ) { |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
159 // Do not move nodes which produce flags because |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
160 // RA will try to clone it to place near branch and |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
161 // it will cause recompilation, see clone_node(). |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
162 continue; |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
163 } |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
164 { |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
165 // Check that value is used in memory address in |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
166 // instructions with embedded load (CmpP val1,(val2+off)). |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
167 Node* base; |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
168 Node* index; |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
169 const MachOper* oper = mach->memory_inputs(base, index); |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
170 if (oper == NULL || oper == (MachOper*)-1) { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
171 continue; // Not an memory op; skip it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
172 } |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
173 if (val == base || |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
174 val == index && val->bottom_type()->isa_narrowoop()) { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
175 break; // Found it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
176 } else { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
177 continue; // Skip it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
178 } |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
179 } |
0 | 180 break; |
181 } | |
182 // check if the offset is not too high for implicit exception | |
183 { | |
184 intptr_t offset = 0; | |
185 const TypePtr *adr_type = NULL; // Do not need this return value here | |
186 const Node* base = mach->get_base_and_disp(offset, adr_type); | |
187 if (base == NULL || base == NodeSentinel) { | |
332 | 188 // Narrow oop address doesn't have base, only index |
189 if( val->bottom_type()->isa_narrowoop() && | |
190 MacroAssembler::needs_explicit_null_check(offset) ) | |
191 continue; // Give up if offset is beyond page size | |
0 | 192 // cannot reason about it; is probably not implicit null exception |
193 } else { | |
642
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
194 const TypePtr* tptr; |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
195 if (UseCompressedOops && Universe::narrow_oop_shift() == 0) { |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
196 // 32-bits narrow oop can be the base of address expressions |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
197 tptr = base->bottom_type()->make_ptr(); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
198 } else { |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
199 // only regular oops are expected here |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
200 tptr = base->bottom_type()->is_ptr(); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
201 } |
0 | 202 // Give up if offset is not a compile-time constant |
203 if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot ) | |
204 continue; | |
205 offset += tptr->_offset; // correct if base is offseted | |
206 if( MacroAssembler::needs_explicit_null_check(offset) ) | |
207 continue; // Give up is reference is beyond 4K page size | |
208 } | |
209 } | |
210 | |
211 // Check ctrl input to see if the null-check dominates the memory op | |
212 Block *cb = cfg->_bbs[mach->_idx]; | |
213 cb = cb->_idom; // Always hoist at least 1 block | |
214 if( !was_store ) { // Stores can be hoisted only one block | |
215 while( cb->_dom_depth > (_dom_depth + 1)) | |
216 cb = cb->_idom; // Hoist loads as far as we want | |
217 // The non-null-block should dominate the memory op, too. Live | |
218 // range spilling will insert a spill in the non-null-block if it is | |
219 // needs to spill the memory op for an implicit null check. | |
220 if (cb->_dom_depth == (_dom_depth + 1)) { | |
221 if (cb != not_null_block) continue; | |
222 cb = cb->_idom; | |
223 } | |
224 } | |
225 if( cb != this ) continue; | |
226 | |
227 // Found a memory user; see if it can be hoisted to check-block | |
228 uint vidx = 0; // Capture index of value into memop | |
229 uint j; | |
230 for( j = mach->req()-1; j > 0; j-- ) { | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
231 if( mach->in(j) == val ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
232 vidx = j; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
233 // Ignore DecodeN val which could be hoisted to where needed. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
234 if( is_decoden ) continue; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
235 } |
0 | 236 // Block of memory-op input |
237 Block *inb = cfg->_bbs[mach->in(j)->_idx]; | |
238 Block *b = this; // Start from nul check | |
239 while( b != inb && b->_dom_depth > inb->_dom_depth ) | |
240 b = b->_idom; // search upwards for input | |
241 // See if input dominates null check | |
242 if( b != inb ) | |
243 break; | |
244 } | |
245 if( j > 0 ) | |
246 continue; | |
247 Block *mb = cfg->_bbs[mach->_idx]; | |
248 // Hoisting stores requires more checks for the anti-dependence case. | |
249 // Give up hoisting if we have to move the store past any load. | |
250 if( was_store ) { | |
251 Block *b = mb; // Start searching here for a local load | |
252 // mach use (faulting) trying to hoist | |
253 // n might be blocker to hoisting | |
254 while( b != this ) { | |
255 uint k; | |
256 for( k = 1; k < b->_nodes.size(); k++ ) { | |
257 Node *n = b->_nodes[k]; | |
258 if( n->needs_anti_dependence_check() && | |
259 n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) | |
260 break; // Found anti-dependent load | |
261 } | |
262 if( k < b->_nodes.size() ) | |
263 break; // Found anti-dependent load | |
264 // Make sure control does not do a merge (would have to check allpaths) | |
265 if( b->num_preds() != 2 ) break; | |
266 b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block | |
267 } | |
268 if( b != this ) continue; | |
269 } | |
270 | |
271 // Make sure this memory op is not already being used for a NullCheck | |
272 Node *e = mb->end(); | |
273 if( e->is_MachNullCheck() && e->in(1) == mach ) | |
274 continue; // Already being used as a NULL check | |
275 | |
276 // Found a candidate! Pick one with least dom depth - the highest | |
277 // in the dom tree should be closest to the null check. | |
278 if( !best || | |
279 cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) { | |
280 best = mach; | |
281 bidx = vidx; | |
282 | |
283 } | |
284 } | |
285 // No candidate! | |
286 if( !best ) return; | |
287 | |
288 // ---- Found an implicit null check | |
289 extern int implicit_null_checks; | |
290 implicit_null_checks++; | |
291 | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
292 if( is_decoden ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
293 // Check if we need to hoist decodeHeapOop_not_null first. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
294 Block *valb = cfg->_bbs[val->_idx]; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
295 if( this != valb && this->_dom_depth < valb->_dom_depth ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
296 // Hoist it up to the end of the test block. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
297 valb->find_remove(val); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
298 this->add_inst(val); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
299 cfg->_bbs.map(val->_idx,this); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
300 // DecodeN on x86 may kill flags. Check for flag-killing projections |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
301 // that also need to be hoisted. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
302 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
303 Node* n = val->fast_out(j); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
304 if( n->Opcode() == Op_MachProj ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
305 cfg->_bbs[n->_idx]->find_remove(n); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
306 this->add_inst(n); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
307 cfg->_bbs.map(n->_idx,this); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
308 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
309 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
310 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
311 } |
0 | 312 // Hoist the memory candidate up to the end of the test block. |
313 Block *old_block = cfg->_bbs[best->_idx]; | |
314 old_block->find_remove(best); | |
315 add_inst(best); | |
316 cfg->_bbs.map(best->_idx,this); | |
317 | |
318 // Move the control dependence | |
319 if (best->in(0) && best->in(0) == old_block->_nodes[0]) | |
320 best->set_req(0, _nodes[0]); | |
321 | |
322 // Check for flag-killing projections that also need to be hoisted | |
323 // Should be DU safe because no edge updates. | |
324 for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { | |
325 Node* n = best->fast_out(j); | |
326 if( n->Opcode() == Op_MachProj ) { | |
327 cfg->_bbs[n->_idx]->find_remove(n); | |
328 add_inst(n); | |
329 cfg->_bbs.map(n->_idx,this); | |
330 } | |
331 } | |
332 | |
333 Compile *C = cfg->C; | |
334 // proj==Op_True --> ne test; proj==Op_False --> eq test. | |
335 // One of two graph shapes got matched: | |
336 // (IfTrue (If (Bool NE (CmpP ptr NULL)))) | |
337 // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) | |
338 // NULL checks are always branch-if-eq. If we see a IfTrue projection | |
339 // then we are replacing a 'ne' test with a 'eq' NULL check test. | |
340 // We need to flip the projections to keep the same semantics. | |
341 if( proj->Opcode() == Op_IfTrue ) { | |
342 // Swap order of projections in basic block to swap branch targets | |
343 Node *tmp1 = _nodes[end_idx()+1]; | |
344 Node *tmp2 = _nodes[end_idx()+2]; | |
345 _nodes.map(end_idx()+1, tmp2); | |
346 _nodes.map(end_idx()+2, tmp1); | |
347 Node *tmp = new (C, 1) Node(C->top()); // Use not NULL input | |
348 tmp1->replace_by(tmp); | |
349 tmp2->replace_by(tmp1); | |
350 tmp->replace_by(tmp2); | |
351 tmp->destruct(); | |
352 } | |
353 | |
354 // Remove the existing null check; use a new implicit null check instead. | |
355 // Since schedule-local needs precise def-use info, we need to correct | |
356 // it as well. | |
357 Node *old_tst = proj->in(0); | |
358 MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); | |
359 _nodes.map(end_idx(),nul_chk); | |
360 cfg->_bbs.map(nul_chk->_idx,this); | |
361 // Redirect users of old_test to nul_chk | |
362 for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) | |
363 old_tst->last_out(i2)->set_req(0, nul_chk); | |
364 // Clean-up any dead code | |
365 for (uint i3 = 0; i3 < old_tst->req(); i3++) | |
366 old_tst->set_req(i3, NULL); | |
367 | |
368 cfg->latency_from_uses(nul_chk); | |
369 cfg->latency_from_uses(best); | |
370 } | |
371 | |
372 | |
373 //------------------------------select----------------------------------------- | |
374 // Select a nice fellow from the worklist to schedule next. If there is only | |
375 // one choice, then use it. Projections take top priority for correctness | |
376 // reasons - if I see a projection, then it is next. There are a number of | |
377 // other special cases, for instructions that consume condition codes, et al. | |
378 // These are chosen immediately. Some instructions are required to immediately | |
379 // precede the last instruction in the block, and these are taken last. Of the | |
380 // remaining cases (most), choose the instruction with the greatest latency | |
381 // (that is, the most number of pseudo-cycles required to the end of the | |
382 // routine). If there is a tie, choose the instruction with the most inputs. | |
383 Node *Block::select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot) { | |
384 | |
385 // If only a single entry on the stack, use it | |
386 uint cnt = worklist.size(); | |
387 if (cnt == 1) { | |
388 Node *n = worklist[0]; | |
389 worklist.map(0,worklist.pop()); | |
390 return n; | |
391 } | |
392 | |
393 uint choice = 0; // Bigger is most important | |
394 uint latency = 0; // Bigger is scheduled first | |
395 uint score = 0; // Bigger is better | |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
396 int idx = -1; // Index in worklist |
0 | 397 |
398 for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist | |
399 // Order in worklist is used to break ties. | |
400 // See caller for how this is used to delay scheduling | |
401 // of induction variable increments to after the other | |
402 // uses of the phi are scheduled. | |
403 Node *n = worklist[i]; // Get Node on worklist | |
404 | |
405 int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0; | |
406 if( n->is_Proj() || // Projections always win | |
407 n->Opcode()== Op_Con || // So does constant 'Top' | |
408 iop == Op_CreateEx || // Create-exception must start block | |
409 iop == Op_CheckCastPP | |
410 ) { | |
411 worklist.map(i,worklist.pop()); | |
412 return n; | |
413 } | |
414 | |
415 // Final call in a block must be adjacent to 'catch' | |
416 Node *e = end(); | |
417 if( e->is_Catch() && e->in(0)->in(0) == n ) | |
418 continue; | |
419 | |
420 // Memory op for an implicit null check has to be at the end of the block | |
421 if( e->is_MachNullCheck() && e->in(1) == n ) | |
422 continue; | |
423 | |
424 uint n_choice = 2; | |
425 | |
426 // See if this instruction is consumed by a branch. If so, then (as the | |
427 // branch is the last instruction in the basic block) force it to the | |
428 // end of the basic block | |
429 if ( must_clone[iop] ) { | |
430 // See if any use is a branch | |
431 bool found_machif = false; | |
432 | |
433 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
434 Node* use = n->fast_out(j); | |
435 | |
436 // The use is a conditional branch, make them adjacent | |
437 if (use->is_MachIf() && cfg->_bbs[use->_idx]==this ) { | |
438 found_machif = true; | |
439 break; | |
440 } | |
441 | |
442 // More than this instruction pending for successor to be ready, | |
443 // don't choose this if other opportunities are ready | |
444 if (ready_cnt[use->_idx] > 1) | |
445 n_choice = 1; | |
446 } | |
447 | |
448 // loop terminated, prefer not to use this instruction | |
449 if (found_machif) | |
450 continue; | |
451 } | |
452 | |
453 // See if this has a predecessor that is "must_clone", i.e. sets the | |
454 // condition code. If so, choose this first | |
455 for (uint j = 0; j < n->req() ; j++) { | |
456 Node *inn = n->in(j); | |
457 if (inn) { | |
458 if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) { | |
459 n_choice = 3; | |
460 break; | |
461 } | |
462 } | |
463 } | |
464 | |
465 // MachTemps should be scheduled last so they are near their uses | |
466 if (n->is_MachTemp()) { | |
467 n_choice = 1; | |
468 } | |
469 | |
1685 | 470 uint n_latency = cfg->_node_latency->at_grow(n->_idx); |
0 | 471 uint n_score = n->req(); // Many inputs get high score to break ties |
472 | |
473 // Keep best latency found | |
474 if( choice < n_choice || | |
475 ( choice == n_choice && | |
476 ( latency < n_latency || | |
477 ( latency == n_latency && | |
478 ( score < n_score ))))) { | |
479 choice = n_choice; | |
480 latency = n_latency; | |
481 score = n_score; | |
482 idx = i; // Also keep index in worklist | |
483 } | |
484 } // End of for all ready nodes in worklist | |
485 | |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
486 assert(idx >= 0, "index should be set"); |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
487 Node *n = worklist[(uint)idx]; // Get the winner |
0 | 488 |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
489 worklist.map((uint)idx, worklist.pop()); // Compress worklist |
0 | 490 return n; |
491 } | |
492 | |
493 | |
494 //------------------------------set_next_call---------------------------------- | |
495 void Block::set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ) { | |
496 if( next_call.test_set(n->_idx) ) return; | |
497 for( uint i=0; i<n->len(); i++ ) { | |
498 Node *m = n->in(i); | |
499 if( !m ) continue; // must see all nodes in block that precede call | |
500 if( bbs[m->_idx] == this ) | |
501 set_next_call( m, next_call, bbs ); | |
502 } | |
503 } | |
504 | |
505 //------------------------------needed_for_next_call--------------------------- | |
506 // Set the flag 'next_call' for each Node that is needed for the next call to | |
507 // be scheduled. This flag lets me bias scheduling so Nodes needed for the | |
508 // next subroutine call get priority - basically it moves things NOT needed | |
509 // for the next call till after the call. This prevents me from trying to | |
510 // carry lots of stuff live across a call. | |
511 void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs) { | |
512 // Find the next control-defining Node in this block | |
513 Node* call = NULL; | |
514 for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { | |
515 Node* m = this_call->fast_out(i); | |
516 if( bbs[m->_idx] == this && // Local-block user | |
517 m != this_call && // Not self-start node | |
518 m->is_Call() ) | |
519 call = m; | |
520 break; | |
521 } | |
522 if (call == NULL) return; // No next call (e.g., block end is near) | |
523 // Set next-call for all inputs to this call | |
524 set_next_call(call, next_call, bbs); | |
525 } | |
526 | |
527 //------------------------------sched_call------------------------------------- | |
528 uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { | |
529 RegMask regs; | |
530 | |
531 // Schedule all the users of the call right now. All the users are | |
532 // projection Nodes, so they must be scheduled next to the call. | |
533 // Collect all the defined registers. | |
534 for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) { | |
535 Node* n = mcall->fast_out(i); | |
536 assert( n->Opcode()==Op_MachProj, "" ); | |
537 --ready_cnt[n->_idx]; | |
538 assert( !ready_cnt[n->_idx], "" ); | |
539 // Schedule next to call | |
540 _nodes.map(node_cnt++, n); | |
541 // Collect defined registers | |
542 regs.OR(n->out_RegMask()); | |
543 // Check for scheduling the next control-definer | |
544 if( n->bottom_type() == Type::CONTROL ) | |
545 // Warm up next pile of heuristic bits | |
546 needed_for_next_call(n, next_call, bbs); | |
547 | |
548 // Children of projections are now all ready | |
549 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
550 Node* m = n->fast_out(j); // Get user | |
551 if( bbs[m->_idx] != this ) continue; | |
552 if( m->is_Phi() ) continue; | |
553 if( !--ready_cnt[m->_idx] ) | |
554 worklist.push(m); | |
555 } | |
556 | |
557 } | |
558 | |
559 // Act as if the call defines the Frame Pointer. | |
560 // Certainly the FP is alive and well after the call. | |
561 regs.Insert(matcher.c_frame_pointer()); | |
562 | |
563 // Set all registers killed and not already defined by the call. | |
564 uint r_cnt = mcall->tf()->range()->cnt(); | |
565 int op = mcall->ideal_Opcode(); | |
566 MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); | |
567 bbs.map(proj->_idx,this); | |
568 _nodes.insert(node_cnt++, proj); | |
569 | |
570 // Select the right register save policy. | |
571 const char * save_policy; | |
572 switch (op) { | |
573 case Op_CallRuntime: | |
574 case Op_CallLeaf: | |
575 case Op_CallLeafNoFP: | |
576 // Calling C code so use C calling convention | |
577 save_policy = matcher._c_reg_save_policy; | |
578 break; | |
579 | |
580 case Op_CallStaticJava: | |
581 case Op_CallDynamicJava: | |
582 // Calling Java code so use Java calling convention | |
583 save_policy = matcher._register_save_policy; | |
584 break; | |
585 | |
586 default: | |
587 ShouldNotReachHere(); | |
588 } | |
589 | |
590 // When using CallRuntime mark SOE registers as killed by the call | |
591 // so values that could show up in the RegisterMap aren't live in a | |
592 // callee saved register since the register wouldn't know where to | |
593 // find them. CallLeaf and CallLeafNoFP are ok because they can't | |
594 // have debug info on them. Strictly speaking this only needs to be | |
595 // done for oops since idealreg2debugmask takes care of debug info | |
596 // references but there no way to handle oops differently than other | |
597 // pointers as far as the kill mask goes. | |
598 bool exclude_soe = op == Op_CallRuntime; | |
599 | |
1137
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
600 // If the call is a MethodHandle invoke, we need to exclude the |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
601 // register which is used to save the SP value over MH invokes from |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
602 // the mask. Otherwise this register could be used for |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
603 // deoptimization information. |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
604 if (op == Op_CallStaticJava) { |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
605 MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall; |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
606 if (mcallstaticjava->_method_handle_invoke) |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
607 proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask()); |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
608 } |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
609 |
0 | 610 // Fill in the kill mask for the call |
611 for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { | |
612 if( !regs.Member(r) ) { // Not already defined by the call | |
613 // Save-on-call register? | |
614 if ((save_policy[r] == 'C') || | |
615 (save_policy[r] == 'A') || | |
616 ((save_policy[r] == 'E') && exclude_soe)) { | |
617 proj->_rout.Insert(r); | |
618 } | |
619 } | |
620 } | |
621 | |
622 return node_cnt; | |
623 } | |
624 | |
625 | |
626 //------------------------------schedule_local--------------------------------- | |
627 // Topological sort within a block. Someday become a real scheduler. | |
628 bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) { | |
629 // Already "sorted" are the block start Node (as the first entry), and | |
630 // the block-ending Node and any trailing control projections. We leave | |
631 // these alone. PhiNodes and ParmNodes are made to follow the block start | |
632 // Node. Everything else gets topo-sorted. | |
633 | |
634 #ifndef PRODUCT | |
635 if (cfg->trace_opto_pipelining()) { | |
636 tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order); | |
637 for (uint i = 0;i < _nodes.size();i++) { | |
638 tty->print("# "); | |
639 _nodes[i]->fast_dump(); | |
640 } | |
641 tty->print_cr("#"); | |
642 } | |
643 #endif | |
644 | |
645 // RootNode is already sorted | |
646 if( _nodes.size() == 1 ) return true; | |
647 | |
648 // Move PhiNodes and ParmNodes from 1 to cnt up to the start | |
649 uint node_cnt = end_idx(); | |
650 uint phi_cnt = 1; | |
651 uint i; | |
652 for( i = 1; i<node_cnt; i++ ) { // Scan for Phi | |
653 Node *n = _nodes[i]; | |
654 if( n->is_Phi() || // Found a PhiNode or ParmNode | |
655 (n->is_Proj() && n->in(0) == head()) ) { | |
656 // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt | |
657 _nodes.map(i,_nodes[phi_cnt]); | |
658 _nodes.map(phi_cnt++,n); // swap Phi/Parm up front | |
659 } else { // All others | |
660 // Count block-local inputs to 'n' | |
661 uint cnt = n->len(); // Input count | |
662 uint local = 0; | |
663 for( uint j=0; j<cnt; j++ ) { | |
664 Node *m = n->in(j); | |
665 if( m && cfg->_bbs[m->_idx] == this && !m->is_top() ) | |
666 local++; // One more block-local input | |
667 } | |
668 ready_cnt[n->_idx] = local; // Count em up | |
669 | |
670 // A few node types require changing a required edge to a precedence edge | |
671 // before allocation. | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
125
diff
changeset
|
672 if( UseConcMarkSweepGC || UseG1GC ) { |
0 | 673 if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { |
674 // Note: Required edges with an index greater than oper_input_base | |
675 // are not supported by the allocator. | |
676 // Note2: Can only depend on unmatched edge being last, | |
677 // can not depend on its absolute position. | |
678 Node *oop_store = n->in(n->req() - 1); | |
679 n->del_req(n->req() - 1); | |
680 n->add_prec(oop_store); | |
681 assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark"); | |
682 } | |
683 } | |
1100
f96a1a986f7b
6895383: JCK test throws NPE for method compiled with Escape Analysis
kvn
parents:
681
diff
changeset
|
684 if( n->is_Mach() && n->req() > TypeFunc::Parms && |
f96a1a986f7b
6895383: JCK test throws NPE for method compiled with Escape Analysis
kvn
parents:
681
diff
changeset
|
685 (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire || |
f96a1a986f7b
6895383: JCK test throws NPE for method compiled with Escape Analysis
kvn
parents:
681
diff
changeset
|
686 n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) { |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
687 // MemBarAcquire could be created without Precedent edge. |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
688 // del_req() replaces the specified edge with the last input edge |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
689 // and then removes the last edge. If the specified edge > number of |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
690 // edges the last edge will be moved outside of the input edges array |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
691 // and the edge will be lost. This is why this code should be |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
692 // executed only when Precedent (== TypeFunc::Parms) edge is present. |
0 | 693 Node *x = n->in(TypeFunc::Parms); |
694 n->del_req(TypeFunc::Parms); | |
695 n->add_prec(x); | |
696 } | |
697 } | |
698 } | |
699 for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count | |
700 ready_cnt[_nodes[i2]->_idx] = 0; | |
701 | |
702 // All the prescheduled guys do not hold back internal nodes | |
703 uint i3; | |
704 for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled | |
705 Node *n = _nodes[i3]; // Get pre-scheduled | |
706 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
707 Node* m = n->fast_out(j); | |
708 if( cfg->_bbs[m->_idx] ==this ) // Local-block user | |
709 ready_cnt[m->_idx]--; // Fix ready count | |
710 } | |
711 } | |
712 | |
713 Node_List delay; | |
714 // Make a worklist | |
715 Node_List worklist; | |
716 for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist | |
717 Node *m = _nodes[i4]; | |
718 if( !ready_cnt[m->_idx] ) { // Zero ready count? | |
719 if (m->is_iteratively_computed()) { | |
720 // Push induction variable increments last to allow other uses | |
721 // of the phi to be scheduled first. The select() method breaks | |
722 // ties in scheduling by worklist order. | |
723 delay.push(m); | |
125
d942c7e64bd9
6601321: Assert(j == 1 || b->_nodes[j-1]->is_Phi(),"CreateEx must be first instruction in block")
never
parents:
113
diff
changeset
|
724 } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) { |
d942c7e64bd9
6601321: Assert(j == 1 || b->_nodes[j-1]->is_Phi(),"CreateEx must be first instruction in block")
never
parents:
113
diff
changeset
|
725 // Force the CreateEx to the top of the list so it's processed |
d942c7e64bd9
6601321: Assert(j == 1 || b->_nodes[j-1]->is_Phi(),"CreateEx must be first instruction in block")
never
parents:
113
diff
changeset
|
726 // first and ends up at the start of the block. |
d942c7e64bd9
6601321: Assert(j == 1 || b->_nodes[j-1]->is_Phi(),"CreateEx must be first instruction in block")
never
parents:
113
diff
changeset
|
727 worklist.insert(0, m); |
0 | 728 } else { |
729 worklist.push(m); // Then on to worklist! | |
730 } | |
731 } | |
732 } | |
733 while (delay.size()) { | |
734 Node* d = delay.pop(); | |
735 worklist.push(d); | |
736 } | |
737 | |
738 // Warm up the 'next_call' heuristic bits | |
739 needed_for_next_call(_nodes[0], next_call, cfg->_bbs); | |
740 | |
741 #ifndef PRODUCT | |
742 if (cfg->trace_opto_pipelining()) { | |
743 for (uint j=0; j<_nodes.size(); j++) { | |
744 Node *n = _nodes[j]; | |
745 int idx = n->_idx; | |
746 tty->print("# ready cnt:%3d ", ready_cnt[idx]); | |
1685 | 747 tty->print("latency:%3d ", cfg->_node_latency->at_grow(idx)); |
0 | 748 tty->print("%4d: %s\n", idx, n->Name()); |
749 } | |
750 } | |
751 #endif | |
752 | |
753 // Pull from worklist and schedule | |
754 while( worklist.size() ) { // Worklist is not ready | |
755 | |
756 #ifndef PRODUCT | |
757 if (cfg->trace_opto_pipelining()) { | |
758 tty->print("# ready list:"); | |
759 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist | |
760 Node *n = worklist[i]; // Get Node on worklist | |
761 tty->print(" %d", n->_idx); | |
762 } | |
763 tty->cr(); | |
764 } | |
765 #endif | |
766 | |
767 // Select and pop a ready guy from worklist | |
768 Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt); | |
769 _nodes.map(phi_cnt++,n); // Schedule him next | |
770 | |
771 #ifndef PRODUCT | |
772 if (cfg->trace_opto_pipelining()) { | |
773 tty->print("# select %d: %s", n->_idx, n->Name()); | |
1685 | 774 tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx)); |
0 | 775 n->dump(); |
776 if (Verbose) { | |
777 tty->print("# ready list:"); | |
778 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist | |
779 Node *n = worklist[i]; // Get Node on worklist | |
780 tty->print(" %d", n->_idx); | |
781 } | |
782 tty->cr(); | |
783 } | |
784 } | |
785 | |
786 #endif | |
787 if( n->is_MachCall() ) { | |
788 MachCallNode *mcall = n->as_MachCall(); | |
789 phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call); | |
790 continue; | |
791 } | |
792 // Children are now all ready | |
793 for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { | |
794 Node* m = n->fast_out(i5); // Get user | |
795 if( cfg->_bbs[m->_idx] != this ) continue; | |
796 if( m->is_Phi() ) continue; | |
797 if( !--ready_cnt[m->_idx] ) | |
798 worklist.push(m); | |
799 } | |
800 } | |
801 | |
802 if( phi_cnt != end_idx() ) { | |
803 // did not schedule all. Retry, Bailout, or Die | |
804 Compile* C = matcher.C; | |
805 if (C->subsume_loads() == true && !C->failing()) { | |
806 // Retry with subsume_loads == false | |
807 // If this is the first failure, the sentinel string will "stick" | |
808 // to the Compile object, and the C2Compiler will see it and retry. | |
809 C->record_failure(C2Compiler::retry_no_subsuming_loads()); | |
810 } | |
811 // assert( phi_cnt == end_idx(), "did not schedule all" ); | |
812 return false; | |
813 } | |
814 | |
815 #ifndef PRODUCT | |
816 if (cfg->trace_opto_pipelining()) { | |
817 tty->print_cr("#"); | |
818 tty->print_cr("# after schedule_local"); | |
819 for (uint i = 0;i < _nodes.size();i++) { | |
820 tty->print("# "); | |
821 _nodes[i]->fast_dump(); | |
822 } | |
823 tty->cr(); | |
824 } | |
825 #endif | |
826 | |
827 | |
828 return true; | |
829 } | |
830 | |
831 //--------------------------catch_cleanup_fix_all_inputs----------------------- | |
832 static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) { | |
833 for (uint l = 0; l < use->len(); l++) { | |
834 if (use->in(l) == old_def) { | |
835 if (l < use->req()) { | |
836 use->set_req(l, new_def); | |
837 } else { | |
838 use->rm_prec(l); | |
839 use->add_prec(new_def); | |
840 l--; | |
841 } | |
842 } | |
843 } | |
844 } | |
845 | |
846 //------------------------------catch_cleanup_find_cloned_def------------------ | |
847 static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) { | |
848 assert( use_blk != def_blk, "Inter-block cleanup only"); | |
849 | |
850 // The use is some block below the Catch. Find and return the clone of the def | |
851 // that dominates the use. If there is no clone in a dominating block, then | |
852 // create a phi for the def in a dominating block. | |
853 | |
854 // Find which successor block dominates this use. The successor | |
855 // blocks must all be single-entry (from the Catch only; I will have | |
856 // split blocks to make this so), hence they all dominate. | |
857 while( use_blk->_dom_depth > def_blk->_dom_depth+1 ) | |
858 use_blk = use_blk->_idom; | |
859 | |
860 // Find the successor | |
861 Node *fixup = NULL; | |
862 | |
863 uint j; | |
864 for( j = 0; j < def_blk->_num_succs; j++ ) | |
865 if( use_blk == def_blk->_succs[j] ) | |
866 break; | |
867 | |
868 if( j == def_blk->_num_succs ) { | |
869 // Block at same level in dom-tree is not a successor. It needs a | |
870 // PhiNode, the PhiNode uses from the def and IT's uses need fixup. | |
871 Node_Array inputs = new Node_List(Thread::current()->resource_area()); | |
872 for(uint k = 1; k < use_blk->num_preds(); k++) { | |
873 inputs.map(k, catch_cleanup_find_cloned_def(bbs[use_blk->pred(k)->_idx], def, def_blk, bbs, n_clone_idx)); | |
874 } | |
875 | |
876 // Check to see if the use_blk already has an identical phi inserted. | |
877 // If it exists, it will be at the first position since all uses of a | |
878 // def are processed together. | |
879 Node *phi = use_blk->_nodes[1]; | |
880 if( phi->is_Phi() ) { | |
881 fixup = phi; | |
882 for (uint k = 1; k < use_blk->num_preds(); k++) { | |
883 if (phi->in(k) != inputs[k]) { | |
884 // Not a match | |
885 fixup = NULL; | |
886 break; | |
887 } | |
888 } | |
889 } | |
890 | |
891 // If an existing PhiNode was not found, make a new one. | |
892 if (fixup == NULL) { | |
893 Node *new_phi = PhiNode::make(use_blk->head(), def); | |
894 use_blk->_nodes.insert(1, new_phi); | |
895 bbs.map(new_phi->_idx, use_blk); | |
896 for (uint k = 1; k < use_blk->num_preds(); k++) { | |
897 new_phi->set_req(k, inputs[k]); | |
898 } | |
899 fixup = new_phi; | |
900 } | |
901 | |
902 } else { | |
903 // Found the use just below the Catch. Make it use the clone. | |
904 fixup = use_blk->_nodes[n_clone_idx]; | |
905 } | |
906 | |
907 return fixup; | |
908 } | |
909 | |
910 //--------------------------catch_cleanup_intra_block-------------------------- | |
911 // Fix all input edges in use that reference "def". The use is in the same | |
912 // block as the def and both have been cloned in each successor block. | |
913 static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) { | |
914 | |
915 // Both the use and def have been cloned. For each successor block, | |
916 // get the clone of the use, and make its input the clone of the def | |
917 // found in that block. | |
918 | |
919 uint use_idx = blk->find_node(use); | |
920 uint offset_idx = use_idx - beg; | |
921 for( uint k = 0; k < blk->_num_succs; k++ ) { | |
922 // Get clone in each successor block | |
923 Block *sb = blk->_succs[k]; | |
924 Node *clone = sb->_nodes[offset_idx+1]; | |
925 assert( clone->Opcode() == use->Opcode(), "" ); | |
926 | |
927 // Make use-clone reference the def-clone | |
928 catch_cleanup_fix_all_inputs(clone, def, sb->_nodes[n_clone_idx]); | |
929 } | |
930 } | |
931 | |
932 //------------------------------catch_cleanup_inter_block--------------------- | |
933 // Fix all input edges in use that reference "def". The use is in a different | |
934 // block than the def. | |
935 static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) { | |
936 if( !use_blk ) return; // Can happen if the use is a precedence edge | |
937 | |
938 Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, bbs, n_clone_idx); | |
939 catch_cleanup_fix_all_inputs(use, def, new_def); | |
940 } | |
941 | |
942 //------------------------------call_catch_cleanup----------------------------- | |
943 // If we inserted any instructions between a Call and his CatchNode, | |
944 // clone the instructions on all paths below the Catch. | |
945 void Block::call_catch_cleanup(Block_Array &bbs) { | |
946 | |
947 // End of region to clone | |
948 uint end = end_idx(); | |
949 if( !_nodes[end]->is_Catch() ) return; | |
950 // Start of region to clone | |
951 uint beg = end; | |
952 while( _nodes[beg-1]->Opcode() != Op_MachProj || | |
953 !_nodes[beg-1]->in(0)->is_Call() ) { | |
954 beg--; | |
955 assert(beg > 0,"Catch cleanup walking beyond block boundary"); | |
956 } | |
957 // Range of inserted instructions is [beg, end) | |
958 if( beg == end ) return; | |
959 | |
960 // Clone along all Catch output paths. Clone area between the 'beg' and | |
961 // 'end' indices. | |
962 for( uint i = 0; i < _num_succs; i++ ) { | |
963 Block *sb = _succs[i]; | |
964 // Clone the entire area; ignoring the edge fixup for now. | |
965 for( uint j = end; j > beg; j-- ) { | |
1693
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
966 // It is safe here to clone a node with anti_dependence |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
967 // since clones dominate on each path. |
0 | 968 Node *clone = _nodes[j-1]->clone(); |
969 sb->_nodes.insert( 1, clone ); | |
970 bbs.map(clone->_idx,sb); | |
971 } | |
972 } | |
973 | |
974 | |
975 // Fixup edges. Check the def-use info per cloned Node | |
976 for(uint i2 = beg; i2 < end; i2++ ) { | |
977 uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block | |
978 Node *n = _nodes[i2]; // Node that got cloned | |
979 // Need DU safe iterator because of edge manipulation in calls. | |
980 Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area()); | |
981 for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) { | |
982 out->push(n->fast_out(j1)); | |
983 } | |
984 uint max = out->size(); | |
985 for (uint j = 0; j < max; j++) {// For all users | |
986 Node *use = out->pop(); | |
987 Block *buse = bbs[use->_idx]; | |
988 if( use->is_Phi() ) { | |
989 for( uint k = 1; k < use->req(); k++ ) | |
990 if( use->in(k) == n ) { | |
991 Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx); | |
992 use->set_req(k, fixup); | |
993 } | |
994 } else { | |
995 if (this == buse) { | |
996 catch_cleanup_intra_block(use, n, this, beg, n_clone_idx); | |
997 } else { | |
998 catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx); | |
999 } | |
1000 } | |
1001 } // End for all users | |
1002 | |
1003 } // End of for all Nodes in cloned area | |
1004 | |
1005 // Remove the now-dead cloned ops | |
1006 for(uint i3 = beg; i3 < end; i3++ ) { | |
1007 _nodes[beg]->disconnect_inputs(NULL); | |
1008 _nodes.remove(beg); | |
1009 } | |
1010 | |
1011 // If the successor blocks have a CreateEx node, move it back to the top | |
1012 for(uint i4 = 0; i4 < _num_succs; i4++ ) { | |
1013 Block *sb = _succs[i4]; | |
1014 uint new_cnt = end - beg; | |
1015 // Remove any newly created, but dead, nodes. | |
1016 for( uint j = new_cnt; j > 0; j-- ) { | |
1017 Node *n = sb->_nodes[j]; | |
1018 if (n->outcnt() == 0 && | |
1019 (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){ | |
1020 n->disconnect_inputs(NULL); | |
1021 sb->_nodes.remove(j); | |
1022 new_cnt--; | |
1023 } | |
1024 } | |
1025 // If any newly created nodes remain, move the CreateEx node to the top | |
1026 if (new_cnt > 0) { | |
1027 Node *cex = sb->_nodes[1+new_cnt]; | |
1028 if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) { | |
1029 sb->_nodes.remove(1+new_cnt); | |
1030 sb->_nodes.insert(1,cex); | |
1031 } | |
1032 } | |
1033 } | |
1034 } |