Mercurial > hg > truffle
annotate src/share/vm/opto/lcm.cpp @ 9126:bc26f978b0ce
HotSpotResolvedObjectType: implement hasFinalizeSubclass() correctly
don't use the (wrong) cached value, but ask the runtime on each request.
Fixes regression on xml.* benchmarks @ specjvm2008. The problem was:
After the constructor of Object was deoptimized due to an assumption violation,
it was recompiled again after some time. However, on recompilation, the value
of hasFinalizeSubclass for the class was not updated and it was compiled again
with a, now wrong, assumption, which then triggers deoptimization again.
This was repeated until it hit the recompilation limit (defined by
PerMethodRecompilationCutoff), and therefore only executed by the interpreter
from now on, causing the performance regression.
author | Bernhard Urban <bernhard.urban@jku.at> |
---|---|
date | Mon, 15 Apr 2013 19:54:58 +0200 |
parents | 571076d3c79d |
children | 70120f47d403 |
rev | line source |
---|---|
0 | 1 /* |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4820
diff
changeset
|
2 * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "memory/allocation.inline.hpp" | |
27 #include "opto/block.hpp" | |
28 #include "opto/c2compiler.hpp" | |
29 #include "opto/callnode.hpp" | |
30 #include "opto/cfgnode.hpp" | |
31 #include "opto/machnode.hpp" | |
32 #include "opto/runtime.hpp" | |
33 #ifdef TARGET_ARCH_MODEL_x86_32 | |
34 # include "adfiles/ad_x86_32.hpp" | |
35 #endif | |
36 #ifdef TARGET_ARCH_MODEL_x86_64 | |
37 # include "adfiles/ad_x86_64.hpp" | |
38 #endif | |
39 #ifdef TARGET_ARCH_MODEL_sparc | |
40 # include "adfiles/ad_sparc.hpp" | |
41 #endif | |
42 #ifdef TARGET_ARCH_MODEL_zero | |
43 # include "adfiles/ad_zero.hpp" | |
44 #endif | |
2401
7e88bdae86ec
7029017: Additional architecture support for c2 compiler
roland
parents:
1972
diff
changeset
|
45 #ifdef TARGET_ARCH_MODEL_arm |
7e88bdae86ec
7029017: Additional architecture support for c2 compiler
roland
parents:
1972
diff
changeset
|
46 # include "adfiles/ad_arm.hpp" |
7e88bdae86ec
7029017: Additional architecture support for c2 compiler
roland
parents:
1972
diff
changeset
|
47 #endif |
3796 | 48 #ifdef TARGET_ARCH_MODEL_ppc |
49 # include "adfiles/ad_ppc.hpp" | |
50 #endif | |
0 | 51 |
1972 | 52 // Optimization - Graph Style |
0 | 53 |
54 //------------------------------implicit_null_check---------------------------- | |
55 // Detect implicit-null-check opportunities. Basically, find NULL checks | |
56 // with suitable memory ops nearby. Use the memory op to do the NULL check. | |
57 // I can generate a memory op if there is not one nearby. | |
58 // The proj is the control projection for the not-null case. | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
59 // The val is the pointer being checked for nullness or |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
60 // decodeHeapOop_not_null node if it did not fold into address. |
0 | 61 void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) { |
62 // Assume if null check need for 0 offset then always needed | |
63 // Intel solaris doesn't support any null checks yet and no | |
64 // mechanism exists (yet) to set the switches at an os_cpu level | |
65 if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; | |
66 | |
67 // Make sure the ptr-is-null path appears to be uncommon! | |
68 float f = end()->as_MachIf()->_prob; | |
69 if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; | |
70 if( f > PROB_UNLIKELY_MAG(4) ) return; | |
71 | |
72 uint bidx = 0; // Capture index of value into memop | |
73 bool was_store; // Memory op is a store op | |
74 | |
75 // Get the successor block for if the test ptr is non-null | |
76 Block* not_null_block; // this one goes with the proj | |
77 Block* null_block; | |
78 if (_nodes[_nodes.size()-1] == proj) { | |
79 null_block = _succs[0]; | |
80 not_null_block = _succs[1]; | |
81 } else { | |
82 assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other"); | |
83 not_null_block = _succs[0]; | |
84 null_block = _succs[1]; | |
85 } | |
332 | 86 while (null_block->is_Empty() == Block::empty_with_goto) { |
87 null_block = null_block->_succs[0]; | |
88 } | |
0 | 89 |
90 // Search the exception block for an uncommon trap. | |
91 // (See Parse::do_if and Parse::do_ifnull for the reason | |
92 // we need an uncommon trap. Briefly, we need a way to | |
93 // detect failure of this optimization, as in 6366351.) | |
94 { | |
95 bool found_trap = false; | |
96 for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) { | |
97 Node* nn = null_block->_nodes[i1]; | |
98 if (nn->is_MachCall() && | |
1748 | 99 nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) { |
0 | 100 const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type(); |
101 if (trtype->isa_int() && trtype->is_int()->is_con()) { | |
102 jint tr_con = trtype->is_int()->get_con(); | |
103 Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con); | |
104 Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con); | |
105 assert((int)reason < (int)BitsPerInt, "recode bit map"); | |
106 if (is_set_nth_bit(allowed_reasons, (int) reason) | |
107 && action != Deoptimization::Action_none) { | |
108 // This uncommon trap is sure to recompile, eventually. | |
109 // When that happens, C->too_many_traps will prevent | |
110 // this transformation from happening again. | |
111 found_trap = true; | |
112 } | |
113 } | |
114 break; | |
115 } | |
116 } | |
117 if (!found_trap) { | |
118 // We did not find an uncommon trap. | |
119 return; | |
120 } | |
121 } | |
122 | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
123 // Check for decodeHeapOop_not_null node which did not fold into address |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
124 bool is_decoden = ((intptr_t)val) & 1; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
125 val = (Node*)(((intptr_t)val) & ~1); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
126 |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
127 assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() && |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
128 (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity"); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
129 |
0 | 130 // Search the successor block for a load or store who's base value is also |
131 // the tested value. There may be several. | |
132 Node_List *out = new Node_List(Thread::current()->resource_area()); | |
133 MachNode *best = NULL; // Best found so far | |
134 for (DUIterator i = val->outs(); val->has_out(i); i++) { | |
135 Node *m = val->out(i); | |
136 if( !m->is_Mach() ) continue; | |
137 MachNode *mach = m->as_Mach(); | |
138 was_store = false; | |
1693
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
139 int iop = mach->ideal_Opcode(); |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
140 switch( iop ) { |
0 | 141 case Op_LoadB: |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4820
diff
changeset
|
142 case Op_LoadUB: |
558
3b5ac9e7e6ea
6796746: rename LoadC (char) opcode class to LoadUS (unsigned short)
twisti
parents:
365
diff
changeset
|
143 case Op_LoadUS: |
0 | 144 case Op_LoadD: |
145 case Op_LoadF: | |
146 case Op_LoadI: | |
147 case Op_LoadL: | |
148 case Op_LoadP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
149 case Op_LoadN: |
0 | 150 case Op_LoadS: |
151 case Op_LoadKlass: | |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
125
diff
changeset
|
152 case Op_LoadNKlass: |
0 | 153 case Op_LoadRange: |
154 case Op_LoadD_unaligned: | |
155 case Op_LoadL_unaligned: | |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
156 assert(mach->in(2) == val, "should be address"); |
0 | 157 break; |
158 case Op_StoreB: | |
159 case Op_StoreC: | |
160 case Op_StoreCM: | |
161 case Op_StoreD: | |
162 case Op_StoreF: | |
163 case Op_StoreI: | |
164 case Op_StoreL: | |
165 case Op_StoreP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
166 case Op_StoreN: |
6848
8e47bac5643a
7054512: Compress class pointers after perm gen removal
roland
parents:
6804
diff
changeset
|
167 case Op_StoreNKlass: |
0 | 168 was_store = true; // Memory op is a store op |
169 // Stores will have their address in slot 2 (memory in slot 1). | |
170 // If the value being nul-checked is in another slot, it means we | |
171 // are storing the checked value, which does NOT check the value! | |
172 if( mach->in(2) != val ) continue; | |
173 break; // Found a memory op? | |
174 case Op_StrComp: | |
681 | 175 case Op_StrEquals: |
176 case Op_StrIndexOf: | |
169
9148c65abefc
6695049: (coll) Create an x86 intrinsic for Arrays.equals
rasbold
parents:
164
diff
changeset
|
177 case Op_AryEq: |
7637
b30b3c2a0cf2
6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86
kvn
parents:
7196
diff
changeset
|
178 case Op_EncodeISOArray: |
0 | 179 // Not a legit memory op for implicit null check regardless of |
180 // embedded loads | |
181 continue; | |
182 default: // Also check for embedded loads | |
183 if( !mach->needs_anti_dependence_check() ) | |
184 continue; // Not an memory op; skip it | |
1693
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
185 if( must_clone[iop] ) { |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
186 // Do not move nodes which produce flags because |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
187 // RA will try to clone it to place near branch and |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
188 // it will cause recompilation, see clone_node(). |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
189 continue; |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
190 } |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
191 { |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
192 // Check that value is used in memory address in |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
193 // instructions with embedded load (CmpP val1,(val2+off)). |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
194 Node* base; |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
195 Node* index; |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
196 const MachOper* oper = mach->memory_inputs(base, index); |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
197 if (oper == NULL || oper == (MachOper*)-1) { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
198 continue; // Not an memory op; skip it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
199 } |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
200 if (val == base || |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
201 val == index && val->bottom_type()->isa_narrowoop()) { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
202 break; // Found it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
203 } else { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
204 continue; // Skip it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
205 } |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
206 } |
0 | 207 break; |
208 } | |
209 // check if the offset is not too high for implicit exception | |
210 { | |
211 intptr_t offset = 0; | |
212 const TypePtr *adr_type = NULL; // Do not need this return value here | |
213 const Node* base = mach->get_base_and_disp(offset, adr_type); | |
214 if (base == NULL || base == NodeSentinel) { | |
332 | 215 // Narrow oop address doesn't have base, only index |
216 if( val->bottom_type()->isa_narrowoop() && | |
217 MacroAssembler::needs_explicit_null_check(offset) ) | |
218 continue; // Give up if offset is beyond page size | |
0 | 219 // cannot reason about it; is probably not implicit null exception |
220 } else { | |
642
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
221 const TypePtr* tptr; |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
222 if (UseCompressedOops && Universe::narrow_oop_shift() == 0) { |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
223 // 32-bits narrow oop can be the base of address expressions |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
224 tptr = base->bottom_type()->make_ptr(); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
225 } else { |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
226 // only regular oops are expected here |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
227 tptr = base->bottom_type()->is_ptr(); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
228 } |
0 | 229 // Give up if offset is not a compile-time constant |
230 if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot ) | |
231 continue; | |
232 offset += tptr->_offset; // correct if base is offseted | |
233 if( MacroAssembler::needs_explicit_null_check(offset) ) | |
234 continue; // Give up is reference is beyond 4K page size | |
235 } | |
236 } | |
237 | |
238 // Check ctrl input to see if the null-check dominates the memory op | |
239 Block *cb = cfg->_bbs[mach->_idx]; | |
240 cb = cb->_idom; // Always hoist at least 1 block | |
241 if( !was_store ) { // Stores can be hoisted only one block | |
242 while( cb->_dom_depth > (_dom_depth + 1)) | |
243 cb = cb->_idom; // Hoist loads as far as we want | |
244 // The non-null-block should dominate the memory op, too. Live | |
245 // range spilling will insert a spill in the non-null-block if it is | |
246 // needs to spill the memory op for an implicit null check. | |
247 if (cb->_dom_depth == (_dom_depth + 1)) { | |
248 if (cb != not_null_block) continue; | |
249 cb = cb->_idom; | |
250 } | |
251 } | |
252 if( cb != this ) continue; | |
253 | |
254 // Found a memory user; see if it can be hoisted to check-block | |
255 uint vidx = 0; // Capture index of value into memop | |
256 uint j; | |
257 for( j = mach->req()-1; j > 0; j-- ) { | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
258 if( mach->in(j) == val ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
259 vidx = j; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
260 // Ignore DecodeN val which could be hoisted to where needed. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
261 if( is_decoden ) continue; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
262 } |
0 | 263 // Block of memory-op input |
264 Block *inb = cfg->_bbs[mach->in(j)->_idx]; | |
265 Block *b = this; // Start from nul check | |
266 while( b != inb && b->_dom_depth > inb->_dom_depth ) | |
267 b = b->_idom; // search upwards for input | |
268 // See if input dominates null check | |
269 if( b != inb ) | |
270 break; | |
271 } | |
272 if( j > 0 ) | |
273 continue; | |
274 Block *mb = cfg->_bbs[mach->_idx]; | |
275 // Hoisting stores requires more checks for the anti-dependence case. | |
276 // Give up hoisting if we have to move the store past any load. | |
277 if( was_store ) { | |
278 Block *b = mb; // Start searching here for a local load | |
279 // mach use (faulting) trying to hoist | |
280 // n might be blocker to hoisting | |
281 while( b != this ) { | |
282 uint k; | |
283 for( k = 1; k < b->_nodes.size(); k++ ) { | |
284 Node *n = b->_nodes[k]; | |
285 if( n->needs_anti_dependence_check() && | |
286 n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) | |
287 break; // Found anti-dependent load | |
288 } | |
289 if( k < b->_nodes.size() ) | |
290 break; // Found anti-dependent load | |
291 // Make sure control does not do a merge (would have to check allpaths) | |
292 if( b->num_preds() != 2 ) break; | |
293 b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block | |
294 } | |
295 if( b != this ) continue; | |
296 } | |
297 | |
298 // Make sure this memory op is not already being used for a NullCheck | |
299 Node *e = mb->end(); | |
300 if( e->is_MachNullCheck() && e->in(1) == mach ) | |
301 continue; // Already being used as a NULL check | |
302 | |
303 // Found a candidate! Pick one with least dom depth - the highest | |
304 // in the dom tree should be closest to the null check. | |
305 if( !best || | |
306 cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) { | |
307 best = mach; | |
308 bidx = vidx; | |
309 | |
310 } | |
311 } | |
312 // No candidate! | |
313 if( !best ) return; | |
314 | |
315 // ---- Found an implicit null check | |
316 extern int implicit_null_checks; | |
317 implicit_null_checks++; | |
318 | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
319 if( is_decoden ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
320 // Check if we need to hoist decodeHeapOop_not_null first. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
321 Block *valb = cfg->_bbs[val->_idx]; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
322 if( this != valb && this->_dom_depth < valb->_dom_depth ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
323 // Hoist it up to the end of the test block. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
324 valb->find_remove(val); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
325 this->add_inst(val); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
326 cfg->_bbs.map(val->_idx,this); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
327 // DecodeN on x86 may kill flags. Check for flag-killing projections |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
328 // that also need to be hoisted. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
329 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
330 Node* n = val->fast_out(j); |
3842 | 331 if( n->is_MachProj() ) { |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
332 cfg->_bbs[n->_idx]->find_remove(n); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
333 this->add_inst(n); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
334 cfg->_bbs.map(n->_idx,this); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
335 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
336 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
337 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
338 } |
0 | 339 // Hoist the memory candidate up to the end of the test block. |
340 Block *old_block = cfg->_bbs[best->_idx]; | |
341 old_block->find_remove(best); | |
342 add_inst(best); | |
343 cfg->_bbs.map(best->_idx,this); | |
344 | |
345 // Move the control dependence | |
346 if (best->in(0) && best->in(0) == old_block->_nodes[0]) | |
347 best->set_req(0, _nodes[0]); | |
348 | |
349 // Check for flag-killing projections that also need to be hoisted | |
350 // Should be DU safe because no edge updates. | |
351 for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { | |
352 Node* n = best->fast_out(j); | |
3842 | 353 if( n->is_MachProj() ) { |
0 | 354 cfg->_bbs[n->_idx]->find_remove(n); |
355 add_inst(n); | |
356 cfg->_bbs.map(n->_idx,this); | |
357 } | |
358 } | |
359 | |
360 Compile *C = cfg->C; | |
361 // proj==Op_True --> ne test; proj==Op_False --> eq test. | |
362 // One of two graph shapes got matched: | |
363 // (IfTrue (If (Bool NE (CmpP ptr NULL)))) | |
364 // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) | |
365 // NULL checks are always branch-if-eq. If we see a IfTrue projection | |
366 // then we are replacing a 'ne' test with a 'eq' NULL check test. | |
367 // We need to flip the projections to keep the same semantics. | |
368 if( proj->Opcode() == Op_IfTrue ) { | |
369 // Swap order of projections in basic block to swap branch targets | |
370 Node *tmp1 = _nodes[end_idx()+1]; | |
371 Node *tmp2 = _nodes[end_idx()+2]; | |
372 _nodes.map(end_idx()+1, tmp2); | |
373 _nodes.map(end_idx()+2, tmp1); | |
6804
e626685e9f6c
7193318: C2: remove number of inputs requirement from Node's new operator
kvn
parents:
6179
diff
changeset
|
374 Node *tmp = new (C) Node(C->top()); // Use not NULL input |
0 | 375 tmp1->replace_by(tmp); |
376 tmp2->replace_by(tmp1); | |
377 tmp->replace_by(tmp2); | |
378 tmp->destruct(); | |
379 } | |
380 | |
381 // Remove the existing null check; use a new implicit null check instead. | |
382 // Since schedule-local needs precise def-use info, we need to correct | |
383 // it as well. | |
384 Node *old_tst = proj->in(0); | |
385 MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); | |
386 _nodes.map(end_idx(),nul_chk); | |
387 cfg->_bbs.map(nul_chk->_idx,this); | |
388 // Redirect users of old_test to nul_chk | |
389 for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) | |
390 old_tst->last_out(i2)->set_req(0, nul_chk); | |
391 // Clean-up any dead code | |
392 for (uint i3 = 0; i3 < old_tst->req(); i3++) | |
393 old_tst->set_req(i3, NULL); | |
394 | |
395 cfg->latency_from_uses(nul_chk); | |
396 cfg->latency_from_uses(best); | |
397 } | |
398 | |
399 | |
400 //------------------------------select----------------------------------------- | |
401 // Select a nice fellow from the worklist to schedule next. If there is only | |
402 // one choice, then use it. Projections take top priority for correctness | |
403 // reasons - if I see a projection, then it is next. There are a number of | |
404 // other special cases, for instructions that consume condition codes, et al. | |
405 // These are chosen immediately. Some instructions are required to immediately | |
406 // precede the last instruction in the block, and these are taken last. Of the | |
407 // remaining cases (most), choose the instruction with the greatest latency | |
408 // (that is, the most number of pseudo-cycles required to the end of the | |
409 // routine). If there is a tie, choose the instruction with the most inputs. | |
4820
cf407b7d3d78
7116050: C2/ARM: memory stomping error with DivideMcTests
roland
parents:
4120
diff
changeset
|
410 Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) { |
0 | 411 |
412 // If only a single entry on the stack, use it | |
413 uint cnt = worklist.size(); | |
414 if (cnt == 1) { | |
415 Node *n = worklist[0]; | |
416 worklist.map(0,worklist.pop()); | |
417 return n; | |
418 } | |
419 | |
420 uint choice = 0; // Bigger is most important | |
421 uint latency = 0; // Bigger is scheduled first | |
422 uint score = 0; // Bigger is better | |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
423 int idx = -1; // Index in worklist |
8691
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
424 int cand_cnt = 0; // Candidate count |
0 | 425 |
426 for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist | |
427 // Order in worklist is used to break ties. | |
428 // See caller for how this is used to delay scheduling | |
429 // of induction variable increments to after the other | |
430 // uses of the phi are scheduled. | |
431 Node *n = worklist[i]; // Get Node on worklist | |
432 | |
433 int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0; | |
434 if( n->is_Proj() || // Projections always win | |
435 n->Opcode()== Op_Con || // So does constant 'Top' | |
436 iop == Op_CreateEx || // Create-exception must start block | |
437 iop == Op_CheckCastPP | |
438 ) { | |
439 worklist.map(i,worklist.pop()); | |
440 return n; | |
441 } | |
442 | |
443 // Final call in a block must be adjacent to 'catch' | |
444 Node *e = end(); | |
445 if( e->is_Catch() && e->in(0)->in(0) == n ) | |
446 continue; | |
447 | |
448 // Memory op for an implicit null check has to be at the end of the block | |
449 if( e->is_MachNullCheck() && e->in(1) == n ) | |
450 continue; | |
451 | |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4820
diff
changeset
|
452 // Schedule IV increment last. |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4820
diff
changeset
|
453 if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd && |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4820
diff
changeset
|
454 e->in(1)->in(1) == n && n->is_iteratively_computed()) |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4820
diff
changeset
|
455 continue; |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4820
diff
changeset
|
456 |
0 | 457 uint n_choice = 2; |
458 | |
459 // See if this instruction is consumed by a branch. If so, then (as the | |
460 // branch is the last instruction in the basic block) force it to the | |
461 // end of the basic block | |
462 if ( must_clone[iop] ) { | |
463 // See if any use is a branch | |
464 bool found_machif = false; | |
465 | |
466 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
467 Node* use = n->fast_out(j); | |
468 | |
469 // The use is a conditional branch, make them adjacent | |
470 if (use->is_MachIf() && cfg->_bbs[use->_idx]==this ) { | |
471 found_machif = true; | |
472 break; | |
473 } | |
474 | |
475 // More than this instruction pending for successor to be ready, | |
476 // don't choose this if other opportunities are ready | |
4820
cf407b7d3d78
7116050: C2/ARM: memory stomping error with DivideMcTests
roland
parents:
4120
diff
changeset
|
477 if (ready_cnt.at(use->_idx) > 1) |
0 | 478 n_choice = 1; |
479 } | |
480 | |
481 // loop terminated, prefer not to use this instruction | |
482 if (found_machif) | |
483 continue; | |
484 } | |
485 | |
486 // See if this has a predecessor that is "must_clone", i.e. sets the | |
487 // condition code. If so, choose this first | |
488 for (uint j = 0; j < n->req() ; j++) { | |
489 Node *inn = n->in(j); | |
490 if (inn) { | |
491 if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) { | |
492 n_choice = 3; | |
493 break; | |
494 } | |
495 } | |
496 } | |
497 | |
498 // MachTemps should be scheduled last so they are near their uses | |
499 if (n->is_MachTemp()) { | |
500 n_choice = 1; | |
501 } | |
502 | |
1685 | 503 uint n_latency = cfg->_node_latency->at_grow(n->_idx); |
0 | 504 uint n_score = n->req(); // Many inputs get high score to break ties |
505 | |
506 // Keep best latency found | |
8691
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
507 cand_cnt++; |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
508 if (choice < n_choice || |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
509 (choice == n_choice && |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
510 ((StressLCM && Compile::randomized_select(cand_cnt)) || |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
511 (!StressLCM && |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
512 (latency < n_latency || |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
513 (latency == n_latency && |
571076d3c79d
8009120: Fuzz instruction scheduling in HotSpot compilers
shade
parents:
7637
diff
changeset
|
514 (score < n_score))))))) { |
0 | 515 choice = n_choice; |
516 latency = n_latency; | |
517 score = n_score; | |
518 idx = i; // Also keep index in worklist | |
519 } | |
520 } // End of for all ready nodes in worklist | |
521 | |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
522 assert(idx >= 0, "index should be set"); |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
523 Node *n = worklist[(uint)idx]; // Get the winner |
0 | 524 |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
525 worklist.map((uint)idx, worklist.pop()); // Compress worklist |
0 | 526 return n; |
527 } | |
528 | |
529 | |
530 //------------------------------set_next_call---------------------------------- | |
531 void Block::set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ) { | |
532 if( next_call.test_set(n->_idx) ) return; | |
533 for( uint i=0; i<n->len(); i++ ) { | |
534 Node *m = n->in(i); | |
535 if( !m ) continue; // must see all nodes in block that precede call | |
536 if( bbs[m->_idx] == this ) | |
537 set_next_call( m, next_call, bbs ); | |
538 } | |
539 } | |
540 | |
541 //------------------------------needed_for_next_call--------------------------- | |
542 // Set the flag 'next_call' for each Node that is needed for the next call to | |
543 // be scheduled. This flag lets me bias scheduling so Nodes needed for the | |
544 // next subroutine call get priority - basically it moves things NOT needed | |
545 // for the next call till after the call. This prevents me from trying to | |
546 // carry lots of stuff live across a call. | |
547 void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs) { | |
548 // Find the next control-defining Node in this block | |
549 Node* call = NULL; | |
550 for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { | |
551 Node* m = this_call->fast_out(i); | |
552 if( bbs[m->_idx] == this && // Local-block user | |
553 m != this_call && // Not self-start node | |
3842 | 554 m->is_MachCall() ) |
0 | 555 call = m; |
556 break; | |
557 } | |
558 if (call == NULL) return; // No next call (e.g., block end is near) | |
559 // Set next-call for all inputs to this call | |
560 set_next_call(call, next_call, bbs); | |
561 } | |
562 | |
4120
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
563 //------------------------------add_call_kills------------------------------------- |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
564 void Block::add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) { |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
565 // Fill in the kill mask for the call |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
566 for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
567 if( !regs.Member(r) ) { // Not already defined by the call |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
568 // Save-on-call register? |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
569 if ((save_policy[r] == 'C') || |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
570 (save_policy[r] == 'A') || |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
571 ((save_policy[r] == 'E') && exclude_soe)) { |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
572 proj->_rout.Insert(r); |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
573 } |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
574 } |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
575 } |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
576 } |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
577 |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
578 |
//------------------------------sched_call-------------------------------------
// Schedule a MachCall: place all of its projection users directly after the
// call in this block's _nodes list, collect the registers the call defines,
// and insert an extra fat-proj MachProjNode carrying the full kill mask.
//   matcher   - supplies the frame pointer and register save policies
//   bbs       - Node-index -> Block map; updated for the new kill projection
//   node_cnt  - next free schedule slot in _nodes
//   worklist  - receives users whose ready count drops to zero
//   ready_cnt - per-node count of not-yet-scheduled block-local inputs
//   mcall     - the call node being scheduled
//   next_call - heuristic bits warmed up for the next call in the block
// Returns the updated node_cnt after consuming slots for the projections
// and the kill projection.
uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    // The call was this projection's only unscheduled input, so its ready
    // count must hit exactly zero here.
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;  // Only block-local users count
      if( m->is_Phi() ) continue;           // Phis are pre-scheduled
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }

  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  // The fat_proj carries the kill mask; it is inserted right after the
  // call's real projections in the schedule.
  MachProjNode *proj = new (matcher.C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}
669 | |
670 | |
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
// Pre-schedules the block head, Phis/Parms and the block tail, computes a
// per-node ready count (unscheduled block-local inputs), then greedily pulls
// ready nodes off a worklist using select() as the priority heuristic.
// Returns true on success; returns false when not every node could be
// scheduled, after requesting a retry without subsumed loads when possible.
bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &ready_cnt, VectorSet &next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order);
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print_cr("#");
  }
#endif

  // RootNode is already sorted
  if( _nodes.size() == 1 ) return true;

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = _nodes[i];
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj() && n->in(0) == head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      _nodes.map(i,_nodes[phi_cnt]);
      _nodes.map(phi_cnt++,n);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(_nodes[i2]->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = _nodes[i3];       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if( cfg->_bbs[m->_idx] ==this ) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
      }
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = _nodes[i4];
    if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);         // Then on to worklist!
      }
    }
  }
  // Delayed (iteratively-computed) nodes go on the worklist last.
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(_nodes[0], next_call, cfg->_bbs);

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    for (uint j=0; j<_nodes.size(); j++) {
      Node *n = _nodes[j];
      int idx = n->_idx;
      tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
      tty->print("latency:%3d  ", cfg->_node_latency->at_grow(idx));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  // Nodes with _idx >= max_idx are created during scheduling (kill
  // projections) and have no ready-count entry; they are skipped below.
  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt);
    _nodes.map(phi_cnt++,n);    // Schedule him next

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i];      // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }

#endif
    if( n->is_MachCall() ) {
      // Calls schedule their projections and kill-proj via sched_call().
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    if (n->is_Mach() && n->as_Mach()->has_call()) {
      // Non-call Mach node with a CALL effect: synthesize a fat-proj kill
      // projection covering C-call clobbers plus the node's own outputs.
      RegMask regs;
      regs.Insert(matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (matcher.C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      cfg->_bbs.map(proj->_idx,this);
      _nodes.insert(phi_cnt++, proj);

      add_call_kills(proj, regs, matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if( cfg->_bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    Compile* C = matcher.C;
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->cr();
  }
#endif


  return true;
}
899 | |
900 //--------------------------catch_cleanup_fix_all_inputs----------------------- | |
901 static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) { | |
902 for (uint l = 0; l < use->len(); l++) { | |
903 if (use->in(l) == old_def) { | |
904 if (l < use->req()) { | |
905 use->set_req(l, new_def); | |
906 } else { | |
907 use->rm_prec(l); | |
908 use->add_prec(new_def); | |
909 l--; | |
910 } | |
911 } | |
912 } | |
913 } | |
914 | |
//------------------------------catch_cleanup_find_cloned_def------------------
// Inter-block case: return the clone of 'def' that dominates 'use_blk'.
// Walks use_blk up the dominator tree to the level just below def_blk.  If
// that block is a direct successor of def_blk, the clone sitting at
// n_clone_idx is returned directly; otherwise a PhiNode merging the
// per-predecessor cloned defs is found (if already inserted) or created at
// the head of the block.
static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) {
  assert( use_blk != def_blk, "Inter-block cleanup only");

  // The use is some block below the Catch.  Find and return the clone of the def
  // that dominates the use. If there is no clone in a dominating block, then
  // create a phi for the def in a dominating block.

  // Find which successor block dominates this use.  The successor
  // blocks must all be single-entry (from the Catch only; I will have
  // split blocks to make this so), hence they all dominate.
  while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
    use_blk = use_blk->_idom;

  // Find the successor
  Node *fixup = NULL;

  uint j;
  for( j = 0; j < def_blk->_num_succs; j++ )
    if( use_blk == def_blk->_succs[j] )
      break;

  if( j == def_blk->_num_succs ) {
    // Block at same level in dom-tree is not a successor.  It needs a
    // PhiNode, the PhiNode uses from the def and IT's uses need fixup.
    // Recursively resolve the dominating clone for each predecessor;
    // these become the Phi's inputs (resource-arena temporary array).
    Node_Array inputs = new Node_List(Thread::current()->resource_area());
    for(uint k = 1; k < use_blk->num_preds(); k++) {
      inputs.map(k, catch_cleanup_find_cloned_def(bbs[use_blk->pred(k)->_idx], def, def_blk, bbs, n_clone_idx));
    }

    // Check to see if the use_blk already has an identical phi inserted.
    // If it exists, it will be at the first position since all uses of a
    // def are processed together.
    Node *phi = use_blk->_nodes[1];
    if( phi->is_Phi() ) {
      fixup = phi;
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        if (phi->in(k) != inputs[k]) {
          // Not a match
          fixup = NULL;
          break;
        }
      }
    }

    // If an existing PhiNode was not found, make a new one.
    if (fixup == NULL) {
      Node *new_phi = PhiNode::make(use_blk->head(), def);
      use_blk->_nodes.insert(1, new_phi);
      bbs.map(new_phi->_idx, use_blk);
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        new_phi->set_req(k, inputs[k]);
      }
      fixup = new_phi;
    }

  } else {
    // Found the use just below the Catch.  Make it use the clone.
    fixup = use_blk->_nodes[n_clone_idx];
  }

  return fixup;
}
978 | |
979 //--------------------------catch_cleanup_intra_block-------------------------- | |
980 // Fix all input edges in use that reference "def". The use is in the same | |
981 // block as the def and both have been cloned in each successor block. | |
982 static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) { | |
983 | |
984 // Both the use and def have been cloned. For each successor block, | |
985 // get the clone of the use, and make its input the clone of the def | |
986 // found in that block. | |
987 | |
988 uint use_idx = blk->find_node(use); | |
989 uint offset_idx = use_idx - beg; | |
990 for( uint k = 0; k < blk->_num_succs; k++ ) { | |
991 // Get clone in each successor block | |
992 Block *sb = blk->_succs[k]; | |
993 Node *clone = sb->_nodes[offset_idx+1]; | |
994 assert( clone->Opcode() == use->Opcode(), "" ); | |
995 | |
996 // Make use-clone reference the def-clone | |
997 catch_cleanup_fix_all_inputs(clone, def, sb->_nodes[n_clone_idx]); | |
998 } | |
999 } | |
1000 | |
1001 //------------------------------catch_cleanup_inter_block--------------------- | |
1002 // Fix all input edges in use that reference "def". The use is in a different | |
1003 // block than the def. | |
1004 static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) { | |
1005 if( !use_blk ) return; // Can happen if the use is a precedence edge | |
1006 | |
1007 Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, bbs, n_clone_idx); | |
1008 catch_cleanup_fix_all_inputs(use, def, new_def); | |
1009 } | |
1010 | |
//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch.
void Block::call_catch_cleanup(Block_Array &bbs, Compile* C) {

  // End of region to clone
  uint end = end_idx();
  if( !_nodes[end]->is_Catch() ) return;
  // Start of region to clone: walk backwards from the Catch until we hit
  // the MachProj hanging off the MachCall that produced it.
  uint beg = end;
  while(!_nodes[beg-1]->is_MachProj() ||
        !_nodes[beg-1]->in(0)->is_MachCall() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  // Range of inserted instructions is [beg, end)
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < _num_succs; i++ ) {
    Block *sb = _succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    // Iterating j from 'end' down to 'beg+1' while always inserting at
    // index 1 leaves the clones in original order at indices 1..(end-beg).
    for( uint j = end; j > beg; j-- ) {
      // It is safe here to clone a node with anti_dependence
      // since clones dominate on each path.
      Node *clone = _nodes[j-1]->clone();
      sb->_nodes.insert( 1, clone );
      bbs.map(clone->_idx,sb);  // register the clone's containing block
    }
  }


  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = _nodes[i2];        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    // Snapshot the users first; set_req() below would invalidate a live
    // DUIterator over n's out-edges.
    Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) {// For all users
      Node *use = out->pop();
      Block *buse = bbs[use->_idx];
      if( use->is_Phi() ) {
        // A Phi merges per-predecessor values: resolve each input that
        // referenced n against the clone visible from that predecessor.
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n ) {
            Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx);
            use->set_req(k, fixup);
          }
      } else {
        // Non-Phi use: dispatch on whether the use was cloned along with
        // the def (same block) or lives further down the CFG.
        if (this == buse) {
          catch_cleanup_intra_block(use, n, this, beg, n_clone_idx);
        } else {
          catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx);
        }
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops from this block.  Removing at 'beg'
  // shifts the remaining ops down, so the loop just repeats at 'beg'.
  for(uint i3 = beg; i3 < end; i3++ ) {
    _nodes[beg]->disconnect_inputs(NULL, C);
    _nodes.remove(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < _num_succs; i4++ ) {
    Block *sb = _succs[i4];
    uint new_cnt = end - beg;
    // Remove any newly created, but dead, nodes.
    // A Proj is kept alive as long as its parent has other users
    // (in(0)->outcnt() == 1 means this Proj was the last user).
    for( uint j = new_cnt; j > 0; j-- ) {
      Node *n = sb->_nodes[j];
      if (n->outcnt() == 0 &&
          (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
        n->disconnect_inputs(NULL, C);
        sb->_nodes.remove(j);
        new_cnt--;
      }
    }
    // If any newly created nodes remain, move the CreateEx node to the top
    // (the clones were inserted above it, pushing it to index 1+new_cnt).
    if (new_cnt > 0) {
      Node *cex = sb->_nodes[1+new_cnt];
      if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
        sb->_nodes.remove(1+new_cnt);
        sb->_nodes.insert(1,cex);
      }
    }
  }
}