Mercurial > hg > truffle
annotate src/share/vm/opto/lcm.cpp @ 1994:6cd6d394f280
7001033: assert(gch->gc_cause() == GCCause::_scavenge_alot || !gch->incremental_collection_failed())
7002546: regression on SpecJbb2005 on 7b118 comparing to 7b117 on small heaps
Summary: Relaxed assertion checking related to incremental_collection_failed flag to allow for ExplicitGCInvokesConcurrent behaviour where we do not want a failing scavenge to bail to a stop-world collection. Parameterized incremental_collection_will_fail() so we can selectively use, or not use, as appropriate, the statistical prediction at specific use sites. This essentially reverts the scavenge bail-out logic to what it was prior to some recent changes that had inadvertently started using the statistical prediction which can be noisy in the presence of bursty loads. Added some associated verbose non-product debugging messages.
Reviewed-by: johnc, tonyp
author | ysr |
---|---|
date | Tue, 07 Dec 2010 21:55:53 -0800 |
parents | f95d63e2154a |
children | 7e88bdae86ec e6beb62de02d |
rev | line source |
---|---|
0 | 1 /* |
1748 | 2 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "memory/allocation.inline.hpp" | |
27 #include "opto/block.hpp" | |
28 #include "opto/c2compiler.hpp" | |
29 #include "opto/callnode.hpp" | |
30 #include "opto/cfgnode.hpp" | |
31 #include "opto/machnode.hpp" | |
32 #include "opto/runtime.hpp" | |
33 #ifdef TARGET_ARCH_MODEL_x86_32 | |
34 # include "adfiles/ad_x86_32.hpp" | |
35 #endif | |
36 #ifdef TARGET_ARCH_MODEL_x86_64 | |
37 # include "adfiles/ad_x86_64.hpp" | |
38 #endif | |
39 #ifdef TARGET_ARCH_MODEL_sparc | |
40 # include "adfiles/ad_sparc.hpp" | |
41 #endif | |
42 #ifdef TARGET_ARCH_MODEL_zero | |
43 # include "adfiles/ad_zero.hpp" | |
44 #endif | |
0 | 45 |
1972 | 46 // Optimization - Graph Style |
0 | 47 |
48 //------------------------------implicit_null_check---------------------------- | |
49 // Detect implicit-null-check opportunities. Basically, find NULL checks | |
50 // with suitable memory ops nearby. Use the memory op to do the NULL check. | |
51 // I can generate a memory op if there is not one nearby. | |
52 // The proj is the control projection for the not-null case. | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
53 // The val is the pointer being checked for nullness or |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
54 // decodeHeapOop_not_null node if it did not fold into address. |
0 | 55 void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) { |
56 // Assume if null check need for 0 offset then always needed | |
57 // Intel solaris doesn't support any null checks yet and no | |
58 // mechanism exists (yet) to set the switches at an os_cpu level | |
59 if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; | |
60 | |
61 // Make sure the ptr-is-null path appears to be uncommon! | |
62 float f = end()->as_MachIf()->_prob; | |
63 if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; | |
64 if( f > PROB_UNLIKELY_MAG(4) ) return; | |
65 | |
66 uint bidx = 0; // Capture index of value into memop | |
67 bool was_store; // Memory op is a store op | |
68 | |
69 // Get the successor block for if the test ptr is non-null | |
70 Block* not_null_block; // this one goes with the proj | |
71 Block* null_block; | |
72 if (_nodes[_nodes.size()-1] == proj) { | |
73 null_block = _succs[0]; | |
74 not_null_block = _succs[1]; | |
75 } else { | |
76 assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other"); | |
77 not_null_block = _succs[0]; | |
78 null_block = _succs[1]; | |
79 } | |
332 | 80 while (null_block->is_Empty() == Block::empty_with_goto) { |
81 null_block = null_block->_succs[0]; | |
82 } | |
0 | 83 |
84 // Search the exception block for an uncommon trap. | |
85 // (See Parse::do_if and Parse::do_ifnull for the reason | |
86 // we need an uncommon trap. Briefly, we need a way to | |
87 // detect failure of this optimization, as in 6366351.) | |
88 { | |
89 bool found_trap = false; | |
90 for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) { | |
91 Node* nn = null_block->_nodes[i1]; | |
92 if (nn->is_MachCall() && | |
1748 | 93 nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) { |
0 | 94 const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type(); |
95 if (trtype->isa_int() && trtype->is_int()->is_con()) { | |
96 jint tr_con = trtype->is_int()->get_con(); | |
97 Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con); | |
98 Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con); | |
99 assert((int)reason < (int)BitsPerInt, "recode bit map"); | |
100 if (is_set_nth_bit(allowed_reasons, (int) reason) | |
101 && action != Deoptimization::Action_none) { | |
102 // This uncommon trap is sure to recompile, eventually. | |
103 // When that happens, C->too_many_traps will prevent | |
104 // this transformation from happening again. | |
105 found_trap = true; | |
106 } | |
107 } | |
108 break; | |
109 } | |
110 } | |
111 if (!found_trap) { | |
112 // We did not find an uncommon trap. | |
113 return; | |
114 } | |
115 } | |
116 | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
117 // Check for decodeHeapOop_not_null node which did not fold into address |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
118 bool is_decoden = ((intptr_t)val) & 1; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
119 val = (Node*)(((intptr_t)val) & ~1); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
120 |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
121 assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() && |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
122 (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity"); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
123 |
0 | 124 // Search the successor block for a load or store who's base value is also |
125 // the tested value. There may be several. | |
126 Node_List *out = new Node_List(Thread::current()->resource_area()); | |
127 MachNode *best = NULL; // Best found so far | |
128 for (DUIterator i = val->outs(); val->has_out(i); i++) { | |
129 Node *m = val->out(i); | |
130 if( !m->is_Mach() ) continue; | |
131 MachNode *mach = m->as_Mach(); | |
132 was_store = false; | |
1693
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
133 int iop = mach->ideal_Opcode(); |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
134 switch( iop ) { |
0 | 135 case Op_LoadB: |
558
3b5ac9e7e6ea
6796746: rename LoadC (char) opcode class to LoadUS (unsigned short)
twisti
parents:
365
diff
changeset
|
136 case Op_LoadUS: |
0 | 137 case Op_LoadD: |
138 case Op_LoadF: | |
139 case Op_LoadI: | |
140 case Op_LoadL: | |
141 case Op_LoadP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
142 case Op_LoadN: |
0 | 143 case Op_LoadS: |
144 case Op_LoadKlass: | |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
125
diff
changeset
|
145 case Op_LoadNKlass: |
0 | 146 case Op_LoadRange: |
147 case Op_LoadD_unaligned: | |
148 case Op_LoadL_unaligned: | |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
149 assert(mach->in(2) == val, "should be address"); |
0 | 150 break; |
151 case Op_StoreB: | |
152 case Op_StoreC: | |
153 case Op_StoreCM: | |
154 case Op_StoreD: | |
155 case Op_StoreF: | |
156 case Op_StoreI: | |
157 case Op_StoreL: | |
158 case Op_StoreP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
159 case Op_StoreN: |
0 | 160 was_store = true; // Memory op is a store op |
161 // Stores will have their address in slot 2 (memory in slot 1). | |
162 // If the value being nul-checked is in another slot, it means we | |
163 // are storing the checked value, which does NOT check the value! | |
164 if( mach->in(2) != val ) continue; | |
165 break; // Found a memory op? | |
166 case Op_StrComp: | |
681 | 167 case Op_StrEquals: |
168 case Op_StrIndexOf: | |
169
9148c65abefc
6695049: (coll) Create an x86 intrinsic for Arrays.equals
rasbold
parents:
164
diff
changeset
|
169 case Op_AryEq: |
0 | 170 // Not a legit memory op for implicit null check regardless of |
171 // embedded loads | |
172 continue; | |
173 default: // Also check for embedded loads | |
174 if( !mach->needs_anti_dependence_check() ) | |
175 continue; // Not an memory op; skip it | |
1693
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
176 if( must_clone[iop] ) { |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
177 // Do not move nodes which produce flags because |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
178 // RA will try to clone it to place near branch and |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
179 // it will cause recompilation, see clone_node(). |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
180 continue; |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
181 } |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
182 { |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
183 // Check that value is used in memory address in |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
184 // instructions with embedded load (CmpP val1,(val2+off)). |
1151
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
185 Node* base; |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
186 Node* index; |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
187 const MachOper* oper = mach->memory_inputs(base, index); |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
188 if (oper == NULL || oper == (MachOper*)-1) { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
189 continue; // Not an memory op; skip it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
190 } |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
191 if (val == base || |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
192 val == index && val->bottom_type()->isa_narrowoop()) { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
193 break; // Found it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
194 } else { |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
195 continue; // Skip it |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
196 } |
1271af4ec18c
6912517: JIT bug compiles out (and stops running) code that needs to be run. Causes NPE.
kvn
parents:
1137
diff
changeset
|
197 } |
0 | 198 break; |
199 } | |
200 // check if the offset is not too high for implicit exception | |
201 { | |
202 intptr_t offset = 0; | |
203 const TypePtr *adr_type = NULL; // Do not need this return value here | |
204 const Node* base = mach->get_base_and_disp(offset, adr_type); | |
205 if (base == NULL || base == NodeSentinel) { | |
332 | 206 // Narrow oop address doesn't have base, only index |
207 if( val->bottom_type()->isa_narrowoop() && | |
208 MacroAssembler::needs_explicit_null_check(offset) ) | |
209 continue; // Give up if offset is beyond page size | |
0 | 210 // cannot reason about it; is probably not implicit null exception |
211 } else { | |
642
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
212 const TypePtr* tptr; |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
213 if (UseCompressedOops && Universe::narrow_oop_shift() == 0) { |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
214 // 32-bits narrow oop can be the base of address expressions |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
215 tptr = base->bottom_type()->make_ptr(); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
216 } else { |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
217 // only regular oops are expected here |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
218 tptr = base->bottom_type()->is_ptr(); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
558
diff
changeset
|
219 } |
0 | 220 // Give up if offset is not a compile-time constant |
221 if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot ) | |
222 continue; | |
223 offset += tptr->_offset; // correct if base is offseted | |
224 if( MacroAssembler::needs_explicit_null_check(offset) ) | |
225 continue; // Give up is reference is beyond 4K page size | |
226 } | |
227 } | |
228 | |
229 // Check ctrl input to see if the null-check dominates the memory op | |
230 Block *cb = cfg->_bbs[mach->_idx]; | |
231 cb = cb->_idom; // Always hoist at least 1 block | |
232 if( !was_store ) { // Stores can be hoisted only one block | |
233 while( cb->_dom_depth > (_dom_depth + 1)) | |
234 cb = cb->_idom; // Hoist loads as far as we want | |
235 // The non-null-block should dominate the memory op, too. Live | |
236 // range spilling will insert a spill in the non-null-block if it is | |
237 // needs to spill the memory op for an implicit null check. | |
238 if (cb->_dom_depth == (_dom_depth + 1)) { | |
239 if (cb != not_null_block) continue; | |
240 cb = cb->_idom; | |
241 } | |
242 } | |
243 if( cb != this ) continue; | |
244 | |
245 // Found a memory user; see if it can be hoisted to check-block | |
246 uint vidx = 0; // Capture index of value into memop | |
247 uint j; | |
248 for( j = mach->req()-1; j > 0; j-- ) { | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
249 if( mach->in(j) == val ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
250 vidx = j; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
251 // Ignore DecodeN val which could be hoisted to where needed. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
252 if( is_decoden ) continue; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
253 } |
0 | 254 // Block of memory-op input |
255 Block *inb = cfg->_bbs[mach->in(j)->_idx]; | |
256 Block *b = this; // Start from nul check | |
257 while( b != inb && b->_dom_depth > inb->_dom_depth ) | |
258 b = b->_idom; // search upwards for input | |
259 // See if input dominates null check | |
260 if( b != inb ) | |
261 break; | |
262 } | |
263 if( j > 0 ) | |
264 continue; | |
265 Block *mb = cfg->_bbs[mach->_idx]; | |
266 // Hoisting stores requires more checks for the anti-dependence case. | |
267 // Give up hoisting if we have to move the store past any load. | |
268 if( was_store ) { | |
269 Block *b = mb; // Start searching here for a local load | |
270 // mach use (faulting) trying to hoist | |
271 // n might be blocker to hoisting | |
272 while( b != this ) { | |
273 uint k; | |
274 for( k = 1; k < b->_nodes.size(); k++ ) { | |
275 Node *n = b->_nodes[k]; | |
276 if( n->needs_anti_dependence_check() && | |
277 n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) | |
278 break; // Found anti-dependent load | |
279 } | |
280 if( k < b->_nodes.size() ) | |
281 break; // Found anti-dependent load | |
282 // Make sure control does not do a merge (would have to check allpaths) | |
283 if( b->num_preds() != 2 ) break; | |
284 b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block | |
285 } | |
286 if( b != this ) continue; | |
287 } | |
288 | |
289 // Make sure this memory op is not already being used for a NullCheck | |
290 Node *e = mb->end(); | |
291 if( e->is_MachNullCheck() && e->in(1) == mach ) | |
292 continue; // Already being used as a NULL check | |
293 | |
294 // Found a candidate! Pick one with least dom depth - the highest | |
295 // in the dom tree should be closest to the null check. | |
296 if( !best || | |
297 cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) { | |
298 best = mach; | |
299 bidx = vidx; | |
300 | |
301 } | |
302 } | |
303 // No candidate! | |
304 if( !best ) return; | |
305 | |
306 // ---- Found an implicit null check | |
307 extern int implicit_null_checks; | |
308 implicit_null_checks++; | |
309 | |
1575
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
310 if( is_decoden ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
311 // Check if we need to hoist decodeHeapOop_not_null first. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
312 Block *valb = cfg->_bbs[val->_idx]; |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
313 if( this != valb && this->_dom_depth < valb->_dom_depth ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
314 // Hoist it up to the end of the test block. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
315 valb->find_remove(val); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
316 this->add_inst(val); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
317 cfg->_bbs.map(val->_idx,this); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
318 // DecodeN on x86 may kill flags. Check for flag-killing projections |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
319 // that also need to be hoisted. |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
320 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
321 Node* n = val->fast_out(j); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
322 if( n->Opcode() == Op_MachProj ) { |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
323 cfg->_bbs[n->_idx]->find_remove(n); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
324 this->add_inst(n); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
325 cfg->_bbs.map(n->_idx,this); |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
326 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
327 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
328 } |
3657cb01ffc5
6954029: Improve implicit null check generation with compressed oops
kvn
parents:
1151
diff
changeset
|
329 } |
0 | 330 // Hoist the memory candidate up to the end of the test block. |
331 Block *old_block = cfg->_bbs[best->_idx]; | |
332 old_block->find_remove(best); | |
333 add_inst(best); | |
334 cfg->_bbs.map(best->_idx,this); | |
335 | |
336 // Move the control dependence | |
337 if (best->in(0) && best->in(0) == old_block->_nodes[0]) | |
338 best->set_req(0, _nodes[0]); | |
339 | |
340 // Check for flag-killing projections that also need to be hoisted | |
341 // Should be DU safe because no edge updates. | |
342 for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { | |
343 Node* n = best->fast_out(j); | |
344 if( n->Opcode() == Op_MachProj ) { | |
345 cfg->_bbs[n->_idx]->find_remove(n); | |
346 add_inst(n); | |
347 cfg->_bbs.map(n->_idx,this); | |
348 } | |
349 } | |
350 | |
351 Compile *C = cfg->C; | |
352 // proj==Op_True --> ne test; proj==Op_False --> eq test. | |
353 // One of two graph shapes got matched: | |
354 // (IfTrue (If (Bool NE (CmpP ptr NULL)))) | |
355 // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) | |
356 // NULL checks are always branch-if-eq. If we see a IfTrue projection | |
357 // then we are replacing a 'ne' test with a 'eq' NULL check test. | |
358 // We need to flip the projections to keep the same semantics. | |
359 if( proj->Opcode() == Op_IfTrue ) { | |
360 // Swap order of projections in basic block to swap branch targets | |
361 Node *tmp1 = _nodes[end_idx()+1]; | |
362 Node *tmp2 = _nodes[end_idx()+2]; | |
363 _nodes.map(end_idx()+1, tmp2); | |
364 _nodes.map(end_idx()+2, tmp1); | |
365 Node *tmp = new (C, 1) Node(C->top()); // Use not NULL input | |
366 tmp1->replace_by(tmp); | |
367 tmp2->replace_by(tmp1); | |
368 tmp->replace_by(tmp2); | |
369 tmp->destruct(); | |
370 } | |
371 | |
372 // Remove the existing null check; use a new implicit null check instead. | |
373 // Since schedule-local needs precise def-use info, we need to correct | |
374 // it as well. | |
375 Node *old_tst = proj->in(0); | |
376 MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); | |
377 _nodes.map(end_idx(),nul_chk); | |
378 cfg->_bbs.map(nul_chk->_idx,this); | |
379 // Redirect users of old_test to nul_chk | |
380 for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) | |
381 old_tst->last_out(i2)->set_req(0, nul_chk); | |
382 // Clean-up any dead code | |
383 for (uint i3 = 0; i3 < old_tst->req(); i3++) | |
384 old_tst->set_req(i3, NULL); | |
385 | |
386 cfg->latency_from_uses(nul_chk); | |
387 cfg->latency_from_uses(best); | |
388 } | |
389 | |
390 | |
391 //------------------------------select----------------------------------------- | |
392 // Select a nice fellow from the worklist to schedule next. If there is only | |
393 // one choice, then use it. Projections take top priority for correctness | |
394 // reasons - if I see a projection, then it is next. There are a number of | |
395 // other special cases, for instructions that consume condition codes, et al. | |
396 // These are chosen immediately. Some instructions are required to immediately | |
397 // precede the last instruction in the block, and these are taken last. Of the | |
398 // remaining cases (most), choose the instruction with the greatest latency | |
399 // (that is, the most number of pseudo-cycles required to the end of the | |
400 // routine). If there is a tie, choose the instruction with the most inputs. | |
401 Node *Block::select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot) { | |
402 | |
403 // If only a single entry on the stack, use it | |
404 uint cnt = worklist.size(); | |
405 if (cnt == 1) { | |
406 Node *n = worklist[0]; | |
407 worklist.map(0,worklist.pop()); | |
408 return n; | |
409 } | |
410 | |
411 uint choice = 0; // Bigger is most important | |
412 uint latency = 0; // Bigger is scheduled first | |
413 uint score = 0; // Bigger is better | |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
414 int idx = -1; // Index in worklist |
0 | 415 |
416 for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist | |
417 // Order in worklist is used to break ties. | |
418 // See caller for how this is used to delay scheduling | |
419 // of induction variable increments to after the other | |
420 // uses of the phi are scheduled. | |
421 Node *n = worklist[i]; // Get Node on worklist | |
422 | |
423 int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0; | |
424 if( n->is_Proj() || // Projections always win | |
425 n->Opcode()== Op_Con || // So does constant 'Top' | |
426 iop == Op_CreateEx || // Create-exception must start block | |
427 iop == Op_CheckCastPP | |
428 ) { | |
429 worklist.map(i,worklist.pop()); | |
430 return n; | |
431 } | |
432 | |
433 // Final call in a block must be adjacent to 'catch' | |
434 Node *e = end(); | |
435 if( e->is_Catch() && e->in(0)->in(0) == n ) | |
436 continue; | |
437 | |
438 // Memory op for an implicit null check has to be at the end of the block | |
439 if( e->is_MachNullCheck() && e->in(1) == n ) | |
440 continue; | |
441 | |
442 uint n_choice = 2; | |
443 | |
444 // See if this instruction is consumed by a branch. If so, then (as the | |
445 // branch is the last instruction in the basic block) force it to the | |
446 // end of the basic block | |
447 if ( must_clone[iop] ) { | |
448 // See if any use is a branch | |
449 bool found_machif = false; | |
450 | |
451 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
452 Node* use = n->fast_out(j); | |
453 | |
454 // The use is a conditional branch, make them adjacent | |
455 if (use->is_MachIf() && cfg->_bbs[use->_idx]==this ) { | |
456 found_machif = true; | |
457 break; | |
458 } | |
459 | |
460 // More than this instruction pending for successor to be ready, | |
461 // don't choose this if other opportunities are ready | |
462 if (ready_cnt[use->_idx] > 1) | |
463 n_choice = 1; | |
464 } | |
465 | |
466 // loop terminated, prefer not to use this instruction | |
467 if (found_machif) | |
468 continue; | |
469 } | |
470 | |
471 // See if this has a predecessor that is "must_clone", i.e. sets the | |
472 // condition code. If so, choose this first | |
473 for (uint j = 0; j < n->req() ; j++) { | |
474 Node *inn = n->in(j); | |
475 if (inn) { | |
476 if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) { | |
477 n_choice = 3; | |
478 break; | |
479 } | |
480 } | |
481 } | |
482 | |
483 // MachTemps should be scheduled last so they are near their uses | |
484 if (n->is_MachTemp()) { | |
485 n_choice = 1; | |
486 } | |
487 | |
1685 | 488 uint n_latency = cfg->_node_latency->at_grow(n->_idx); |
0 | 489 uint n_score = n->req(); // Many inputs get high score to break ties |
490 | |
491 // Keep best latency found | |
492 if( choice < n_choice || | |
493 ( choice == n_choice && | |
494 ( latency < n_latency || | |
495 ( latency == n_latency && | |
496 ( score < n_score ))))) { | |
497 choice = n_choice; | |
498 latency = n_latency; | |
499 score = n_score; | |
500 idx = i; // Also keep index in worklist | |
501 } | |
502 } // End of for all ready nodes in worklist | |
503 | |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
504 assert(idx >= 0, "index should be set"); |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
505 Node *n = worklist[(uint)idx]; // Get the winner |
0 | 506 |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
507 worklist.map((uint)idx, worklist.pop()); // Compress worklist |
0 | 508 return n; |
509 } | |
510 | |
511 | |
512 //------------------------------set_next_call---------------------------------- | |
513 void Block::set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ) { | |
514 if( next_call.test_set(n->_idx) ) return; | |
515 for( uint i=0; i<n->len(); i++ ) { | |
516 Node *m = n->in(i); | |
517 if( !m ) continue; // must see all nodes in block that precede call | |
518 if( bbs[m->_idx] == this ) | |
519 set_next_call( m, next_call, bbs ); | |
520 } | |
521 } | |
522 | |
523 //------------------------------needed_for_next_call--------------------------- | |
524 // Set the flag 'next_call' for each Node that is needed for the next call to | |
525 // be scheduled. This flag lets me bias scheduling so Nodes needed for the | |
526 // next subroutine call get priority - basically it moves things NOT needed | |
527 // for the next call till after the call. This prevents me from trying to | |
528 // carry lots of stuff live across a call. | |
529 void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs) { | |
530 // Find the next control-defining Node in this block | |
531 Node* call = NULL; | |
532 for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { | |
533 Node* m = this_call->fast_out(i); | |
534 if( bbs[m->_idx] == this && // Local-block user | |
535 m != this_call && // Not self-start node | |
536 m->is_Call() ) | |
537 call = m; | |
538 break; | |
539 } | |
540 if (call == NULL) return; // No next call (e.g., block end is near) | |
541 // Set next-call for all inputs to this call | |
542 set_next_call(call, next_call, bbs); | |
543 } | |
544 | |
545 //------------------------------sched_call------------------------------------- | |
546 uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { | |
547 RegMask regs; | |
548 | |
549 // Schedule all the users of the call right now. All the users are | |
550 // projection Nodes, so they must be scheduled next to the call. | |
551 // Collect all the defined registers. | |
552 for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) { | |
553 Node* n = mcall->fast_out(i); | |
554 assert( n->Opcode()==Op_MachProj, "" ); | |
555 --ready_cnt[n->_idx]; | |
556 assert( !ready_cnt[n->_idx], "" ); | |
557 // Schedule next to call | |
558 _nodes.map(node_cnt++, n); | |
559 // Collect defined registers | |
560 regs.OR(n->out_RegMask()); | |
561 // Check for scheduling the next control-definer | |
562 if( n->bottom_type() == Type::CONTROL ) | |
563 // Warm up next pile of heuristic bits | |
564 needed_for_next_call(n, next_call, bbs); | |
565 | |
566 // Children of projections are now all ready | |
567 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
568 Node* m = n->fast_out(j); // Get user | |
569 if( bbs[m->_idx] != this ) continue; | |
570 if( m->is_Phi() ) continue; | |
571 if( !--ready_cnt[m->_idx] ) | |
572 worklist.push(m); | |
573 } | |
574 | |
575 } | |
576 | |
577 // Act as if the call defines the Frame Pointer. | |
578 // Certainly the FP is alive and well after the call. | |
579 regs.Insert(matcher.c_frame_pointer()); | |
580 | |
581 // Set all registers killed and not already defined by the call. | |
582 uint r_cnt = mcall->tf()->range()->cnt(); | |
583 int op = mcall->ideal_Opcode(); | |
584 MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); | |
585 bbs.map(proj->_idx,this); | |
586 _nodes.insert(node_cnt++, proj); | |
587 | |
588 // Select the right register save policy. | |
589 const char * save_policy; | |
590 switch (op) { | |
591 case Op_CallRuntime: | |
592 case Op_CallLeaf: | |
593 case Op_CallLeafNoFP: | |
594 // Calling C code so use C calling convention | |
595 save_policy = matcher._c_reg_save_policy; | |
596 break; | |
597 | |
598 case Op_CallStaticJava: | |
599 case Op_CallDynamicJava: | |
600 // Calling Java code so use Java calling convention | |
601 save_policy = matcher._register_save_policy; | |
602 break; | |
603 | |
604 default: | |
605 ShouldNotReachHere(); | |
606 } | |
607 | |
608 // When using CallRuntime mark SOE registers as killed by the call | |
609 // so values that could show up in the RegisterMap aren't live in a | |
610 // callee saved register since the register wouldn't know where to | |
611 // find them. CallLeaf and CallLeafNoFP are ok because they can't | |
612 // have debug info on them. Strictly speaking this only needs to be | |
613 // done for oops since idealreg2debugmask takes care of debug info | |
614 // references but there no way to handle oops differently than other | |
615 // pointers as far as the kill mask goes. | |
616 bool exclude_soe = op == Op_CallRuntime; | |
617 | |
1137
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
618 // If the call is a MethodHandle invoke, we need to exclude the |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
619 // register which is used to save the SP value over MH invokes from |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
620 // the mask. Otherwise this register could be used for |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
621 // deoptimization information. |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
622 if (op == Op_CallStaticJava) { |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
623 MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall; |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
624 if (mcallstaticjava->_method_handle_invoke) |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
625 proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask()); |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
626 } |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
627 |
0 | 628 // Fill in the kill mask for the call |
629 for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { | |
630 if( !regs.Member(r) ) { // Not already defined by the call | |
631 // Save-on-call register? | |
632 if ((save_policy[r] == 'C') || | |
633 (save_policy[r] == 'A') || | |
634 ((save_policy[r] == 'E') && exclude_soe)) { | |
635 proj->_rout.Insert(r); | |
636 } | |
637 } | |
638 } | |
639 | |
640 return node_cnt; | |
641 } | |
642 | |
643 | |
644 //------------------------------schedule_local--------------------------------- | |
645 // Topological sort within a block. Someday become a real scheduler. | |
646 bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) { | |
647 // Already "sorted" are the block start Node (as the first entry), and | |
648 // the block-ending Node and any trailing control projections. We leave | |
649 // these alone. PhiNodes and ParmNodes are made to follow the block start | |
650 // Node. Everything else gets topo-sorted. | |
651 | |
652 #ifndef PRODUCT | |
653 if (cfg->trace_opto_pipelining()) { | |
654 tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order); | |
655 for (uint i = 0;i < _nodes.size();i++) { | |
656 tty->print("# "); | |
657 _nodes[i]->fast_dump(); | |
658 } | |
659 tty->print_cr("#"); | |
660 } | |
661 #endif | |
662 | |
663 // RootNode is already sorted | |
664 if( _nodes.size() == 1 ) return true; | |
665 | |
666 // Move PhiNodes and ParmNodes from 1 to cnt up to the start | |
667 uint node_cnt = end_idx(); | |
668 uint phi_cnt = 1; | |
669 uint i; | |
670 for( i = 1; i<node_cnt; i++ ) { // Scan for Phi | |
671 Node *n = _nodes[i]; | |
672 if( n->is_Phi() || // Found a PhiNode or ParmNode | |
673 (n->is_Proj() && n->in(0) == head()) ) { | |
674 // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt | |
675 _nodes.map(i,_nodes[phi_cnt]); | |
676 _nodes.map(phi_cnt++,n); // swap Phi/Parm up front | |
677 } else { // All others | |
678 // Count block-local inputs to 'n' | |
679 uint cnt = n->len(); // Input count | |
680 uint local = 0; | |
681 for( uint j=0; j<cnt; j++ ) { | |
682 Node *m = n->in(j); | |
683 if( m && cfg->_bbs[m->_idx] == this && !m->is_top() ) | |
684 local++; // One more block-local input | |
685 } | |
686 ready_cnt[n->_idx] = local; // Count em up | |
687 | |
688 // A few node types require changing a required edge to a precedence edge | |
689 // before allocation. | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
125
diff
changeset
|
690 if( UseConcMarkSweepGC || UseG1GC ) { |
0 | 691 if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { |
692 // Note: Required edges with an index greater than oper_input_base | |
693 // are not supported by the allocator. | |
694 // Note2: Can only depend on unmatched edge being last, | |
695 // can not depend on its absolute position. | |
696 Node *oop_store = n->in(n->req() - 1); | |
697 n->del_req(n->req() - 1); | |
698 n->add_prec(oop_store); | |
699 assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark"); | |
700 } | |
701 } | |
1100
f96a1a986f7b
6895383: JCK test throws NPE for method compiled with Escape Analysis
kvn
parents:
681
diff
changeset
|
702 if( n->is_Mach() && n->req() > TypeFunc::Parms && |
f96a1a986f7b
6895383: JCK test throws NPE for method compiled with Escape Analysis
kvn
parents:
681
diff
changeset
|
703 (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire || |
f96a1a986f7b
6895383: JCK test throws NPE for method compiled with Escape Analysis
kvn
parents:
681
diff
changeset
|
704 n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) { |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
705 // MemBarAcquire could be created without Precedent edge. |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
706 // del_req() replaces the specified edge with the last input edge |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
707 // and then removes the last edge. If the specified edge > number of |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
708 // edges the last edge will be moved outside of the input edges array |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
709 // and the edge will be lost. This is why this code should be |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
196
diff
changeset
|
710 // executed only when Precedent (== TypeFunc::Parms) edge is present. |
0 | 711 Node *x = n->in(TypeFunc::Parms); |
712 n->del_req(TypeFunc::Parms); | |
713 n->add_prec(x); | |
714 } | |
715 } | |
716 } | |
717 for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count | |
718 ready_cnt[_nodes[i2]->_idx] = 0; | |
719 | |
720 // All the prescheduled guys do not hold back internal nodes | |
721 uint i3; | |
722 for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled | |
723 Node *n = _nodes[i3]; // Get pre-scheduled | |
724 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
725 Node* m = n->fast_out(j); | |
726 if( cfg->_bbs[m->_idx] ==this ) // Local-block user | |
727 ready_cnt[m->_idx]--; // Fix ready count | |
728 } | |
729 } | |
730 | |
731 Node_List delay; | |
732 // Make a worklist | |
733 Node_List worklist; | |
734 for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist | |
735 Node *m = _nodes[i4]; | |
736 if( !ready_cnt[m->_idx] ) { // Zero ready count? | |
737 if (m->is_iteratively_computed()) { | |
738 // Push induction variable increments last to allow other uses | |
739 // of the phi to be scheduled first. The select() method breaks | |
740 // ties in scheduling by worklist order. | |
741 delay.push(m); | |
125
d942c7e64bd9
6601321: Assert(j == 1 || b->_nodes[j-1]->is_Phi(),"CreateEx must be first instruction in block")
never
parents:
113
diff
changeset
|
742 } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) { |
d942c7e64bd9
6601321: Assert(j == 1 || b->_nodes[j-1]->is_Phi(),"CreateEx must be first instruction in block")
never
parents:
113
diff
changeset
|
743 // Force the CreateEx to the top of the list so it's processed |
d942c7e64bd9
6601321: Assert(j == 1 || b->_nodes[j-1]->is_Phi(),"CreateEx must be first instruction in block")
never
parents:
113
diff
changeset
|
744 // first and ends up at the start of the block. |
d942c7e64bd9
6601321: Assert(j == 1 || b->_nodes[j-1]->is_Phi(),"CreateEx must be first instruction in block")
never
parents:
113
diff
changeset
|
745 worklist.insert(0, m); |
0 | 746 } else { |
747 worklist.push(m); // Then on to worklist! | |
748 } | |
749 } | |
750 } | |
751 while (delay.size()) { | |
752 Node* d = delay.pop(); | |
753 worklist.push(d); | |
754 } | |
755 | |
756 // Warm up the 'next_call' heuristic bits | |
757 needed_for_next_call(_nodes[0], next_call, cfg->_bbs); | |
758 | |
759 #ifndef PRODUCT | |
760 if (cfg->trace_opto_pipelining()) { | |
761 for (uint j=0; j<_nodes.size(); j++) { | |
762 Node *n = _nodes[j]; | |
763 int idx = n->_idx; | |
764 tty->print("# ready cnt:%3d ", ready_cnt[idx]); | |
1685 | 765 tty->print("latency:%3d ", cfg->_node_latency->at_grow(idx)); |
0 | 766 tty->print("%4d: %s\n", idx, n->Name()); |
767 } | |
768 } | |
769 #endif | |
770 | |
771 // Pull from worklist and schedule | |
772 while( worklist.size() ) { // Worklist is not ready | |
773 | |
774 #ifndef PRODUCT | |
775 if (cfg->trace_opto_pipelining()) { | |
776 tty->print("# ready list:"); | |
777 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist | |
778 Node *n = worklist[i]; // Get Node on worklist | |
779 tty->print(" %d", n->_idx); | |
780 } | |
781 tty->cr(); | |
782 } | |
783 #endif | |
784 | |
785 // Select and pop a ready guy from worklist | |
786 Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt); | |
787 _nodes.map(phi_cnt++,n); // Schedule him next | |
788 | |
789 #ifndef PRODUCT | |
790 if (cfg->trace_opto_pipelining()) { | |
791 tty->print("# select %d: %s", n->_idx, n->Name()); | |
1685 | 792 tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx)); |
0 | 793 n->dump(); |
794 if (Verbose) { | |
795 tty->print("# ready list:"); | |
796 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist | |
797 Node *n = worklist[i]; // Get Node on worklist | |
798 tty->print(" %d", n->_idx); | |
799 } | |
800 tty->cr(); | |
801 } | |
802 } | |
803 | |
804 #endif | |
805 if( n->is_MachCall() ) { | |
806 MachCallNode *mcall = n->as_MachCall(); | |
807 phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call); | |
808 continue; | |
809 } | |
810 // Children are now all ready | |
811 for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { | |
812 Node* m = n->fast_out(i5); // Get user | |
813 if( cfg->_bbs[m->_idx] != this ) continue; | |
814 if( m->is_Phi() ) continue; | |
815 if( !--ready_cnt[m->_idx] ) | |
816 worklist.push(m); | |
817 } | |
818 } | |
819 | |
820 if( phi_cnt != end_idx() ) { | |
821 // did not schedule all. Retry, Bailout, or Die | |
822 Compile* C = matcher.C; | |
823 if (C->subsume_loads() == true && !C->failing()) { | |
824 // Retry with subsume_loads == false | |
825 // If this is the first failure, the sentinel string will "stick" | |
826 // to the Compile object, and the C2Compiler will see it and retry. | |
827 C->record_failure(C2Compiler::retry_no_subsuming_loads()); | |
828 } | |
829 // assert( phi_cnt == end_idx(), "did not schedule all" ); | |
830 return false; | |
831 } | |
832 | |
833 #ifndef PRODUCT | |
834 if (cfg->trace_opto_pipelining()) { | |
835 tty->print_cr("#"); | |
836 tty->print_cr("# after schedule_local"); | |
837 for (uint i = 0;i < _nodes.size();i++) { | |
838 tty->print("# "); | |
839 _nodes[i]->fast_dump(); | |
840 } | |
841 tty->cr(); | |
842 } | |
843 #endif | |
844 | |
845 | |
846 return true; | |
847 } | |
848 | |
849 //--------------------------catch_cleanup_fix_all_inputs----------------------- | |
850 static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) { | |
851 for (uint l = 0; l < use->len(); l++) { | |
852 if (use->in(l) == old_def) { | |
853 if (l < use->req()) { | |
854 use->set_req(l, new_def); | |
855 } else { | |
856 use->rm_prec(l); | |
857 use->add_prec(new_def); | |
858 l--; | |
859 } | |
860 } | |
861 } | |
862 } | |
863 | |
864 //------------------------------catch_cleanup_find_cloned_def------------------ | |
865 static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) { | |
866 assert( use_blk != def_blk, "Inter-block cleanup only"); | |
867 | |
868 // The use is some block below the Catch. Find and return the clone of the def | |
869 // that dominates the use. If there is no clone in a dominating block, then | |
870 // create a phi for the def in a dominating block. | |
871 | |
872 // Find which successor block dominates this use. The successor | |
873 // blocks must all be single-entry (from the Catch only; I will have | |
874 // split blocks to make this so), hence they all dominate. | |
875 while( use_blk->_dom_depth > def_blk->_dom_depth+1 ) | |
876 use_blk = use_blk->_idom; | |
877 | |
878 // Find the successor | |
879 Node *fixup = NULL; | |
880 | |
881 uint j; | |
882 for( j = 0; j < def_blk->_num_succs; j++ ) | |
883 if( use_blk == def_blk->_succs[j] ) | |
884 break; | |
885 | |
886 if( j == def_blk->_num_succs ) { | |
887 // Block at same level in dom-tree is not a successor. It needs a | |
888 // PhiNode, the PhiNode uses from the def and IT's uses need fixup. | |
889 Node_Array inputs = new Node_List(Thread::current()->resource_area()); | |
890 for(uint k = 1; k < use_blk->num_preds(); k++) { | |
891 inputs.map(k, catch_cleanup_find_cloned_def(bbs[use_blk->pred(k)->_idx], def, def_blk, bbs, n_clone_idx)); | |
892 } | |
893 | |
894 // Check to see if the use_blk already has an identical phi inserted. | |
895 // If it exists, it will be at the first position since all uses of a | |
896 // def are processed together. | |
897 Node *phi = use_blk->_nodes[1]; | |
898 if( phi->is_Phi() ) { | |
899 fixup = phi; | |
900 for (uint k = 1; k < use_blk->num_preds(); k++) { | |
901 if (phi->in(k) != inputs[k]) { | |
902 // Not a match | |
903 fixup = NULL; | |
904 break; | |
905 } | |
906 } | |
907 } | |
908 | |
909 // If an existing PhiNode was not found, make a new one. | |
910 if (fixup == NULL) { | |
911 Node *new_phi = PhiNode::make(use_blk->head(), def); | |
912 use_blk->_nodes.insert(1, new_phi); | |
913 bbs.map(new_phi->_idx, use_blk); | |
914 for (uint k = 1; k < use_blk->num_preds(); k++) { | |
915 new_phi->set_req(k, inputs[k]); | |
916 } | |
917 fixup = new_phi; | |
918 } | |
919 | |
920 } else { | |
921 // Found the use just below the Catch. Make it use the clone. | |
922 fixup = use_blk->_nodes[n_clone_idx]; | |
923 } | |
924 | |
925 return fixup; | |
926 } | |
927 | |
928 //--------------------------catch_cleanup_intra_block-------------------------- | |
929 // Fix all input edges in use that reference "def". The use is in the same | |
930 // block as the def and both have been cloned in each successor block. | |
931 static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) { | |
932 | |
933 // Both the use and def have been cloned. For each successor block, | |
934 // get the clone of the use, and make its input the clone of the def | |
935 // found in that block. | |
936 | |
937 uint use_idx = blk->find_node(use); | |
938 uint offset_idx = use_idx - beg; | |
939 for( uint k = 0; k < blk->_num_succs; k++ ) { | |
940 // Get clone in each successor block | |
941 Block *sb = blk->_succs[k]; | |
942 Node *clone = sb->_nodes[offset_idx+1]; | |
943 assert( clone->Opcode() == use->Opcode(), "" ); | |
944 | |
945 // Make use-clone reference the def-clone | |
946 catch_cleanup_fix_all_inputs(clone, def, sb->_nodes[n_clone_idx]); | |
947 } | |
948 } | |
949 | |
950 //------------------------------catch_cleanup_inter_block--------------------- | |
951 // Fix all input edges in use that reference "def". The use is in a different | |
952 // block than the def. | |
953 static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) { | |
954 if( !use_blk ) return; // Can happen if the use is a precedence edge | |
955 | |
956 Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, bbs, n_clone_idx); | |
957 catch_cleanup_fix_all_inputs(use, def, new_def); | |
958 } | |
959 | |
960 //------------------------------call_catch_cleanup----------------------------- | |
961 // If we inserted any instructions between a Call and his CatchNode, | |
962 // clone the instructions on all paths below the Catch. | |
963 void Block::call_catch_cleanup(Block_Array &bbs) { | |
964 | |
965 // End of region to clone | |
966 uint end = end_idx(); | |
967 if( !_nodes[end]->is_Catch() ) return; | |
968 // Start of region to clone | |
969 uint beg = end; | |
970 while( _nodes[beg-1]->Opcode() != Op_MachProj || | |
971 !_nodes[beg-1]->in(0)->is_Call() ) { | |
972 beg--; | |
973 assert(beg > 0,"Catch cleanup walking beyond block boundary"); | |
974 } | |
975 // Range of inserted instructions is [beg, end) | |
976 if( beg == end ) return; | |
977 | |
978 // Clone along all Catch output paths. Clone area between the 'beg' and | |
979 // 'end' indices. | |
980 for( uint i = 0; i < _num_succs; i++ ) { | |
981 Block *sb = _succs[i]; | |
982 // Clone the entire area; ignoring the edge fixup for now. | |
983 for( uint j = end; j > beg; j-- ) { | |
1693
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
984 // It is safe here to clone a node with anti_dependence |
6c9cc03d8726
6973329: C2 with Zero based COOP produces code with broken anti-dependency on x86
kvn
parents:
1685
diff
changeset
|
985 // since clones dominate on each path. |
0 | 986 Node *clone = _nodes[j-1]->clone(); |
987 sb->_nodes.insert( 1, clone ); | |
988 bbs.map(clone->_idx,sb); | |
989 } | |
990 } | |
991 | |
992 | |
993 // Fixup edges. Check the def-use info per cloned Node | |
994 for(uint i2 = beg; i2 < end; i2++ ) { | |
995 uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block | |
996 Node *n = _nodes[i2]; // Node that got cloned | |
997 // Need DU safe iterator because of edge manipulation in calls. | |
998 Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area()); | |
999 for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) { | |
1000 out->push(n->fast_out(j1)); | |
1001 } | |
1002 uint max = out->size(); | |
1003 for (uint j = 0; j < max; j++) {// For all users | |
1004 Node *use = out->pop(); | |
1005 Block *buse = bbs[use->_idx]; | |
1006 if( use->is_Phi() ) { | |
1007 for( uint k = 1; k < use->req(); k++ ) | |
1008 if( use->in(k) == n ) { | |
1009 Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx); | |
1010 use->set_req(k, fixup); | |
1011 } | |
1012 } else { | |
1013 if (this == buse) { | |
1014 catch_cleanup_intra_block(use, n, this, beg, n_clone_idx); | |
1015 } else { | |
1016 catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx); | |
1017 } | |
1018 } | |
1019 } // End for all users | |
1020 | |
1021 } // End of for all Nodes in cloned area | |
1022 | |
1023 // Remove the now-dead cloned ops | |
1024 for(uint i3 = beg; i3 < end; i3++ ) { | |
1025 _nodes[beg]->disconnect_inputs(NULL); | |
1026 _nodes.remove(beg); | |
1027 } | |
1028 | |
1029 // If the successor blocks have a CreateEx node, move it back to the top | |
1030 for(uint i4 = 0; i4 < _num_succs; i4++ ) { | |
1031 Block *sb = _succs[i4]; | |
1032 uint new_cnt = end - beg; | |
1033 // Remove any newly created, but dead, nodes. | |
1034 for( uint j = new_cnt; j > 0; j-- ) { | |
1035 Node *n = sb->_nodes[j]; | |
1036 if (n->outcnt() == 0 && | |
1037 (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){ | |
1038 n->disconnect_inputs(NULL); | |
1039 sb->_nodes.remove(j); | |
1040 new_cnt--; | |
1041 } | |
1042 } | |
1043 // If any newly created nodes remain, move the CreateEx node to the top | |
1044 if (new_cnt > 0) { | |
1045 Node *cex = sb->_nodes[1+new_cnt]; | |
1046 if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) { | |
1047 sb->_nodes.remove(1+new_cnt); | |
1048 sb->_nodes.insert(1,cex); | |
1049 } | |
1050 } | |
1051 } | |
1052 } |