Mercurial > hg > truffle
annotate src/share/vm/opto/lcm.cpp @ 4710:41406797186b
7113012: G1: rename not-fully-young GCs as "mixed"
Summary: Renamed partially-young GCs as mixed and fully-young GCs as young. Change all external output that includes those terms (GC log and GC ergo log) as well as any comments, fields, methods, etc. The changeset also includes very minor code tidying up (added some curly brackets).
Reviewed-by: johnc, brutisso
author | tonyp |
---|---|
date | Fri, 16 Dec 2011 02:14:27 -0500 |
parents | f03a3c8bd5e5 |
children | cf407b7d3d78 |
rev | line source |
---|---|
0 | 1 /* |
2426
1d1603768966
7010070: Update all 2010 Oracle-changed OpenJDK files to have the proper copyright dates - second pass
trims
parents:
2401
diff
changeset
|
2 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1151
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "memory/allocation.inline.hpp" | |
27 #include "opto/block.hpp" | |
28 #include "opto/c2compiler.hpp" | |
29 #include "opto/callnode.hpp" | |
30 #include "opto/cfgnode.hpp" | |
31 #include "opto/machnode.hpp" | |
32 #include "opto/runtime.hpp" | |
33 #ifdef TARGET_ARCH_MODEL_x86_32 | |
34 # include "adfiles/ad_x86_32.hpp" | |
35 #endif | |
36 #ifdef TARGET_ARCH_MODEL_x86_64 | |
37 # include "adfiles/ad_x86_64.hpp" | |
38 #endif | |
39 #ifdef TARGET_ARCH_MODEL_sparc | |
40 # include "adfiles/ad_sparc.hpp" | |
41 #endif | |
42 #ifdef TARGET_ARCH_MODEL_zero | |
43 # include "adfiles/ad_zero.hpp" | |
44 #endif | |
2401
7e88bdae86ec
7029017: Additional architecture support for c2 compiler
roland
parents:
1972
diff
changeset
|
45 #ifdef TARGET_ARCH_MODEL_arm |
7e88bdae86ec
7029017: Additional architecture support for c2 compiler
roland
parents:
1972
diff
changeset
|
46 # include "adfiles/ad_arm.hpp" |
7e88bdae86ec
7029017: Additional architecture support for c2 compiler
roland
parents:
1972
diff
changeset
|
47 #endif |
3796 | 48 #ifdef TARGET_ARCH_MODEL_ppc |
49 # include "adfiles/ad_ppc.hpp" | |
50 #endif | |
0 | 51 |
1972 | 52 // Optimization - Graph Style |
0 | 53 |
//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities.  Basically, find NULL checks
// with suitable memory ops nearby.  Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
// The proj is the control projection for the not-null case.
// The val is the pointer being checked for nullness or
// decodeHeapOop_not_null node if it did not fold into address.
//
// Parameters:
//   cfg             - the CFG being scheduled; supplies the node->block map
//                     (_bbs) and per-node latencies.
//   proj            - IfTrue/IfFalse projection of the explicit null test;
//                     its block successors give the null / not-null paths.
//   val             - pointer under test.  The caller tags the LOW BIT of
//                     this pointer to flag a DecodeN (decodeHeapOop_not_null)
//                     that did not fold into the address (see the
//                     is_decoden untagging below).
//   allowed_reasons - bit set of Deoptimization::DeoptReason values for
//                     which an uncommon trap on the null path is acceptable.
//
// On success, replaces this block's ending test with a MachNullCheckNode
// keyed off a hoisted memory op; on any failed precondition it simply
// returns, leaving the explicit check in place.
void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
  // Assume if null check need for 0 offset then always needed
  // Intel solaris doesn't support any null checks yet and no
  // mechanism exists (yet) to set the switches at an os_cpu level
  if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;

  // Make sure the ptr-is-null path appears to be uncommon!
  float f = end()->as_MachIf()->_prob;
  if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
  if( f > PROB_UNLIKELY_MAG(4) ) return;

  uint bidx = 0;              // Capture index of value into memop
  bool was_store;             // Memory op is a store op

  // Get the successor block for if the test ptr is non-null
  Block* not_null_block;  // this one goes with the proj
  Block* null_block;
  // The projection is either the last or second-to-last node of the block.
  if (_nodes[_nodes.size()-1] == proj) {
    null_block     = _succs[0];
    not_null_block = _succs[1];
  } else {
    assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other");
    not_null_block = _succs[0];
    null_block     = _succs[1];
  }
  // Skip over empty goto-only blocks to the real null target.
  while (null_block->is_Empty() == Block::empty_with_goto) {
    null_block     = null_block->_succs[0];
  }

  // Search the exception block for an uncommon trap.
  // (See Parse::do_if and Parse::do_ifnull for the reason
  // we need an uncommon trap.  Briefly, we need a way to
  // detect failure of this optimization, as in 6366351.)
  {
    bool found_trap = false;
    for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) {
      Node* nn = null_block->_nodes[i1];
      if (nn->is_MachCall() &&
          nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
        const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
        if (trtype->isa_int() && trtype->is_int()->is_con()) {
          jint tr_con = trtype->is_int()->get_con();
          Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
          Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
          assert((int)reason < (int)BitsPerInt, "recode bit map");
          if (is_set_nth_bit(allowed_reasons, (int) reason)
              && action != Deoptimization::Action_none) {
            // This uncommon trap is sure to recompile, eventually.
            // When that happens, C->too_many_traps will prevent
            // this transformation from happening again.
            found_trap = true;
          }
        }
        // Only the first uncommon-trap call in the null block is inspected.
        break;
      }
    }
    if (!found_trap) {
      // We did not find an uncommon trap.
      return;
    }
  }

  // Check for decodeHeapOop_not_null node which did not fold into address.
  // The flag rides on the low bit of the (word-aligned) val pointer; strip
  // it before using val as a real Node*.
  bool is_decoden = ((intptr_t)val) & 1;
  val = (Node*)(((intptr_t)val) & ~1);

  // Precedence note: parses as  !is_decoden || (in(0)==NULL && is_Mach && DecodeN).
  assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() &&
         (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity");

  // Search the successor block for a load or store who's base value is also
  // the tested value.  There may be several.
  // NOTE(review): 'out' is never referenced again in this function; the
  // allocation is resource-arena-backed so it does not leak, but it appears
  // to be dead — confirm before removing.
  Node_List *out = new Node_List(Thread::current()->resource_area());
  MachNode *best = NULL;        // Best found so far
  for (DUIterator i = val->outs(); val->has_out(i); i++) {
    Node *m = val->out(i);
    if( !m->is_Mach() ) continue;
    MachNode *mach = m->as_Mach();
    was_store = false;
    int iop = mach->ideal_Opcode();
    switch( iop ) {
    case Op_LoadB:
    case Op_LoadUS:
    case Op_LoadD:
    case Op_LoadF:
    case Op_LoadI:
    case Op_LoadL:
    case Op_LoadP:
    case Op_LoadN:
    case Op_LoadS:
    case Op_LoadKlass:
    case Op_LoadNKlass:
    case Op_LoadRange:
    case Op_LoadD_unaligned:
    case Op_LoadL_unaligned:
      assert(mach->in(2) == val, "should be address");
      break;
    case Op_StoreB:
    case Op_StoreC:
    case Op_StoreCM:
    case Op_StoreD:
    case Op_StoreF:
    case Op_StoreI:
    case Op_StoreL:
    case Op_StoreP:
    case Op_StoreN:
      was_store = true;         // Memory op is a store op
      // Stores will have their address in slot 2 (memory in slot 1).
      // If the value being nul-checked is in another slot, it means we
      // are storing the checked value, which does NOT check the value!
      if( mach->in(2) != val ) continue;
      break;                    // Found a memory op?
    case Op_StrComp:
    case Op_StrEquals:
    case Op_StrIndexOf:
    case Op_AryEq:
      // Not a legit memory op for implicit null check regardless of
      // embedded loads
      continue;
    default:                    // Also check for embedded loads
      if( !mach->needs_anti_dependence_check() )
        continue;               // Not an memory op; skip it
      if( must_clone[iop] ) {
        // Do not move nodes which produce flags because
        // RA will try to clone it to place near branch and
        // it will cause recompilation, see clone_node().
        continue;
      }
      {
        // Check that value is used in memory address in
        // instructions with embedded load (CmpP val1,(val2+off)).
        Node* base;
        Node* index;
        const MachOper* oper = mach->memory_inputs(base, index);
        if (oper == NULL || oper == (MachOper*)-1) {
          continue;             // Not an memory op; skip it
        }
        // Precedence note: parses as  val==base || (val==index && narrowoop);
        // the index position only qualifies for compressed-oop values.
        if (val == base ||
            val == index && val->bottom_type()->isa_narrowoop()) {
          break;                // Found it
        } else {
          continue;             // Skip it
        }
      }
      // NOTE(review): both arms above break or continue, so this break
      // looks unreachable; kept as in the original source.
      break;
    }
    // check if the offset is not too high for implicit exception
    {
      intptr_t offset = 0;
      const TypePtr *adr_type = NULL;  // Do not need this return value here
      const Node* base = mach->get_base_and_disp(offset, adr_type);
      if (base == NULL || base == NodeSentinel) {
        // Narrow oop address doesn't have base, only index
        if( val->bottom_type()->isa_narrowoop() &&
            MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if offset is beyond page size
        // cannot reason about it; is probably not implicit null exception
      } else {
        const TypePtr* tptr;
        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
          // 32-bits narrow oop can be the base of address expressions
          tptr = base->bottom_type()->make_ptr();
        } else {
          // only regular oops are expected here
          tptr = base->bottom_type()->is_ptr();
        }
        // Give up if offset is not a compile-time constant
        if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
          continue;
        offset += tptr->_offset; // correct if base is offseted
        if( MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up is reference is beyond 4K page size
      }
    }

    // Check ctrl input to see if the null-check dominates the memory op
    Block *cb = cfg->_bbs[mach->_idx];
    cb = cb->_idom;             // Always hoist at least 1 block
    if( !was_store ) {          // Stores can be hoisted only one block
      while( cb->_dom_depth > (_dom_depth + 1))
        cb = cb->_idom;         // Hoist loads as far as we want
      // The non-null-block should dominate the memory op, too. Live
      // range spilling will insert a spill in the non-null-block if it is
      // needs to spill the memory op for an implicit null check.
      if (cb->_dom_depth == (_dom_depth + 1)) {
        if (cb != not_null_block) continue;
        cb = cb->_idom;
      }
    }
    if( cb != this ) continue;

    // Found a memory user; see if it can be hoisted to check-block
    uint vidx = 0;              // Capture index of value into memop
    uint j;
    // Walk all inputs; every one must dominate the null check (or be val).
    for( j = mach->req()-1; j > 0; j-- ) {
      if( mach->in(j) == val ) {
        vidx = j;
        // Ignore DecodeN val which could be hoisted to where needed.
        if( is_decoden ) continue;
      }
      // Block of memory-op input
      Block *inb = cfg->_bbs[mach->in(j)->_idx];
      Block *b = this;          // Start from nul check
      while( b != inb && b->_dom_depth > inb->_dom_depth )
        b = b->_idom;           // search upwards for input
      // See if input dominates null check
      if( b != inb )
        break;
    }
    if( j > 0 )
      continue;                 // Some input failed to dominate; give up
    Block *mb = cfg->_bbs[mach->_idx];
    // Hoisting stores requires more checks for the anti-dependence case.
    // Give up hoisting if we have to move the store past any load.
    if( was_store ) {
      Block *b = mb;            // Start searching here for a local load
      // mach use (faulting) trying to hoist
      // n might be blocker to hoisting
      while( b != this ) {
        uint k;
        for( k = 1; k < b->_nodes.size(); k++ ) {
          Node *n = b->_nodes[k];
          if( n->needs_anti_dependence_check() &&
              n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
            break;              // Found anti-dependent load
        }
        if( k < b->_nodes.size() )
          break;                // Found anti-dependent load
        // Make sure control does not do a merge (would have to check allpaths)
        if( b->num_preds() != 2 ) break;
        b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block
      }
      if( b != this ) continue;
    }

    // Make sure this memory op is not already being used for a NullCheck
    Node *e = mb->end();
    if( e->is_MachNullCheck() && e->in(1) == mach )
      continue;                 // Already being used as a NULL check

    // Found a candidate!  Pick one with least dom depth - the highest
    // in the dom tree should be closest to the null check.
    if( !best ||
        cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) {
      best = mach;
      bidx = vidx;

    }
  }
  // No candidate!
  if( !best ) return;

  // ---- Found an implicit null check
  extern int implicit_null_checks;
  implicit_null_checks++;

  if( is_decoden ) {
    // Check if we need to hoist decodeHeapOop_not_null first.
    Block *valb = cfg->_bbs[val->_idx];
    if( this != valb && this->_dom_depth < valb->_dom_depth ) {
      // Hoist it up to the end of the test block.
      valb->find_remove(val);
      this->add_inst(val);
      cfg->_bbs.map(val->_idx,this);
      // DecodeN on x86 may kill flags. Check for flag-killing projections
      // that also need to be hoisted.
      for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
        Node* n = val->fast_out(j);
        if( n->is_MachProj() ) {
          cfg->_bbs[n->_idx]->find_remove(n);
          this->add_inst(n);
          cfg->_bbs.map(n->_idx,this);
        }
      }
    }
  }
  // Hoist the memory candidate up to the end of the test block.
  Block *old_block = cfg->_bbs[best->_idx];
  old_block->find_remove(best);
  add_inst(best);
  cfg->_bbs.map(best->_idx,this);

  // Move the control dependence
  if (best->in(0) && best->in(0) == old_block->_nodes[0])
    best->set_req(0, _nodes[0]);

  // Check for flag-killing projections that also need to be hoisted
  // Should be DU safe because no edge updates.
  for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
    Node* n = best->fast_out(j);
    if( n->is_MachProj() ) {
      cfg->_bbs[n->_idx]->find_remove(n);
      add_inst(n);
      cfg->_bbs.map(n->_idx,this);
    }
  }

  Compile *C = cfg->C;
  // proj==Op_True --> ne test; proj==Op_False --> eq test.
  // One of two graph shapes got matched:
  //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
  //   (IfFalse (If (Bool EQ (CmpP ptr NULL))))
  // NULL checks are always branch-if-eq.  If we see a IfTrue projection
  // then we are replacing a 'ne' test with a 'eq' NULL check test.
  // We need to flip the projections to keep the same semantics.
  if( proj->Opcode() == Op_IfTrue ) {
    // Swap order of projections in basic block to swap branch targets
    Node *tmp1 = _nodes[end_idx()+1];
    Node *tmp2 = _nodes[end_idx()+2];
    _nodes.map(end_idx()+1, tmp2);
    _nodes.map(end_idx()+2, tmp1);
    // Three-way replace_by through a scratch node swaps the users of
    // tmp1 and tmp2 without ever leaving a node with no uses mid-swap.
    Node *tmp = new (C, 1) Node(C->top()); // Use not NULL input
    tmp1->replace_by(tmp);
    tmp2->replace_by(tmp1);
    tmp->replace_by(tmp2);
    tmp->destruct();
  }

  // Remove the existing null check; use a new implicit null check instead.
  // Since schedule-local needs precise def-use info, we need to correct
  // it as well.
  Node *old_tst = proj->in(0);
  MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
  _nodes.map(end_idx(),nul_chk);
  cfg->_bbs.map(nul_chk->_idx,this);
  // Redirect users of old_test to nul_chk
  for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
    old_tst->last_out(i2)->set_req(0, nul_chk);
  // Clean-up any dead code
  for (uint i3 = 0; i3 < old_tst->req(); i3++)
    old_tst->set_req(i3, NULL);

  cfg->latency_from_uses(nul_chk);
  cfg->latency_from_uses(best);
}
395 | |
396 | |
//------------------------------select-----------------------------------------
// Select a nice fellow from the worklist to schedule next. If there is only
// one choice, then use it. Projections take top priority for correctness
// reasons - if I see a projection, then it is next.  There are a number of
// other special cases, for instructions that consume condition codes, et al.
// These are chosen immediately. Some instructions are required to immediately
// precede the last instruction in the block, and these are taken last. Of the
// remaining cases (most), choose the instruction with the greatest latency
// (that is, the most number of pseudo-cycles required to the end of the
// routine). If there is a tie, choose the instruction with the most inputs.
//
// Parameters:
//   cfg       - supplies the node->block map (_bbs) and node latencies.
//   worklist  - ready nodes; the winner is removed by swapping in the popped
//               tail entry (order of the remainder is otherwise preserved).
//   ready_cnt - per-node count of not-yet-scheduled uses.
//   next_call, sched_slot - not referenced in this body (kept for the
//               caller's signature).
// Returns the chosen node, already removed from the worklist.
Node *Block::select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot) {

  // If only a single entry on the stack, use it
  uint cnt = worklist.size();
  if (cnt == 1) {
    Node *n = worklist[0];
    worklist.map(0,worklist.pop());
    return n;
  }

  // Best candidate so far, compared lexicographically:
  // (choice, latency, score), all "bigger wins".
  uint choice  = 0; // Bigger is most important
  uint latency = 0; // Bigger is scheduled first
  uint score   = 0; // Bigger is better
  int idx = -1;     // Index in worklist

  for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
    // Order in worklist is used to break ties.
    // See caller for how this is used to delay scheduling
    // of induction variable increments to after the other
    // uses of the phi are scheduled.
    Node *n = worklist[i];      // Get Node on worklist

    int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0;
    if( n->is_Proj() ||         // Projections always win
        n->Opcode()== Op_Con || // So does constant 'Top'
        iop == Op_CreateEx ||   // Create-exception must start block
        iop == Op_CheckCastPP
        ) {
      // Mandatory winner: take it immediately, no scoring needed.
      worklist.map(i,worklist.pop());
      return n;
    }

    // Final call in a block must be adjacent to 'catch'
    Node *e = end();
    if( e->is_Catch() && e->in(0)->in(0) == n )
      continue;

    // Memory op for an implicit null check has to be at the end of the block
    if( e->is_MachNullCheck() && e->in(1) == n )
      continue;

    uint n_choice = 2;          // Default priority class

    // See if this instruction is consumed by a branch. If so, then (as the
    // branch is the last instruction in the basic block) force it to the
    // end of the basic block
    if ( must_clone[iop] ) {
      // See if any use is a branch
      bool found_machif = false;

      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* use = n->fast_out(j);

        // The use is a conditional branch, make them adjacent
        if (use->is_MachIf() && cfg->_bbs[use->_idx]==this ) {
          found_machif = true;
          break;
        }

        // More than this instruction pending for successor to be ready,
        // don't choose this if other opportunities are ready
        if (ready_cnt[use->_idx] > 1)
          n_choice = 1;
      }

      // loop terminated, prefer not to use this instruction
      if (found_machif)
        continue;
    }

    // See if this has a predecessor that is "must_clone", i.e. sets the
    // condition code. If so, choose this first
    for (uint j = 0; j < n->req() ; j++) {
      Node *inn = n->in(j);
      if (inn) {
        if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) {
          n_choice = 3;         // Highest non-mandatory priority
          break;
        }
      }
    }

    // MachTemps should be scheduled last so they are near their uses
    if (n->is_MachTemp()) {
      n_choice = 1;
    }

    uint n_latency = cfg->_node_latency->at_grow(n->_idx);
    uint n_score   = n->req();   // Many inputs get high score to break ties

    // Keep best latency found.  Strict '<' comparisons mean earlier
    // worklist entries win exact ties (see tie-break comment above).
    if( choice < n_choice ||
        ( choice == n_choice &&
          ( latency < n_latency ||
            ( latency == n_latency &&
              ( score < n_score ))))) {
      choice  = n_choice;
      latency = n_latency;
      score   = n_score;
      idx     = i;               // Also keep index in worklist
    }
  } // End of for all ready nodes in worklist

  assert(idx >= 0, "index should be set");
  Node *n = worklist[(uint)idx];      // Get the winner

  worklist.map((uint)idx, worklist.pop());     // Compress worklist
  return n;
}
516 | |
517 | |
518 //------------------------------set_next_call---------------------------------- | |
519 void Block::set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ) { | |
520 if( next_call.test_set(n->_idx) ) return; | |
521 for( uint i=0; i<n->len(); i++ ) { | |
522 Node *m = n->in(i); | |
523 if( !m ) continue; // must see all nodes in block that precede call | |
524 if( bbs[m->_idx] == this ) | |
525 set_next_call( m, next_call, bbs ); | |
526 } | |
527 } | |
528 | |
529 //------------------------------needed_for_next_call--------------------------- | |
530 // Set the flag 'next_call' for each Node that is needed for the next call to | |
531 // be scheduled. This flag lets me bias scheduling so Nodes needed for the | |
532 // next subroutine call get priority - basically it moves things NOT needed | |
533 // for the next call till after the call. This prevents me from trying to | |
534 // carry lots of stuff live across a call. | |
535 void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs) { | |
536 // Find the next control-defining Node in this block | |
537 Node* call = NULL; | |
538 for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { | |
539 Node* m = this_call->fast_out(i); | |
540 if( bbs[m->_idx] == this && // Local-block user | |
541 m != this_call && // Not self-start node | |
3842 | 542 m->is_MachCall() ) |
0 | 543 call = m; |
544 break; | |
545 } | |
546 if (call == NULL) return; // No next call (e.g., block end is near) | |
547 // Set next-call for all inputs to this call | |
548 set_next_call(call, next_call, bbs); | |
549 } | |
550 | |
4120
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
551 //------------------------------add_call_kills------------------------------------- |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
552 void Block::add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) { |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
553 // Fill in the kill mask for the call |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
554 for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
555 if( !regs.Member(r) ) { // Not already defined by the call |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
556 // Save-on-call register? |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
557 if ((save_policy[r] == 'C') || |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
558 (save_policy[r] == 'A') || |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
559 ((save_policy[r] == 'E') && exclude_soe)) { |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
560 proj->_rout.Insert(r); |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
561 } |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
562 } |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
563 } |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
564 } |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
565 |
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
566 |
0 | 567 //------------------------------sched_call------------------------------------- |
568 uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { | |
569 RegMask regs; | |
570 | |
571 // Schedule all the users of the call right now. All the users are | |
572 // projection Nodes, so they must be scheduled next to the call. | |
573 // Collect all the defined registers. | |
574 for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) { | |
575 Node* n = mcall->fast_out(i); | |
3842 | 576 assert( n->is_MachProj(), "" ); |
0 | 577 --ready_cnt[n->_idx]; |
578 assert( !ready_cnt[n->_idx], "" ); | |
579 // Schedule next to call | |
580 _nodes.map(node_cnt++, n); | |
581 // Collect defined registers | |
582 regs.OR(n->out_RegMask()); | |
583 // Check for scheduling the next control-definer | |
584 if( n->bottom_type() == Type::CONTROL ) | |
585 // Warm up next pile of heuristic bits | |
586 needed_for_next_call(n, next_call, bbs); | |
587 | |
588 // Children of projections are now all ready | |
589 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | |
590 Node* m = n->fast_out(j); // Get user | |
591 if( bbs[m->_idx] != this ) continue; | |
592 if( m->is_Phi() ) continue; | |
593 if( !--ready_cnt[m->_idx] ) | |
594 worklist.push(m); | |
595 } | |
596 | |
597 } | |
598 | |
599 // Act as if the call defines the Frame Pointer. | |
600 // Certainly the FP is alive and well after the call. | |
601 regs.Insert(matcher.c_frame_pointer()); | |
602 | |
603 // Set all registers killed and not already defined by the call. | |
604 uint r_cnt = mcall->tf()->range()->cnt(); | |
605 int op = mcall->ideal_Opcode(); | |
606 MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); | |
607 bbs.map(proj->_idx,this); | |
608 _nodes.insert(node_cnt++, proj); | |
609 | |
610 // Select the right register save policy. | |
611 const char * save_policy; | |
612 switch (op) { | |
613 case Op_CallRuntime: | |
614 case Op_CallLeaf: | |
615 case Op_CallLeafNoFP: | |
616 // Calling C code so use C calling convention | |
617 save_policy = matcher._c_reg_save_policy; | |
618 break; | |
619 | |
620 case Op_CallStaticJava: | |
621 case Op_CallDynamicJava: | |
622 // Calling Java code so use Java calling convention | |
623 save_policy = matcher._register_save_policy; | |
624 break; | |
625 | |
626 default: | |
627 ShouldNotReachHere(); | |
628 } | |
629 | |
630 // When using CallRuntime mark SOE registers as killed by the call | |
631 // so values that could show up in the RegisterMap aren't live in a | |
632 // callee saved register since the register wouldn't know where to | |
633 // find them. CallLeaf and CallLeafNoFP are ok because they can't | |
634 // have debug info on them. Strictly speaking this only needs to be | |
635 // done for oops since idealreg2debugmask takes care of debug info | |
636 // references but there no way to handle oops differently than other | |
637 // pointers as far as the kill mask goes. | |
638 bool exclude_soe = op == Op_CallRuntime; | |
639 | |
1137
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
640 // If the call is a MethodHandle invoke, we need to exclude the |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
641 // register which is used to save the SP value over MH invokes from |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
642 // the mask. Otherwise this register could be used for |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
643 // deoptimization information. |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
644 if (op == Op_CallStaticJava) { |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
645 MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall; |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
646 if (mcallstaticjava->_method_handle_invoke) |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
647 proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask()); |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
648 } |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1100
diff
changeset
|
649 |
4120
f03a3c8bd5e5
7077312: Provide a CALL effect for instruct declaration in the ad file
roland
parents:
3842
diff
changeset
|
650 add_call_kills(proj, regs, save_policy, exclude_soe); |
0 | 651 |
652 return node_cnt; | |
653 } | |
654 | |
655 | |
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
// Computes per-node ready counts (block-local input counts), then
// repeatedly pulls a ready node via select() and appends it to _nodes.
// Returns true on success; returns false if not every node could be
// scheduled, after optionally requesting a retry without subsumed loads.
bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order);
      for (uint i = 0;i < _nodes.size();i++) {
        tty->print("# ");
        _nodes[i]->fast_dump();
      }
      tty->print_cr("#");
    }
#endif

  // RootNode is already sorted
  if( _nodes.size() == 1 ) return true;

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = _nodes[i];
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj()  && n->in(0) == head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      _nodes.map(i,_nodes[phi_cnt]);
      _nodes.map(phi_cnt++,n);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt[n->_idx] = local; // Count em up

#ifdef ASSERT
      // Card-mark stores must be dominated by the oop store they guard;
      // only relevant to collectors that use card marking this way.
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
    ready_cnt[_nodes[i2]->_idx] = 0;

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = _nodes[i3];       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if( cfg->_bbs[m->_idx] ==this ) // Local-block user
        ready_cnt[m->_idx]--;   // Fix ready count
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = _nodes[i4];
    if( !ready_cnt[m->_idx] ) {   // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);         // Then on to worklist!
      }
    }
  }
  // Delayed (iteratively-computed) nodes go on last.
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(_nodes[0], next_call, cfg->_bbs);

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      for (uint j=0; j<_nodes.size(); j++) {
        Node     *n = _nodes[j];
        int     idx = n->_idx;
        tty->print("#   ready cnt:%3d  ", ready_cnt[idx]);
        tty->print("latency:%3d  ", cfg->_node_latency->at_grow(idx));
        tty->print("%4d: %s\n", idx, n->Name());
      }
    }
#endif

  // Nodes with _idx above this limit were created during scheduling
  // (the fat-projs inserted below) and carry no ready_cnt entry.
  uint max_idx = matcher.C->unique();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt);
    _nodes.map(phi_cnt++,n);    // Schedule him next

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i];      // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }

#endif
    if( n->is_MachCall() ) {
      // Calls get their own treatment: projections, kill mask, etc.
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    // Non-call Mach node that nevertheless calls out (has_call): model
    // its register kills with a fat-proj, using the C save policy.
    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
      regs.Insert(matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (matcher.C, 1) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      cfg->_bbs.map(proj->_idx,this);
      _nodes.insert(phi_cnt++, proj);

      add_call_kills(proj, regs, matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if( cfg->_bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if (m->_idx > max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  if( phi_cnt != end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    Compile* C = matcher.C;
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->cr();
  }
#endif


  return true;
}
880 | |
881 //--------------------------catch_cleanup_fix_all_inputs----------------------- | |
882 static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) { | |
883 for (uint l = 0; l < use->len(); l++) { | |
884 if (use->in(l) == old_def) { | |
885 if (l < use->req()) { | |
886 use->set_req(l, new_def); | |
887 } else { | |
888 use->rm_prec(l); | |
889 use->add_prec(new_def); | |
890 l--; | |
891 } | |
892 } | |
893 } | |
894 } | |
895 | |
//------------------------------catch_cleanup_find_cloned_def------------------
// Return the clone of 'def' that dominates 'use_blk', building a PhiNode
// in the dominating block if no single clone dominates.
static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) {
  assert( use_blk != def_blk, "Inter-block cleanup only");

  // The use is some block below the Catch.  Find and return the clone of the def
  // that dominates the use. If there is no clone in a dominating block, then
  // create a phi for the def in a dominating block.

  // Find which successor block dominates this use.  The successor
  // blocks must all be single-entry (from the Catch only; I will have
  // split blocks to make this so), hence they all dominate.
  while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
    use_blk = use_blk->_idom;

  // Find the successor
  Node *fixup = NULL;

  uint j;
  for( j = 0; j < def_blk->_num_succs; j++ )
    if( use_blk == def_blk->_succs[j] )
      break;

  if( j == def_blk->_num_succs ) {
    // Block at same level in dom-tree is not a successor.  It needs a
    // PhiNode, the PhiNode uses from the def and IT's uses need fixup.
    // Recursively resolve the cloned def reaching each predecessor;
    // these become the phi inputs (slot 0 is control, hence k starts at 1).
    Node_Array inputs = new Node_List(Thread::current()->resource_area());
    for(uint k = 1; k < use_blk->num_preds(); k++) {
      inputs.map(k, catch_cleanup_find_cloned_def(bbs[use_blk->pred(k)->_idx], def, def_blk, bbs, n_clone_idx));
    }

    // Check to see if the use_blk already has an identical phi inserted.
    // If it exists, it will be at the first position since all uses of a
    // def are processed together.
    Node *phi = use_blk->_nodes[1];
    if( phi->is_Phi() ) {
      fixup = phi;
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        if (phi->in(k) != inputs[k]) {
          // Not a match
          fixup = NULL;
          break;
        }
      }
    }

    // If an existing PhiNode was not found, make a new one.
    if (fixup == NULL) {
      Node *new_phi = PhiNode::make(use_blk->head(), def);
      use_blk->_nodes.insert(1, new_phi);
      bbs.map(new_phi->_idx, use_blk);
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        new_phi->set_req(k, inputs[k]);
      }
      fixup = new_phi;
    }

  } else {
    // Found the use just below the Catch.  Make it use the clone.
    // Clones were inserted starting at slot 1, so n_clone_idx indexes it.
    fixup = use_blk->_nodes[n_clone_idx];
  }

  return fixup;
}
959 | |
960 //--------------------------catch_cleanup_intra_block-------------------------- | |
961 // Fix all input edges in use that reference "def". The use is in the same | |
962 // block as the def and both have been cloned in each successor block. | |
963 static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) { | |
964 | |
965 // Both the use and def have been cloned. For each successor block, | |
966 // get the clone of the use, and make its input the clone of the def | |
967 // found in that block. | |
968 | |
969 uint use_idx = blk->find_node(use); | |
970 uint offset_idx = use_idx - beg; | |
971 for( uint k = 0; k < blk->_num_succs; k++ ) { | |
972 // Get clone in each successor block | |
973 Block *sb = blk->_succs[k]; | |
974 Node *clone = sb->_nodes[offset_idx+1]; | |
975 assert( clone->Opcode() == use->Opcode(), "" ); | |
976 | |
977 // Make use-clone reference the def-clone | |
978 catch_cleanup_fix_all_inputs(clone, def, sb->_nodes[n_clone_idx]); | |
979 } | |
980 } | |
981 | |
982 //------------------------------catch_cleanup_inter_block--------------------- | |
983 // Fix all input edges in use that reference "def". The use is in a different | |
984 // block than the def. | |
985 static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) { | |
986 if( !use_blk ) return; // Can happen if the use is a precedence edge | |
987 | |
988 Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, bbs, n_clone_idx); | |
989 catch_cleanup_fix_all_inputs(use, def, new_def); | |
990 } | |
991 | |
//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch.
// The cloned copies dominate their uses on each exception path; the
// originals (now fully replaced) are removed from this block, and any
// dead clones are pruned from the successors.
void Block::call_catch_cleanup(Block_Array &bbs) {

  // End of region to clone
  uint end = end_idx();
  if( !_nodes[end]->is_Catch() ) return;
  // Start of region to clone
  uint beg = end;
  // Walk backward to the MachProj hanging off the MachCall; everything
  // after it was inserted between the call and the Catch.
  while(!_nodes[beg-1]->is_MachProj() ||
        !_nodes[beg-1]->in(0)->is_MachCall() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  // Range of inserted instructions is [beg, end)
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < _num_succs; i++ ) {
    Block *sb = _succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    // Inserting at slot 1 while walking j downward preserves the
    // original order of the cloned nodes in the successor.
    for( uint j = end; j > beg; j-- ) {
      // It is safe here to clone a node with anti_dependence
      // since clones dominate on each path.
      Node *clone = _nodes[j-1]->clone();
      sb->_nodes.insert( 1, clone );
      bbs.map(clone->_idx,sb);
    }
  }


  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = _nodes[i2];        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) {// For all users
      Node *use = out->pop();
      Block *buse = bbs[use->_idx];
      if( use->is_Phi() ) {
        // Phi inputs are fixed per-predecessor: each slot k gets the
        // cloned def that reaches it through pred(k).
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n ) {
            Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx);
            use->set_req(k, fixup);
          }
      } else {
        if (this == buse) {
          catch_cleanup_intra_block(use, n, this, beg, n_clone_idx);
        } else {
          catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx);
        }
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops
  // Each remove(beg) shifts the rest down, so deleting at 'beg'
  // repeatedly consumes the whole [beg,end) range.
  for(uint i3 = beg; i3 < end; i3++ ) {
    _nodes[beg]->disconnect_inputs(NULL);
    _nodes.remove(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < _num_succs; i4++ ) {
    Block *sb = _succs[i4];
    uint new_cnt = end - beg;
    // Remove any newly created, but dead, nodes.
    for( uint j = new_cnt; j > 0; j-- ) {
      Node *n = sb->_nodes[j];
      if (n->outcnt() == 0 &&
          (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
        n->disconnect_inputs(NULL);
        sb->_nodes.remove(j);
        new_cnt--;
      }
    }
    // If any newly created nodes remain, move the CreateEx node to the top
    if (new_cnt > 0) {
      Node *cex = sb->_nodes[1+new_cnt];
      if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
        sb->_nodes.remove(1+new_cnt);
        sb->_nodes.insert(1,cex);
      }
    }
  }
}