Mercurial > hg > truffle
annotate src/share/vm/opto/matcher.cpp @ 452:00b023ae2d78
6722113: CMS: Incorrect overflow handling during precleaning of Reference lists
Summary: When we encounter marking stack overflow during precleaning of Reference lists, we were using the overflow list mechanism, which can cause problems on account of mutating the mark word of the header because of conflicts with mutator accesses and updates of that field. Instead we should use the usual mechanism for overflow handling in concurrent phases, namely dirtying of the card on which the overflowed object lies. Since precleaning effectively does a form of discovered list processing, albeit with discovery enabled, we needed to adjust some code to be correct in the face of interleaved processing and discovery.
Reviewed-by: apetrusenko, jcoomes
author | ysr |
---|---|
date | Thu, 20 Nov 2008 12:27:41 -0800 |
parents | a1980da045cc |
children | 3b5ac9e7e6ea |
rev | line source |
---|---|
0 | 1 /* |
196 | 2 * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_matcher.cpp.incl" | |
27 | |
// The C frame pointer register for this platform; assigned in
// Matcher::init_spill_mask() from the AD-supplied c_frame_pointer().
OptoReg::Name OptoReg::c_frame_pointer;
29 | |
30 | |
31 | |
// Map each ideal Type::base() enum value to the ideal register opcode
// (Op_RegI, Op_RegL, ...) used to carry a value of that type.  Entries that
// can never live in a machine register are Node::NotAMachineReg; entries
// that never appear as a matched value are 0.  Indexed by Type::base(), so
// the entry order must track the Type base enum (size Type::lastype).
const int Matcher::base2reg[Type::lastype] = {
  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
  0, 0/*abio*/,
  Op_RegP /* Return address */, 0, /* the memories */
  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
  0 /*bottom*/
};
41 | |
// Shared (static) Matcher tables.  The register masks below are filled in
// once per VM by init_spill_mask(); the rematerialize bounds come from the
// AD-file-generated _BEGIN/_END_REMATERIALIZE constants.
const RegMask *Matcher::idealreg2regmask[_last_machine_leaf]; // ideal reg -> machine reg mask
RegMask Matcher::mreg2regmask[_last_Mach_Reg]; // per-machine-register singleton masks
RegMask Matcher::STACK_ONLY_mask;              // all stack slots, no registers
RegMask Matcher::c_frame_ptr_mask;             // the C frame pointer (2 slots in LP64)
const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
const uint Matcher::_end_rematerialize   = _END_REMATERIALIZE;
48 | |
//---------------------------Matcher-------------------------------------------
// Construct a Matcher for the current compilation.  Captures the
// matching-rule tables (reduceOp/leftOp/rightOp, swallowed, must_clone,
// ruleName — file-scope tables generated from the AD file, defined
// elsewhere) and the register save policies, registers this Matcher with
// the Compile object, and clears the lazily-built spill/debug mask caches
// (filled in later by init_first_stack_mask()).
Matcher::Matcher( Node_List &proj_list ) :
  PhaseTransform( Phase::Ins_Select ),
#ifdef ASSERT
  _old2new_map(C->comp_arena()),
  _new2old_map(C->comp_arena()),
#endif
  _shared_nodes(C->comp_arena()),
  _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
  _swallowed(swallowed),
  _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
  _end_inst_chain_rule(_END_INST_CHAIN_RULE),
  _must_clone(must_clone), _proj_list(proj_list),
  _register_save_policy(register_save_policy),
  _c_reg_save_policy(c_reg_save_policy),
  _register_save_type(register_save_type),
  _ruleName(ruleName),
  _allocation_started(false),
  _states_arena(Chunk::medium_size),
  _visited(&_states_arena),
  _shared(&_states_arena),
  _dontcare(&_states_arena) {
  C->set_matcher(this);

  // Spill masks are built on demand; NULL marks "not yet computed".
  idealreg2spillmask[Op_RegI] = NULL;
  idealreg2spillmask[Op_RegN] = NULL;
  idealreg2spillmask[Op_RegL] = NULL;
  idealreg2spillmask[Op_RegF] = NULL;
  idealreg2spillmask[Op_RegD] = NULL;
  idealreg2spillmask[Op_RegP] = NULL;

  // Likewise for the debug-info masks.
  idealreg2debugmask[Op_RegI] = NULL;
  idealreg2debugmask[Op_RegN] = NULL;
  idealreg2debugmask[Op_RegL] = NULL;
  idealreg2debugmask[Op_RegF] = NULL;
  idealreg2debugmask[Op_RegD] = NULL;
  idealreg2debugmask[Op_RegP] = NULL;
  debug_only(_mem_node = NULL;)   // Ideal memory node consumed by mach node
}
88 | |
89 //------------------------------warp_incoming_stk_arg------------------------ | |
90 // This warps a VMReg into an OptoReg::Name | |
91 OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) { | |
92 OptoReg::Name warped; | |
93 if( reg->is_stack() ) { // Stack slot argument? | |
94 warped = OptoReg::add(_old_SP, reg->reg2stack() ); | |
95 warped = OptoReg::add(warped, C->out_preserve_stack_slots()); | |
96 if( warped >= _in_arg_limit ) | |
97 _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen | |
98 if (!RegMask::can_represent(warped)) { | |
99 // the compiler cannot represent this method's calling sequence | |
100 C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence"); | |
101 return OptoReg::Bad; | |
102 } | |
103 return warped; | |
104 } | |
105 return OptoReg::as_OptoReg(reg); | |
106 } | |
107 | |
108 //---------------------------compute_old_SP------------------------------------ | |
109 OptoReg::Name Compile::compute_old_SP() { | |
110 int fixed = fixed_slots(); | |
111 int preserve = in_preserve_stack_slots(); | |
112 return OptoReg::stack2reg(round_to(fixed + preserve, Matcher::stack_alignment_in_slots())); | |
113 } | |
114 | |
115 | |
116 | |
#ifdef ASSERT
// Debug-only sanity walk: traverse the graph from xroot and assert that
// every reachable node (and every non-NULL input) lives in the current
// node arena — i.e. no edge points back into the old-space graph.
void Matcher::verify_new_nodes_only(Node* xroot) {
  ResourceMark rm;
  Unique_Node_List todo;
  VectorSet seen(Thread::current()->resource_area());
  todo.push(xroot);
  while (todo.size() > 0) {
    Node* cur = todo.pop();
    seen <<= cur->_idx;
    assert(C->node_arena()->contains(cur), "dead node");
    for (uint slot = 0; slot < cur->req(); slot++) {
      Node* def = cur->in(slot);
      if (def == NULL)  continue;   // empty input slot
      assert(C->node_arena()->contains(def), "dead node");
      if (!seen.test(def->_idx))
        todo.push(def);
    }
  }
}
#endif
140 | |
141 | |
142 //---------------------------match--------------------------------------------- | |
143 void Matcher::match( ) { | |
144 // One-time initialization of some register masks. | |
145 init_spill_mask( C->root()->in(1) ); | |
146 _return_addr_mask = return_addr(); | |
147 #ifdef _LP64 | |
148 // Pointers take 2 slots in 64-bit land | |
149 _return_addr_mask.Insert(OptoReg::add(return_addr(),1)); | |
150 #endif | |
151 | |
152 // Map a Java-signature return type into return register-value | |
153 // machine registers for 0, 1 and 2 returned values. | |
154 const TypeTuple *range = C->tf()->range(); | |
155 if( range->cnt() > TypeFunc::Parms ) { // If not a void function | |
156 // Get ideal-register return type | |
157 int ireg = base2reg[range->field_at(TypeFunc::Parms)->base()]; | |
158 // Get machine return register | |
159 uint sop = C->start()->Opcode(); | |
160 OptoRegPair regs = return_value(ireg, false); | |
161 | |
162 // And mask for same | |
163 _return_value_mask = RegMask(regs.first()); | |
164 if( OptoReg::is_valid(regs.second()) ) | |
165 _return_value_mask.Insert(regs.second()); | |
166 } | |
167 | |
168 // --------------- | |
169 // Frame Layout | |
170 | |
171 // Need the method signature to determine the incoming argument types, | |
172 // because the types determine which registers the incoming arguments are | |
173 // in, and this affects the matched code. | |
174 const TypeTuple *domain = C->tf()->domain(); | |
175 uint argcnt = domain->cnt() - TypeFunc::Parms; | |
176 BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt ); | |
177 VMRegPair *vm_parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt ); | |
178 _parm_regs = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt ); | |
179 _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt ); | |
180 uint i; | |
181 for( i = 0; i<argcnt; i++ ) { | |
182 sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type(); | |
183 } | |
184 | |
185 // Pass array of ideal registers and length to USER code (from the AD file) | |
186 // that will convert this to an array of register numbers. | |
187 const StartNode *start = C->start(); | |
188 start->calling_convention( sig_bt, vm_parm_regs, argcnt ); | |
189 #ifdef ASSERT | |
190 // Sanity check users' calling convention. Real handy while trying to | |
191 // get the initial port correct. | |
192 { for (uint i = 0; i<argcnt; i++) { | |
193 if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) { | |
194 assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halve" ); | |
195 _parm_regs[i].set_bad(); | |
196 continue; | |
197 } | |
198 VMReg parm_reg = vm_parm_regs[i].first(); | |
199 assert(parm_reg->is_valid(), "invalid arg?"); | |
200 if (parm_reg->is_reg()) { | |
201 OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg); | |
202 assert(can_be_java_arg(opto_parm_reg) || | |
203 C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) || | |
204 opto_parm_reg == inline_cache_reg(), | |
205 "parameters in register must be preserved by runtime stubs"); | |
206 } | |
207 for (uint j = 0; j < i; j++) { | |
208 assert(parm_reg != vm_parm_regs[j].first(), | |
209 "calling conv. must produce distinct regs"); | |
210 } | |
211 } | |
212 } | |
213 #endif | |
214 | |
215 // Do some initial frame layout. | |
216 | |
217 // Compute the old incoming SP (may be called FP) as | |
218 // OptoReg::stack0() + locks + in_preserve_stack_slots + pad2. | |
219 _old_SP = C->compute_old_SP(); | |
220 assert( is_even(_old_SP), "must be even" ); | |
221 | |
222 // Compute highest incoming stack argument as | |
223 // _old_SP + out_preserve_stack_slots + incoming argument size. | |
224 _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots()); | |
225 assert( is_even(_in_arg_limit), "out_preserve must be even" ); | |
226 for( i = 0; i < argcnt; i++ ) { | |
227 // Permit args to have no register | |
228 _calling_convention_mask[i].Clear(); | |
229 if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) { | |
230 continue; | |
231 } | |
232 // calling_convention returns stack arguments as a count of | |
233 // slots beyond OptoReg::stack0()/VMRegImpl::stack0. We need to convert this to | |
234 // the allocators point of view, taking into account all the | |
235 // preserve area, locks & pad2. | |
236 | |
237 OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first()); | |
238 if( OptoReg::is_valid(reg1)) | |
239 _calling_convention_mask[i].Insert(reg1); | |
240 | |
241 OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second()); | |
242 if( OptoReg::is_valid(reg2)) | |
243 _calling_convention_mask[i].Insert(reg2); | |
244 | |
245 // Saved biased stack-slot register number | |
246 _parm_regs[i].set_pair(reg2, reg1); | |
247 } | |
248 | |
249 // Finally, make sure the incoming arguments take up an even number of | |
250 // words, in case the arguments or locals need to contain doubleword stack | |
251 // slots. The rest of the system assumes that stack slot pairs (in | |
252 // particular, in the spill area) which look aligned will in fact be | |
253 // aligned relative to the stack pointer in the target machine. Double | |
254 // stack slots will always be allocated aligned. | |
255 _new_SP = OptoReg::Name(round_to(_in_arg_limit, RegMask::SlotsPerLong)); | |
256 | |
257 // Compute highest outgoing stack argument as | |
258 // _new_SP + out_preserve_stack_slots + max(outgoing argument size). | |
259 _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots()); | |
260 assert( is_even(_out_arg_limit), "out_preserve must be even" ); | |
261 | |
262 if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) { | |
263 // the compiler cannot represent this method's calling sequence | |
264 C->record_method_not_compilable("must be able to represent all call arguments in reg mask"); | |
265 } | |
266 | |
267 if (C->failing()) return; // bailed out on incoming arg failure | |
268 | |
269 // --------------- | |
270 // Collect roots of matcher trees. Every node for which | |
271 // _shared[_idx] is cleared is guaranteed to not be shared, and thus | |
272 // can be a valid interior of some tree. | |
273 find_shared( C->root() ); | |
274 find_shared( C->top() ); | |
275 | |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
309
diff
changeset
|
276 C->print_method("Before Matching"); |
0 | 277 |
278 // Swap out to old-space; emptying new-space | |
279 Arena *old = C->node_arena()->move_contents(C->old_arena()); | |
280 | |
281 // Save debug and profile information for nodes in old space: | |
282 _old_node_note_array = C->node_note_array(); | |
283 if (_old_node_note_array != NULL) { | |
284 C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*> | |
285 (C->comp_arena(), _old_node_note_array->length(), | |
286 0, NULL)); | |
287 } | |
288 | |
289 // Pre-size the new_node table to avoid the need for range checks. | |
290 grow_new_node_array(C->unique()); | |
291 | |
292 // Reset node counter so MachNodes start with _idx at 0 | |
293 int nodes = C->unique(); // save value | |
294 C->set_unique(0); | |
295 | |
296 // Recursively match trees from old space into new space. | |
297 // Correct leaves of new-space Nodes; they point to old-space. | |
298 _visited.Clear(); // Clear visit bits for xform call | |
299 C->set_cached_top_node(xform( C->top(), nodes )); | |
300 if (!C->failing()) { | |
301 Node* xroot = xform( C->root(), 1 ); | |
302 if (xroot == NULL) { | |
303 Matcher::soft_match_failure(); // recursive matching process failed | |
304 C->record_method_not_compilable("instruction match failed"); | |
305 } else { | |
306 // During matching shared constants were attached to C->root() | |
307 // because xroot wasn't available yet, so transfer the uses to | |
308 // the xroot. | |
309 for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) { | |
310 Node* n = C->root()->fast_out(j); | |
311 if (C->node_arena()->contains(n)) { | |
312 assert(n->in(0) == C->root(), "should be control user"); | |
313 n->set_req(0, xroot); | |
314 --j; | |
315 --jmax; | |
316 } | |
317 } | |
318 | |
319 C->set_root(xroot->is_Root() ? xroot->as_Root() : NULL); | |
320 #ifdef ASSERT | |
321 verify_new_nodes_only(xroot); | |
322 #endif | |
323 } | |
324 } | |
325 if (C->top() == NULL || C->root() == NULL) { | |
326 C->record_method_not_compilable("graph lost"); // %%% cannot happen? | |
327 } | |
328 if (C->failing()) { | |
329 // delete old; | |
330 old->destruct_contents(); | |
331 return; | |
332 } | |
333 assert( C->top(), "" ); | |
334 assert( C->root(), "" ); | |
335 validate_null_checks(); | |
336 | |
337 // Now smoke old-space | |
338 NOT_DEBUG( old->destruct_contents() ); | |
339 | |
340 // ------------------------ | |
341 // Set up save-on-entry registers | |
342 Fixup_Save_On_Entry( ); | |
343 } | |
344 | |
345 | |
346 //------------------------------Fixup_Save_On_Entry---------------------------- | |
347 // The stated purpose of this routine is to take care of save-on-entry | |
348 // registers. However, the overall goal of the Match phase is to convert into | |
349 // machine-specific instructions which have RegMasks to guide allocation. | |
350 // So what this procedure really does is put a valid RegMask on each input | |
351 // to the machine-specific variations of all Return, TailCall and Halt | |
352 // instructions. It also adds edgs to define the save-on-entry values (and of | |
353 // course gives them a mask). | |
354 | |
355 static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { | |
356 RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size ); | |
357 // Do all the pre-defined register masks | |
358 rms[TypeFunc::Control ] = RegMask::Empty; | |
359 rms[TypeFunc::I_O ] = RegMask::Empty; | |
360 rms[TypeFunc::Memory ] = RegMask::Empty; | |
361 rms[TypeFunc::ReturnAdr] = ret_adr; | |
362 rms[TypeFunc::FramePtr ] = fp; | |
363 return rms; | |
364 } | |
365 | |
//---------------------------init_first_stack_mask-----------------------------
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
// initial mask accordingly.
void Matcher::init_first_stack_mask() {

  // Allocate storage for spill masks as masks for the appropriate load type.
  // 12 = 6 ideal register classes (N,I,L,F,D,P) x 2 tables (spill + debug).
  RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask)*12);
  idealreg2spillmask[Op_RegN] = &rms[0];
  idealreg2spillmask[Op_RegI] = &rms[1];
  idealreg2spillmask[Op_RegL] = &rms[2];
  idealreg2spillmask[Op_RegF] = &rms[3];
  idealreg2spillmask[Op_RegD] = &rms[4];
  idealreg2spillmask[Op_RegP] = &rms[5];
  idealreg2debugmask[Op_RegN] = &rms[6];
  idealreg2debugmask[Op_RegI] = &rms[7];
  idealreg2debugmask[Op_RegL] = &rms[8];
  idealreg2debugmask[Op_RegF] = &rms[9];
  idealreg2debugmask[Op_RegD] = &rms[10];
  idealreg2debugmask[Op_RegP] = &rms[11];

  OptoReg::Name i;

  // At first, start with the empty mask
  C->FIRST_STACK_mask().Clear();

  // Add in the incoming argument area
  OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
  for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1))
    C->FIRST_STACK_mask().Insert(i);

  // Add in all bits past the outgoing argument area
  guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
            "must be able to represent all call arguments in reg mask");
  init = _out_arg_limit;
  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
    C->FIRST_STACK_mask().Insert(i);

  // Finally, set the "infinite stack" bit.
  C->FIRST_STACK_mask().set_AllStack();

  // Make spill masks.  Registers for their class, plus FIRST_STACK_mask.
#ifdef _LP64
  *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
  idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
#endif
  *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
  idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
  idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
  idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
  idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
  idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());

  // Make up debug masks.  Any spill slot plus callee-save registers.
  // Caller-save registers are assumed to be trashable by the various
  // inline-cache fixup routines.
  *idealreg2debugmask[Op_RegN]= *idealreg2spillmask[Op_RegN];
  *idealreg2debugmask[Op_RegI]= *idealreg2spillmask[Op_RegI];
  *idealreg2debugmask[Op_RegL]= *idealreg2spillmask[Op_RegL];
  *idealreg2debugmask[Op_RegF]= *idealreg2spillmask[Op_RegF];
  *idealreg2debugmask[Op_RegD]= *idealreg2spillmask[Op_RegD];
  *idealreg2debugmask[Op_RegP]= *idealreg2spillmask[Op_RegP];

  // Prevent stub compilations from attempting to reference
  // callee-saved registers from debug info
  bool exclude_soe = !Compile::current()->is_method_compilation();

  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
    // registers the caller has to save do not work
    if( _register_save_policy[i] == 'C' ||
        _register_save_policy[i] == 'A' ||
        (_register_save_policy[i] == 'E' && exclude_soe) ) {
      idealreg2debugmask[Op_RegN]->Remove(i);
      idealreg2debugmask[Op_RegI]->Remove(i); // Exclude save-on-call
      idealreg2debugmask[Op_RegL]->Remove(i); // registers from debug
      idealreg2debugmask[Op_RegF]->Remove(i); // masks
      idealreg2debugmask[Op_RegD]->Remove(i);
      idealreg2debugmask[Op_RegP]->Remove(i);
    }
  }
}
451 | |
452 //---------------------------is_save_on_entry---------------------------------- | |
453 bool Matcher::is_save_on_entry( int reg ) { | |
454 return | |
455 _register_save_policy[reg] == 'E' || | |
456 _register_save_policy[reg] == 'A' || // Save-on-entry register? | |
457 // Also save argument registers in the trampolining stubs | |
458 (C->save_argument_registers() && is_spillable_arg(reg)); | |
459 } | |
460 | |
//---------------------------Fixup_Save_On_Entry-------------------------------
// Build the shared input-RegMask arrays for every exit flavor (Return,
// Rethrow, TailCall, TailJump, Halt), then give each save-on-entry register
// a MachProj definition off Start and a use on every exit, so the register
// allocator sees SOE registers as ordinary incoming values that must survive
// to each exit.
void Matcher::Fixup_Save_On_Entry( ) {
  init_first_stack_mask();

  Node *root = C->root();       // Short name for root
  // Count number of save-on-entry registers.
  uint soe_cnt = number_of_saved_registers();
  uint i;

  // Find the procedure Start Node
  StartNode *start = C->start();
  assert( start, "Expect a start node" );

  // Save argument registers in the trampolining stubs
  if( C->save_argument_registers() )
    for( i = 0; i < _last_Mach_Reg; i++ )
      if( is_spillable_arg(i) )
        soe_cnt++;

  // Input RegMask array shared by all Returns.
  // The type for doubles and longs has a count of 2, but
  // there is only 1 returned value
  uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
  RegMask *ret_rms = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  // Returns have 0 or 1 returned values depending on call signature.
  // Return register is specified by return_value in the AD file.
  if (ret_edge_cnt > TypeFunc::Parms)
    ret_rms[TypeFunc::Parms+0] = _return_value_mask;

  // Input RegMask array shared by all Rethrows.
  uint reth_edge_cnt = TypeFunc::Parms+1;
  RegMask *reth_rms = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  // Rethrow takes exception oop only, but in the argument 0 slot.
  reth_rms[TypeFunc::Parms] = mreg2regmask[find_receiver(false)];
#ifdef _LP64
  // Need two slots for ptrs in 64-bit land
  reth_rms[TypeFunc::Parms].Insert(OptoReg::add(OptoReg::Name(find_receiver(false)),1));
#endif

  // Input RegMask array shared by all TailCalls
  uint tail_call_edge_cnt = TypeFunc::Parms+2;
  RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // Input RegMask array shared by all TailJumps
  uint tail_jump_edge_cnt = TypeFunc::Parms+2;
  RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // TailCalls have 2 returned values (target & moop), whose masks come
  // from the usual MachNode/MachOper mechanism.  Find a sample
  // TailCall to extract these masks and put the correct masks into
  // the tail_call_rms array.
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *m = root->in(i)->as_MachReturn();
    if( m->ideal_Opcode() == Op_TailCall ) {
      tail_call_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
      tail_call_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
      break;
    }
  }

  // TailJumps have 2 returned values (target & ex_oop), whose masks come
  // from the usual MachNode/MachOper mechanism.  Find a sample
  // TailJump to extract these masks and put the correct masks into
  // the tail_jump_rms array.
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *m = root->in(i)->as_MachReturn();
    if( m->ideal_Opcode() == Op_TailJump ) {
      tail_jump_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
      tail_jump_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
      break;
    }
  }

  // Input RegMask array shared by all Halts
  uint halt_edge_cnt = TypeFunc::Parms;
  RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // Capture the return input masks into each exit flavor
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *exit = root->in(i)->as_MachReturn();
    switch( exit->ideal_Opcode() ) {
      case Op_Return   : exit->_in_rms = ret_rms;  break;
      case Op_Rethrow  : exit->_in_rms = reth_rms; break;
      case Op_TailCall : exit->_in_rms = tail_call_rms; break;
      case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
      case Op_Halt     : exit->_in_rms = halt_rms; break;
      default          : ShouldNotReachHere();
    }
  }

  // Next unused projection number from Start.
  int proj_cnt = C->tf()->domain()->cnt();

  // Do all the save-on-entry registers.  Make projections from Start for
  // them, and give them a use at the exit points.  To the allocator, they
  // look like incoming register arguments.
  for( i = 0; i < _last_Mach_Reg; i++ ) {
    if( is_save_on_entry(i) ) {

      // Add the save-on-entry to the mask array
      ret_rms      [      ret_edge_cnt] = mreg2regmask[i];
      reth_rms     [     reth_edge_cnt] = mreg2regmask[i];
      tail_call_rms[tail_call_edge_cnt] = mreg2regmask[i];
      tail_jump_rms[tail_jump_edge_cnt] = mreg2regmask[i];
      // Halts need the SOE registers, but only in the stack as debug info.
      // A just-prior uncommon-trap or deoptimization will use the SOE regs.
      halt_rms     [     halt_edge_cnt] = *idealreg2spillmask[_register_save_type[i]];

      Node *mproj;

      // Is this a RegF low half of a RegD?  Double up 2 adjacent RegF's
      // into a single RegD.
      if( (i&1) == 0 &&
          _register_save_type[i  ] == Op_RegF &&
          _register_save_type[i+1] == Op_RegF &&
          is_save_on_entry(i+1) ) {
        // Add other bit for double
        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
        mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
        proj_cnt += 2;          // Skip 2 for doubles
      }
      else if( (i&1) == 1 &&    // Else check for high half of double
               _register_save_type[i-1] == Op_RegF &&
               _register_save_type[i  ] == Op_RegF &&
               is_save_on_entry(i-1) ) {
        // High half already handled with the low half: empty masks, no proj.
        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
        mproj = C->top();
      }
      // Is this a RegI low half of a RegL?  Double up 2 adjacent RegI's
      // into a single RegL.
      else if( (i&1) == 0 &&
          _register_save_type[i  ] == Op_RegI &&
          _register_save_type[i+1] == Op_RegI &&
        is_save_on_entry(i+1) ) {
        // Add other bit for long
        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
        mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
        proj_cnt += 2;          // Skip 2 for longs
      }
      else if( (i&1) == 1 &&    // Else check for high half of long
               _register_save_type[i-1] == Op_RegI &&
               _register_save_type[i  ] == Op_RegI &&
               is_save_on_entry(i-1) ) {
        // High half already handled with the low half: empty masks, no proj.
        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
        mproj = C->top();
      } else {
        // Make a projection for it off the Start
        mproj = new (C, 1) MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
      }

      ret_edge_cnt ++;
      reth_edge_cnt ++;
      tail_call_edge_cnt ++;
      tail_jump_edge_cnt ++;
      halt_edge_cnt ++;

      // Add a use of the SOE register to all exit paths
      for( uint j=1; j < root->req(); j++ )
        root->in(j)->add_req(mproj);
    } // End of if a save-on-entry register
  } // End of for all machine registers
}
639 | |
//------------------------------init_spill_mask--------------------------------
// One-time initialization of the global spill masks and per-register masks.
// 'ret' is the Return node; its FramePtr/Memory inputs are borrowed to build
// prototype Load nodes whose matched output RegMasks are memoized in
// idealreg2regmask[] for each basic ideal register class.
void Matcher::init_spill_mask( Node *ret ) {
  // Guarded by the first entry: once idealreg2regmask[Op_RegI] is set,
  // everything below has already run.
  if( idealreg2regmask[Op_RegI] ) return; // One time only init

  OptoReg::c_frame_pointer = c_frame_pointer();
  c_frame_ptr_mask = c_frame_pointer();
#ifdef _LP64
  // pointers are twice as big: the frame pointer mask needs both halves
  c_frame_ptr_mask.Insert(OptoReg::add(c_frame_pointer(),1));
#endif

  // Start at OptoReg::stack0()
  STACK_ONLY_mask.Clear();
  OptoReg::Name init = OptoReg::stack2reg(0);
  // STACK_ONLY_mask is all stack bits: every representable stack slot
  OptoReg::Name i;
  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
    STACK_ONLY_mask.Insert(i);
  // Also set the "infinite stack" bit (slots beyond what the mask can name).
  STACK_ONLY_mask.set_AllStack();

  // Copy the register names over into the shared world
  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
    // SharedInfo::regName[i] = regName[i];
    // Handy RegMasks per machine register: singleton mask for each register
    mreg2regmask[i].Insert(i);
  }

  // Grab the Frame Pointer and Memory state off the Return node
  Node *fp  = ret->in(TypeFunc::FramePtr);
  Node *mem = ret->in(TypeFunc::Memory);
  const TypePtr* atp = TypePtr::BOTTOM;
  // Share frame pointer while making spill ops, so each prototype Load
  // matches the frame pointer as a register operand rather than subsuming it.
  set_shared(fp);

  // Compute generic short-offset Loads, one per ideal register class.
#ifdef _LP64
  MachNode *spillCP = match_tree(new (C, 3) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
#endif
  MachNode *spillI  = match_tree(new (C, 3) LoadINode(NULL,mem,fp,atp));
  MachNode *spillL  = match_tree(new (C, 3) LoadLNode(NULL,mem,fp,atp));
  MachNode *spillF  = match_tree(new (C, 3) LoadFNode(NULL,mem,fp,atp));
  MachNode *spillD  = match_tree(new (C, 3) LoadDNode(NULL,mem,fp,atp));
  MachNode *spillP  = match_tree(new (C, 3) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
  assert(spillI != NULL && spillL != NULL && spillF != NULL &&
         spillD != NULL && spillP != NULL, "");

  // Get the ADLC notion of the right regmask, for each basic type.
#ifdef _LP64
  idealreg2regmask[Op_RegN] = &spillCP->out_RegMask();
#endif
  idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
  idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
  idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
  idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
  idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
}
697 | |
#ifdef ASSERT
// Debug-only cross-check: after matching, the machine node 'm' must carry
// alias information compatible with the ideal node 'n' it replaces, modulo a
// set of known-benign, opcode-specific exceptions handled below.
static void match_alias_type(Compile* C, Node* n, Node* m) {
  if (!VerifyAliases)  return;  // do not go looking for trouble by default
  const TypePtr* nat = n->adr_type();
  const TypePtr* mat = m->adr_type();
  int nidx = C->get_alias_index(nat);
  int midx = C->get_alias_index(mat);
  // Detune the assert for cases like (AndI 0xFF (LoadB p)):
  // adopt the alias class of the last input that advertises one.
  if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
    for (uint k = 1; k < n->req(); k++) {
      const TypePtr* in_at = n->in(k)->adr_type();
      if (in_at != NULL) {
        nat  = in_at;
        nidx = C->get_alias_index(in_at);
      }
    }
  }
  // %%% Kludgery.  Instead, fix ideal adr_type methods for all these cases:
  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
    const int op = n->Opcode();
    if (op == Op_PrefetchRead || op == Op_PrefetchWrite) {
      nidx = Compile::AliasIdxRaw;
      nat  = TypeRawPtr::BOTTOM;
    }
  }
  if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
    if (n->Opcode() == Op_ClearArray) {
      midx = Compile::AliasIdxRaw;
      mat  = TypeRawPtr::BOTTOM;
    }
  }
  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
    const int op = n->Opcode();
    if (op == Op_Return   || op == Op_Rethrow || op == Op_Halt ||
        op == Op_TailCall || op == Op_TailJump) {
      nidx = Compile::AliasIdxBot;
      nat  = TypePtr::BOTTOM;
    }
  }
  if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
    const int op = n->Opcode();
    if (op == Op_StrComp        || op == Op_AryEq ||
        op == Op_MemBarVolatile ||
        op == Op_MemBarCPUOrder) {  // %%% these ideals should have narrower adr_type?
      nidx = Compile::AliasIdxTop;
      nat  = NULL;
    }
  }
  if (nidx != midx) {
    if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
      tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
      n->dump();
      m->dump();
    }
    assert(C->subsume_loads() && C->must_alias(nat, midx),
           "must not lose alias info when matching");
  }
}
#endif
768 | |
769 | |
//------------------------------MStack-----------------------------------------
// State and MStack class used in xform() and find_shared() iterative methods.
// Each visit state tells the driver loop what to do when the node surfaces.
enum Node_State { Pre_Visit,  // node has to be pre-visited
                      Visit,  // visit node
                 Post_Visit,  // post-visit node
             Alt_Post_Visit   // alternative post-visit path
                };

// A Node_Stack specialized to carry (node, state) pairs, optionally preceded
// by a (parent, input-index) pair pushed in the same operation.  parent() and
// index() later recover the parent entry after the child entry is popped.
class MStack: public Node_Stack {
  public:
    MStack(int size) : Node_Stack(size) { }

    // Push a node together with its visit state.
    void push(Node *n, Node_State ns) {
      Node_Stack::push(n, (uint)ns);
    }
    // Push two entries at once: (parent, indx) below, (n, ns) on top.
    // indx == -1 marks a precedence edge (handled via add_prec, not set_req).
    // Writes directly into the Node_Stack storage; the grow() check must
    // happen after the first bump so both slots are guaranteed to fit.
    void push(Node *n, Node_State ns, Node *parent, int indx) {
      ++_inode_top;
      if ((_inode_top + 1) >= _inode_max) grow();
      _inode_top->node = parent;
      _inode_top->indx = (uint)indx;
      ++_inode_top;
      _inode_top->node = n;
      _inode_top->indx = (uint)ns;
    }
    // Pop the child entry and expose the parent entry underneath it.
    // NULL parent (pushed at the root) means there is no enclosing node.
    Node *parent() {
      pop();
      return node();
    }
    // Visit state of the top entry.
    Node_State state() const {
      return (Node_State)index();
    }
    // Overwrite the visit state of the top entry in place.
    void set_state(Node_State ns) {
      set_index((uint)ns);
    }
};
805 | |
806 | |
807 //------------------------------xform------------------------------------------ | |
808 // Given a Node in old-space, Match him (Label/Reduce) to produce a machine | |
809 // Node in new-space. Given a new-space Node, recursively walk his children. | |
810 Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; } | |
// Iteratively translate the old-space (ideal) graph rooted at 'n' into the
// new-space (machine) graph: each old node is matched (or cloned) into a
// machine node, children are walked, and parent input edges are rewired in
// the Post_Visit phase.  'max_stack' bounds the work stack (C->unique()).
// Returns the new-space replacement for 'n', or NULL on match failure.
Node *Matcher::xform( Node *n, int max_stack ) {
  // Use one stack to keep both: child's node/state and parent's node/index
  MStack mstack(max_stack * 2 * 2); // C->unique() * 2 * 2
  mstack.push(n, Visit, NULL, -1);  // set NULL as parent to indicate root

  while (mstack.is_nonempty()) {
    n = mstack.node();          // Leave node on stack
    Node_State nstate = mstack.state();
    if (nstate == Visit) {
      mstack.set_state(Post_Visit);
      Node *oldn = n;
      // Old-space or new-space check
      if (!C->node_arena()->contains(n)) {
        // Old space!
        Node* m;
        if (has_new_node(n)) {  // Not yet Label/Reduced
          m = new_node(n);
        } else {
          if (!is_dontcare(n)) { // Matcher can match this guy
            // Calls match special.  They match alone with no children.
            // Their children, the incoming arguments, match normally.
            m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
            if (C->failing())  return NULL;
            if (m == NULL) { Matcher::soft_match_failure(); return NULL; }
          } else {                  // Nothing the matcher cares about
            if( n->is_Proj() && n->in(0)->is_Multi()) {       // Projections?
              // Convert to machine-dependent projection
              m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
#ifdef ASSERT
              _new2old_map.map(m->_idx, n);
#endif
              if (m->in(0) != NULL) // m might be top
                collect_null_checks(m, n);
            } else {                // Else just a regular 'ol guy
              m = n->clone();       // So just clone into new-space
#ifdef ASSERT
              _new2old_map.map(m->_idx, n);
#endif
              // Def-Use edges will be added incrementally as Uses
              // of this node are matched.
              assert(m->outcnt() == 0, "no Uses of this clone yet");
            }
          }

          set_new_node(n, m);       // Map old to new
          if (_old_node_note_array != NULL) {
            // Carry the node notes (debug provenance) across the translation.
            Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
                                                  n->_idx);
            C->set_node_notes_at(m->_idx, nn);
          }
          debug_only(match_alias_type(C, n, m));
        }
        n = m;    // n is now a new-space node
        mstack.set_node(n);
      }

      // New space!
      if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())

      int i;
      // Put precedence edges on stack first (match them last).
      for (i = oldn->req(); (uint)i < oldn->len(); i++) {
        Node *m = oldn->in(i);
        if (m == NULL) break;
        // set -1 to call add_prec() instead of set_req() during Step1
        mstack.push(m, Visit, n, -1);
      }

      // For constant debug info, I'd rather have unmatched constants.
      int cnt = n->req();
      JVMState* jvms = n->jvms();
      int debug_cnt = jvms ? jvms->debug_start() : cnt;

      // Now do only debug info.  Clone constants rather than matching.
      // Constants are represented directly in the debug info without
      // the need for executable machine instructions.
      // Monitor boxes are also represented directly.
      for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
        Node *m = n->in(i);          // Get input
        int op = m->Opcode();
        assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
        if( op == Op_ConI || op == Op_ConP || op == Op_ConN ||
            op == Op_ConF || op == Op_ConD || op == Op_ConL
            // || op == Op_BoxLock  // %%%% enable this and remove (+++) in chaitin.cpp
            ) {
          // Clone the constant per use-site; no machine node is needed.
          m = m->clone();
#ifdef ASSERT
          _new2old_map.map(m->_idx, n);
#endif
          mstack.push(m, Post_Visit, n, i); // Don't need to visit
          mstack.push(m->in(0), Visit, m, 0);
        } else {
          mstack.push(m, Visit, n, i);
        }
      }

      // And now walk his children, and convert his inputs to new-space.
      for( ; i >= 0; --i ) { // For all normal inputs do
        Node *m = n->in(i);  // Get input
        if(m != NULL)
          mstack.push(m, Visit, n, i);
      }

    }
    else if (nstate == Post_Visit) {
      // Set xformed input: hook the (now new-space) child into its parent.
      Node *p = mstack.parent();
      if (p != NULL) { // root doesn't have parent
        int i = (int)mstack.index();
        if (i >= 0)
          p->set_req(i, n); // required input
        else if (i == -1)
          p->add_prec(n);   // precedence input
        else
          ShouldNotReachHere();
      }
      mstack.pop(); // remove processed node from stack
    }
    else {
      ShouldNotReachHere();
    }
  } // while (mstack.is_nonempty())
  return n; // Return new-space Node
}
935 | |
936 //------------------------------warp_outgoing_stk_arg------------------------ | |
937 OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) { | |
938 // Convert outgoing argument location to a pre-biased stack offset | |
939 if (reg->is_stack()) { | |
940 OptoReg::Name warped = reg->reg2stack(); | |
941 // Adjust the stack slot offset to be the register number used | |
942 // by the allocator. | |
943 warped = OptoReg::add(begin_out_arg_area, warped); | |
944 // Keep track of the largest numbered stack slot used for an arg. | |
945 // Largest used slot per call-site indicates the amount of stack | |
946 // that is killed by the call. | |
947 if( warped >= out_arg_limit_per_call ) | |
948 out_arg_limit_per_call = OptoReg::add(warped,1); | |
949 if (!RegMask::can_represent(warped)) { | |
950 C->record_method_not_compilable_all_tiers("unsupported calling sequence"); | |
951 return OptoReg::Bad; | |
952 } | |
953 return warped; | |
954 } | |
955 return OptoReg::as_OptoReg(reg); | |
956 } | |
957 | |
958 | |
//------------------------------match_sfpt-------------------------------------
// Helper function to match call instructions.  Calls match special.
// They match alone with no children.  Their children, the incoming
// arguments, match normally.
// Produces the MachSafePoint/MachCall replacement for 'sfpt', copying call
// metadata, building per-input argument RegMasks from the calling convention,
// and recording the outgoing-argument stack area killed by the call.
MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
  MachSafePointNode *msfpt = NULL;
  MachCallNode      *mcall = NULL;
  uint cnt;
  // Split out case for SafePoint vs Call
  CallNode *call;
  const TypeTuple *domain;
  ciMethod* method = NULL;
  if( sfpt->is_Call() ) {
    call = sfpt->as_Call();
    domain = call->tf()->domain();
    cnt = domain->cnt();

    // Match just the call, nothing else
    MachNode *m = match_tree(call);
    if (C->failing())  return NULL;
    if( m == NULL ) { Matcher::soft_match_failure(); return NULL; }

    // Copy data from the Ideal SafePoint to the machine version
    mcall = m->as_MachCall();

    mcall->set_tf(         call->tf());
    mcall->set_entry_point(call->entry_point());
    mcall->set_cnt(        call->cnt());

    if( mcall->is_MachCallJava() ) {
      // Java calls additionally carry method identity and dispatch info.
      MachCallJavaNode *mcall_java  = mcall->as_MachCallJava();
      const CallJavaNode *call_java =  call->as_CallJava();
      method = call_java->method();
      mcall_java->_method = method;
      mcall_java->_bci = call_java->_bci;
      mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
      if( mcall_java->is_MachCallStaticJava() )
        mcall_java->as_MachCallStaticJava()->_name =
         call_java->as_CallStaticJava()->_name;
      if( mcall_java->is_MachCallDynamicJava() )
        mcall_java->as_MachCallDynamicJava()->_vtable_index =
         call_java->as_CallDynamicJava()->_vtable_index;
    }
    else if( mcall->is_MachCallRuntime() ) {
      mcall->as_MachCallRuntime()->_name = call->as_CallRuntime()->_name;
    }
    msfpt = mcall;
  }
  // This is a non-call safepoint
  else {
    call = NULL;
    domain = NULL;
    MachNode *mn = match_tree(sfpt);
    if (C->failing())  return NULL;
    msfpt = mn->as_MachSafePoint();
    cnt = TypeFunc::Parms;   // no arguments beyond the fixed control inputs
  }

  // Advertise the correct memory effects (for anti-dependence computation).
  msfpt->set_adr_type(sfpt->adr_type());

  // Allocate a private array of RegMasks.  These RegMasks are not shared.
  msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
  // Empty them all.
  memset( msfpt->_in_rms, 0, sizeof(RegMask)*cnt );

  // Do all the pre-defined non-Empty register masks
  msfpt->_in_rms[TypeFunc::ReturnAdr] = _return_addr_mask;
  msfpt->_in_rms[TypeFunc::FramePtr ] = c_frame_ptr_mask;

  // Place first outgoing argument can possibly be put.
  OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
  assert( is_even(begin_out_arg_area), "" );
  // Compute max outgoing register number per call site.
  OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
  // Calls to C may hammer extra stack slots above and beyond any arguments.
  // These are usually backing store for register arguments for varargs.
  if( call != NULL && call->is_CallRuntime() )
    out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());


  // Do the normal argument list (parameters) register masks
  int argcnt = cnt - TypeFunc::Parms;
  if( argcnt > 0 ) {          // Skip it all if we have no args
    BasicType *sig_bt  = NEW_RESOURCE_ARRAY( BasicType, argcnt );
    VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
    int i;
    for( i = 0; i < argcnt; i++ ) {
      sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
    }
    // V-call to pick proper calling convention
    call->calling_convention( sig_bt, parm_regs, argcnt );

#ifdef ASSERT
    // Sanity check users' calling convention.  Really handy during
    // the initial porting effort.  Fairly expensive otherwise.
    { for (int i = 0; i<argcnt; i++) {
      if( !parm_regs[i].first()->is_valid() &&
          !parm_regs[i].second()->is_valid() ) continue;
      VMReg reg1 = parm_regs[i].first();
      VMReg reg2 = parm_regs[i].second();
      for (int j = 0; j < i; j++) {
        if( !parm_regs[j].first()->is_valid() &&
            !parm_regs[j].second()->is_valid() ) continue;
        VMReg reg3 = parm_regs[j].first();
        VMReg reg4 = parm_regs[j].second();
        if( !reg1->is_valid() ) {
          assert( !reg2->is_valid(), "valid halvsies" );
        } else if( !reg3->is_valid() ) {
          assert( !reg4->is_valid(), "valid halvsies" );
        } else {
          assert( reg1 != reg2, "calling conv. must produce distinct regs");
          assert( reg1 != reg3, "calling conv. must produce distinct regs");
          assert( reg1 != reg4, "calling conv. must produce distinct regs");
          assert( reg2 != reg3, "calling conv. must produce distinct regs");
          assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
          assert( reg3 != reg4, "calling conv. must produce distinct regs");
        }
      }
    }
    }
#endif

    // Visit each argument.  Compute its outgoing register mask.
    // Return results now can have 2 bits returned.
    // Compute max over all outgoing arguments both per call-site
    // and over the entire method.
    for( i = 0; i < argcnt; i++ ) {
      // Address of incoming argument mask to fill in
      RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
      if( !parm_regs[i].first()->is_valid() &&
          !parm_regs[i].second()->is_valid() ) {
        continue;               // Avoid Halves
      }
      // Grab first register, adjust stack slots and insert in mask.
      OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call );
      if (OptoReg::is_valid(reg1))
        rm->Insert( reg1 );
      // Grab second register (if any), adjust stack slots and insert in mask.
      OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call );
      if (OptoReg::is_valid(reg2))
        rm->Insert( reg2 );
    } // End of for all arguments

    // Compute number of stack slots needed to restore stack in case of
    // Pascal-style argument popping.
    mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area;
  }

  // Compute the max stack slot killed by any call.  These will not be
  // available for debug info, and will be used to adjust FIRST_STACK_mask
  // after all call sites have been visited.
  if( _out_arg_limit < out_arg_limit_per_call)
    _out_arg_limit = out_arg_limit_per_call;

  if (mcall) {
    // Kill the outgoing argument area, including any non-argument holes and
    // any legacy C-killed slots.  Use Fat-Projections to do the killing.
    // Since the max-per-method covers the max-per-call-site and debug info
    // is excluded on the max-per-method basis, debug info cannot land in
    // this killed area.
    uint r_cnt = mcall->tf()->range()->cnt();
    MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
    if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
      C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
    } else {
      for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
        proj->_rout.Insert(OptoReg::Name(i));
    }
    if( proj->_rout.is_NotEmpty() )
      _proj_list.push(proj);
  }
  // Transfer the safepoint information from the call to the mcall
  // Move the JVMState list
  msfpt->set_jvms(sfpt->jvms());
  for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
    jvms->set_map(sfpt);
  }

  // Debug inputs begin just after the last incoming parameter
  assert( (mcall == NULL) || (mcall->jvms() == NULL) ||
          (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "" );

  // Move the OopMap
  msfpt->_oop_map = sfpt->_oop_map;

  // Registers killed by the call are set in the local scheduling pass
  // of Global Code Motion.
  return msfpt;
}
1149 | |
//---------------------------match_tree----------------------------------------
// Match a Ideal Node DAG - turn it into a tree; Label & Reduce.  Used as part
// of the whole-sale conversion from Ideal to Mach Nodes.  Also used for
// making GotoNodes while building the CFG and in init_spill_mask() to identify
// a Load's result RegMask for memoization in idealreg2regmask[]
// Returns the reduced MachNode, or NULL when no rule matches (soft failure)
// or the compile has already failed.
MachNode *Matcher::match_tree( const Node *n ) {
  assert( n->Opcode() != Op_Phi, "cannot match" );
  assert( !n->is_block_start(), "cannot match" );
  // Set the mark for all locally allocated State objects.
  // When this call returns, the _states_arena arena will be reset
  // freeing all State objects.
  ResourceMark rm( &_states_arena );

  LabelRootDepth = 0;

  // StoreNodes require their Memory input to match any LoadNodes.
  // (Node*)1 is a sentinel meaning "no store memory constraint".
  Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
#ifdef ASSERT
  // Remember the store being matched so debug checks elsewhere can verify
  // that memory-node adr_types do not change during matching.
  Node* save_mem_node = _mem_node;
  _mem_node = n->is_Store() ? (Node*)n : NULL;
#endif
  // State object for root node of match tree
  // Allocate it on _states_arena - stack allocation can cause stack overflow.
  State *s = new (&_states_arena) State;
  s->_kids[0] = NULL;
  s->_kids[1] = NULL;
  s->_leaf = (Node*)n;
  // Label the input tree, allocating labels from top-level arena
  Label_Root( n, s, n->in(0), mem );
  if (C->failing())  return NULL;

  // The minimum cost match for the whole tree is found at the root State
  uint mincost = max_juint;
  uint cost = max_juint;
  uint i;
  for( i = 0; i < NUM_OPERANDS; i++ ) {
    if( s->valid(i) &&                // valid entry and
        s->_cost[i] < cost &&         // low cost and
        s->_rule[i] >= NUM_OPERANDS ) // not an operand
      cost = s->_cost[mincost=i];
  }
  if (mincost == max_juint) {
    // No instruction rule applied to this tree at all.
#ifndef PRODUCT
    tty->print("No matching rule for:");
    s->dump();
#endif
    Matcher::soft_match_failure();
    return NULL;
  }
  // Reduce input tree based upon the state labels to machine Nodes
  MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
#ifdef ASSERT
  _old2new_map.map(n->_idx, m);
  _new2old_map.map(m->_idx, (Node*)n);
#endif

  // Add any Matcher-ignored edges (inputs the match rules do not consume)
  // onto the machine node so they survive the translation.
  uint cnt = n->req();
  uint start = 1;
  if( mem != (Node*)1 ) start = MemNode::Memory+1;
  if( n->is_AddP() ) {
    assert( mem == (Node*)1, "" );
    start = AddPNode::Base+1;
  }
  for( i = start; i < cnt; i++ ) {
    if( !n->match_edge(i) ) {
      if( i < m->req() )
        m->ins_req( i, n->in(i) );
      else
        m->add_req( n->in(i) );
    }
  }

  debug_only( _mem_node = save_mem_node; )
  return m;
}
1226 | |
1227 | |
1228 //------------------------------match_into_reg--------------------------------- | |
1229 // Choose to either match this Node in a register or part of the current | |
1230 // match tree. Return true for requiring a register and false for matching | |
1231 // as part of the current match tree. | |
1232 static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) { | |
1233 | |
1234 const Type *t = m->bottom_type(); | |
1235 | |
1236 if( t->singleton() ) { | |
1237 // Never force constants into registers. Allow them to match as | |
1238 // constants or registers. Copies of the same value will share | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1239 // the same register. See find_shared_node. |
0 | 1240 return false; |
1241 } else { // Not a constant | |
1242 // Stop recursion if they have different Controls. | |
1243 // Slot 0 of constants is not really a Control. | |
1244 if( control && m->in(0) && control != m->in(0) ) { | |
1245 | |
1246 // Actually, we can live with the most conservative control we | |
1247 // find, if it post-dominates the others. This allows us to | |
1248 // pick up load/op/store trees where the load can float a little | |
1249 // above the store. | |
1250 Node *x = control; | |
1251 const uint max_scan = 6; // Arbitrary scan cutoff | |
1252 uint j; | |
1253 for( j=0; j<max_scan; j++ ) { | |
1254 if( x->is_Region() ) // Bail out at merge points | |
1255 return true; | |
1256 x = x->in(0); | |
1257 if( x == m->in(0) ) // Does 'control' post-dominate | |
1258 break; // m->in(0)? If so, we can use it | |
1259 } | |
1260 if( j == max_scan ) // No post-domination before scan end? | |
1261 return true; // Then break the match tree up | |
1262 } | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1263 if (m->is_DecodeN() && Matcher::clone_shift_expressions) { |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
1264 // These are commonly used in address expressions and can |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1265 // efficiently fold into them on X64 in some cases. |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1266 return false; |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
1267 } |
0 | 1268 } |
1269 | |
1270 // Not forceably cloning. If shared, put it into a register. | |
1271 return shared; | |
1272 } | |
1273 | |
1274 | |
1275 //------------------------------Instruction Selection-------------------------- | |
1276 // Label method walks a "tree" of nodes, using the ADLC generated DFA to match | |
1277 // ideal nodes to machine instructions. Trees are delimited by shared Nodes, | |
1278 // things the Matcher does not match (e.g., Memory), and things with different | |
1279 // Controls (hence forced into different blocks). We pass in the Control | |
1280 // selected for this entire State tree. | |
1281 | |
1282 // The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the | |
1283 // Store and the Load must have identical Memories (as well as identical | |
1284 // pointers). Since the Matcher does not have anything for Memory (and | |
1285 // does not handle DAGs), I have to match the Memory input myself. If the | |
1286 // Tree root is a Store, I require all Loads to have the identical memory. | |
1287 Node *Matcher::Label_Root( const Node *n, State *svec, Node *control, const Node *mem){ | |
1288 // Since Label_Root is a recursive function, its possible that we might run | |
1289 // out of stack space. See bugs 6272980 & 6227033 for more info. | |
1290 LabelRootDepth++; | |
1291 if (LabelRootDepth > MaxLabelRootDepth) { | |
1292 C->record_method_not_compilable_all_tiers("Out of stack space, increase MaxLabelRootDepth"); | |
1293 return NULL; | |
1294 } | |
1295 uint care = 0; // Edges matcher cares about | |
1296 uint cnt = n->req(); | |
1297 uint i = 0; | |
1298 | |
1299 // Examine children for memory state | |
1300 // Can only subsume a child into your match-tree if that child's memory state | |
1301 // is not modified along the path to another input. | |
1302 // It is unsafe even if the other inputs are separate roots. | |
1303 Node *input_mem = NULL; | |
1304 for( i = 1; i < cnt; i++ ) { | |
1305 if( !n->match_edge(i) ) continue; | |
1306 Node *m = n->in(i); // Get ith input | |
1307 assert( m, "expect non-null children" ); | |
1308 if( m->is_Load() ) { | |
1309 if( input_mem == NULL ) { | |
1310 input_mem = m->in(MemNode::Memory); | |
1311 } else if( input_mem != m->in(MemNode::Memory) ) { | |
1312 input_mem = NodeSentinel; | |
1313 } | |
1314 } | |
1315 } | |
1316 | |
1317 for( i = 1; i < cnt; i++ ){// For my children | |
1318 if( !n->match_edge(i) ) continue; | |
1319 Node *m = n->in(i); // Get ith input | |
1320 // Allocate states out of a private arena | |
1321 State *s = new (&_states_arena) State; | |
1322 svec->_kids[care++] = s; | |
1323 assert( care <= 2, "binary only for now" ); | |
1324 | |
1325 // Recursively label the State tree. | |
1326 s->_kids[0] = NULL; | |
1327 s->_kids[1] = NULL; | |
1328 s->_leaf = m; | |
1329 | |
1330 // Check for leaves of the State Tree; things that cannot be a part of | |
1331 // the current tree. If it finds any, that value is matched as a | |
1332 // register operand. If not, then the normal matching is used. | |
1333 if( match_into_reg(n, m, control, i, is_shared(m)) || | |
1334 // | |
1335 // Stop recursion if this is LoadNode and the root of this tree is a | |
1336 // StoreNode and the load & store have different memories. | |
1337 ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) || | |
1338 // Can NOT include the match of a subtree when its memory state | |
1339 // is used by any of the other subtrees | |
1340 (input_mem == NodeSentinel) ) { | |
1341 #ifndef PRODUCT | |
1342 // Print when we exclude matching due to different memory states at input-loads | |
1343 if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel) | |
1344 && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) { | |
1345 tty->print_cr("invalid input_mem"); | |
1346 } | |
1347 #endif | |
1348 // Switch to a register-only opcode; this value must be in a register | |
1349 // and cannot be subsumed as part of a larger instruction. | |
1350 s->DFA( m->ideal_reg(), m ); | |
1351 | |
1352 } else { | |
1353 // If match tree has no control and we do, adopt it for entire tree | |
1354 if( control == NULL && m->in(0) != NULL && m->req() > 1 ) | |
1355 control = m->in(0); // Pick up control | |
1356 // Else match as a normal part of the match tree. | |
1357 control = Label_Root(m,s,control,mem); | |
1358 if (C->failing()) return NULL; | |
1359 } | |
1360 } | |
1361 | |
1362 | |
1363 // Call DFA to match this node, and return | |
1364 svec->DFA( n->Opcode(), n ); | |
1365 | |
1366 #ifdef ASSERT | |
1367 uint x; | |
1368 for( x = 0; x < _LAST_MACH_OPER; x++ ) | |
1369 if( svec->valid(x) ) | |
1370 break; | |
1371 | |
1372 if (x >= _LAST_MACH_OPER) { | |
1373 n->dump(); | |
1374 svec->dump(); | |
1375 assert( false, "bad AD file" ); | |
1376 } | |
1377 #endif | |
1378 return control; | |
1379 } | |
1380 | |
1381 | |
1382 // Con nodes reduced using the same rule can share their MachNode | |
1383 // which reduces the number of copies of a constant in the final | |
1384 // program. The register allocator is free to split uses later to | |
1385 // split live ranges. | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1386 MachNode* Matcher::find_shared_node(Node* leaf, uint rule) { |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1387 if (!leaf->is_Con() && !leaf->is_DecodeN()) return NULL; |
0 | 1388 |
1389 // See if this Con has already been reduced using this rule. | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1390 if (_shared_nodes.Size() <= leaf->_idx) return NULL; |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1391 MachNode* last = (MachNode*)_shared_nodes.at(leaf->_idx); |
0 | 1392 if (last != NULL && rule == last->rule()) { |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1393 // Don't expect control change for DecodeN |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1394 if (leaf->is_DecodeN()) |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1395 return last; |
0 | 1396 // Get the new space root. |
1397 Node* xroot = new_node(C->root()); | |
1398 if (xroot == NULL) { | |
1399 // This shouldn't happen give the order of matching. | |
1400 return NULL; | |
1401 } | |
1402 | |
1403 // Shared constants need to have their control be root so they | |
1404 // can be scheduled properly. | |
1405 Node* control = last->in(0); | |
1406 if (control != xroot) { | |
1407 if (control == NULL || control == C->root()) { | |
1408 last->set_req(0, xroot); | |
1409 } else { | |
1410 assert(false, "unexpected control"); | |
1411 return NULL; | |
1412 } | |
1413 } | |
1414 return last; | |
1415 } | |
1416 return NULL; | |
1417 } | |
1418 | |
1419 | |
1420 //------------------------------ReduceInst------------------------------------- | |
1421 // Reduce a State tree (with given Control) into a tree of MachNodes. | |
1422 // This routine (and it's cohort ReduceOper) convert Ideal Nodes into | |
1423 // complicated machine Nodes. Each MachNode covers some tree of Ideal Nodes. | |
1424 // Each MachNode has a number of complicated MachOper operands; each | |
1425 // MachOper also covers a further tree of Ideal Nodes. | |
1426 | |
1427 // The root of the Ideal match tree is always an instruction, so we enter | |
1428 // the recursion here. After building the MachNode, we need to recurse | |
1429 // the tree checking for these cases: | |
1430 // (1) Child is an instruction - | |
1431 // Build the instruction (recursively), add it as an edge. | |
1432 // Build a simple operand (register) to hold the result of the instruction. | |
1433 // (2) Child is an interior part of an instruction - | |
1434 // Skip over it (do nothing) | |
1435 // (3) Child is the start of a operand - | |
1436 // Build the operand, place it inside the instruction | |
1437 // Call ReduceOper. | |
1438 MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) { | |
1439 assert( rule >= NUM_OPERANDS, "called with operand rule" ); | |
1440 | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1441 MachNode* shared_node = find_shared_node(s->_leaf, rule); |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1442 if (shared_node != NULL) { |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1443 return shared_node; |
0 | 1444 } |
1445 | |
1446 // Build the object to represent this state & prepare for recursive calls | |
1447 MachNode *mach = s->MachNodeGenerator( rule, C ); | |
1448 mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C ); | |
1449 assert( mach->_opnds[0] != NULL, "Missing result operand" ); | |
1450 Node *leaf = s->_leaf; | |
1451 // Check for instruction or instruction chain rule | |
1452 if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) { | |
309
eaf496ad4a14
6732698: crash with dead code from compressed oops in gcm
never
parents:
235
diff
changeset
|
1453 assert(C->node_arena()->contains(s->_leaf) || !has_new_node(s->_leaf), |
eaf496ad4a14
6732698: crash with dead code from compressed oops in gcm
never
parents:
235
diff
changeset
|
1454 "duplicating node that's already been matched"); |
0 | 1455 // Instruction |
1456 mach->add_req( leaf->in(0) ); // Set initial control | |
1457 // Reduce interior of complex instruction | |
1458 ReduceInst_Interior( s, rule, mem, mach, 1 ); | |
1459 } else { | |
1460 // Instruction chain rules are data-dependent on their inputs | |
1461 mach->add_req(0); // Set initial control to none | |
1462 ReduceInst_Chain_Rule( s, rule, mem, mach ); | |
1463 } | |
1464 | |
1465 // If a Memory was used, insert a Memory edge | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1466 if( mem != (Node*)1 ) { |
0 | 1467 mach->ins_req(MemNode::Memory,mem); |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1468 #ifdef ASSERT |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1469 // Verify adr type after matching memory operation |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1470 const MachOper* oper = mach->memory_operand(); |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1471 if (oper != NULL && oper != (MachOper*)-1 && |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1472 mach->adr_type() != TypeRawPtr::BOTTOM) { // non-direct addressing mode |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1473 // It has a unique memory operand. Find corresponding ideal mem node. |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1474 Node* m = NULL; |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1475 if (leaf->is_Mem()) { |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1476 m = leaf; |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1477 } else { |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1478 m = _mem_node; |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1479 assert(m != NULL && m->is_Mem(), "expecting memory node"); |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1480 } |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1481 const Type* mach_at = mach->adr_type(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1482 // DecodeN node consumed by an address may have different type |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1483 // then its input. Don't compare types for such case. |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1484 if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() && |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1485 m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1486 mach_at = m->adr_type(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1487 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1488 if (m->adr_type() != mach_at) { |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1489 m->dump(); |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1490 tty->print_cr("mach:"); |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1491 mach->dump(1); |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1492 } |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
1493 assert(m->adr_type() == mach_at, "matcher should not change adr type"); |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1494 } |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1495 #endif |
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1496 } |
0 | 1497 |
1498 // If the _leaf is an AddP, insert the base edge | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1499 if( leaf->is_AddP() ) |
0 | 1500 mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base)); |
1501 | |
1502 uint num_proj = _proj_list.size(); | |
1503 | |
1504 // Perform any 1-to-many expansions required | |
1505 MachNode *ex = mach->Expand(s,_proj_list); | |
1506 if( ex != mach ) { | |
1507 assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match"); | |
1508 if( ex->in(1)->is_Con() ) | |
1509 ex->in(1)->set_req(0, C->root()); | |
1510 // Remove old node from the graph | |
1511 for( uint i=0; i<mach->req(); i++ ) { | |
1512 mach->set_req(i,NULL); | |
1513 } | |
222 | 1514 #ifdef ASSERT |
1515 _new2old_map.map(ex->_idx, s->_leaf); | |
1516 #endif | |
0 | 1517 } |
1518 | |
1519 // PhaseChaitin::fixup_spills will sometimes generate spill code | |
1520 // via the matcher. By the time, nodes have been wired into the CFG, | |
1521 // and any further nodes generated by expand rules will be left hanging | |
1522 // in space, and will not get emitted as output code. Catch this. | |
1523 // Also, catch any new register allocation constraints ("projections") | |
1524 // generated belatedly during spill code generation. | |
1525 if (_allocation_started) { | |
1526 guarantee(ex == mach, "no expand rules during spill generation"); | |
1527 guarantee(_proj_list.size() == num_proj, "no allocation during spill generation"); | |
1528 } | |
1529 | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1530 if (leaf->is_Con() || leaf->is_DecodeN()) { |
0 | 1531 // Record the con for sharing |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1532 _shared_nodes.map(leaf->_idx, ex); |
0 | 1533 } |
1534 | |
1535 return ex; | |
1536 } | |
1537 | |
1538 void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) { | |
1539 // 'op' is what I am expecting to receive | |
1540 int op = _leftOp[rule]; | |
1541 // Operand type to catch childs result | |
1542 // This is what my child will give me. | |
1543 int opnd_class_instance = s->_rule[op]; | |
1544 // Choose between operand class or not. | |
1545 // This is what I will recieve. | |
1546 int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op; | |
1547 // New rule for child. Chase operand classes to get the actual rule. | |
1548 int newrule = s->_rule[catch_op]; | |
1549 | |
1550 if( newrule < NUM_OPERANDS ) { | |
1551 // Chain from operand or operand class, may be output of shared node | |
1552 assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS, | |
1553 "Bad AD file: Instruction chain rule must chain from operand"); | |
1554 // Insert operand into array of operands for this instruction | |
1555 mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C ); | |
1556 | |
1557 ReduceOper( s, newrule, mem, mach ); | |
1558 } else { | |
1559 // Chain from the result of an instruction | |
1560 assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand"); | |
1561 mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C ); | |
1562 Node *mem1 = (Node*)1; | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1563 debug_only(Node *save_mem_node = _mem_node;) |
0 | 1564 mach->add_req( ReduceInst(s, newrule, mem1) ); |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1565 debug_only(_mem_node = save_mem_node;) |
0 | 1566 } |
1567 return; | |
1568 } | |
1569 | |
1570 | |
1571 uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) { | |
1572 if( s->_leaf->is_Load() ) { | |
1573 Node *mem2 = s->_leaf->in(MemNode::Memory); | |
1574 assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" ); | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1575 debug_only( if( mem == (Node*)1 ) _mem_node = s->_leaf;) |
0 | 1576 mem = mem2; |
1577 } | |
1578 if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) { | |
1579 if( mach->in(0) == NULL ) | |
1580 mach->set_req(0, s->_leaf->in(0)); | |
1581 } | |
1582 | |
1583 // Now recursively walk the state tree & add operand list. | |
1584 for( uint i=0; i<2; i++ ) { // binary tree | |
1585 State *newstate = s->_kids[i]; | |
1586 if( newstate == NULL ) break; // Might only have 1 child | |
1587 // 'op' is what I am expecting to receive | |
1588 int op; | |
1589 if( i == 0 ) { | |
1590 op = _leftOp[rule]; | |
1591 } else { | |
1592 op = _rightOp[rule]; | |
1593 } | |
1594 // Operand type to catch childs result | |
1595 // This is what my child will give me. | |
1596 int opnd_class_instance = newstate->_rule[op]; | |
1597 // Choose between operand class or not. | |
1598 // This is what I will receive. | |
1599 int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op; | |
1600 // New rule for child. Chase operand classes to get the actual rule. | |
1601 int newrule = newstate->_rule[catch_op]; | |
1602 | |
1603 if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction? | |
1604 // Operand/operandClass | |
1605 // Insert operand into array of operands for this instruction | |
1606 mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C ); | |
1607 ReduceOper( newstate, newrule, mem, mach ); | |
1608 | |
1609 } else { // Child is internal operand or new instruction | |
1610 if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction? | |
1611 // internal operand --> call ReduceInst_Interior | |
1612 // Interior of complex instruction. Do nothing but recurse. | |
1613 num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds ); | |
1614 } else { | |
1615 // instruction --> call build operand( ) to catch result | |
1616 // --> ReduceInst( newrule ) | |
1617 mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C ); | |
1618 Node *mem1 = (Node*)1; | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1619 debug_only(Node *save_mem_node = _mem_node;) |
0 | 1620 mach->add_req( ReduceInst( newstate, newrule, mem1 ) ); |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1621 debug_only(_mem_node = save_mem_node;) |
0 | 1622 } |
1623 } | |
1624 assert( mach->_opnds[num_opnds-1], "" ); | |
1625 } | |
1626 return num_opnds; | |
1627 } | |
1628 | |
1629 // This routine walks the interior of possible complex operands. | |
1630 // At each point we check our children in the match tree: | |
1631 // (1) No children - | |
1632 // We are a leaf; add _leaf field as an input to the MachNode | |
1633 // (2) Child is an internal operand - | |
1634 // Skip over it ( do nothing ) | |
1635 // (3) Child is an instruction - | |
1636 // Call ReduceInst recursively and | |
1637 // and instruction as an input to the MachNode | |
1638 void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) { | |
1639 assert( rule < _LAST_MACH_OPER, "called with operand rule" ); | |
1640 State *kid = s->_kids[0]; | |
1641 assert( kid == NULL || s->_leaf->in(0) == NULL, "internal operands have no control" ); | |
1642 | |
1643 // Leaf? And not subsumed? | |
1644 if( kid == NULL && !_swallowed[rule] ) { | |
1645 mach->add_req( s->_leaf ); // Add leaf pointer | |
1646 return; // Bail out | |
1647 } | |
1648 | |
1649 if( s->_leaf->is_Load() ) { | |
1650 assert( mem == (Node*)1, "multiple Memories being matched at once?" ); | |
1651 mem = s->_leaf->in(MemNode::Memory); | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1652 debug_only(_mem_node = s->_leaf;) |
0 | 1653 } |
1654 if( s->_leaf->in(0) && s->_leaf->req() > 1) { | |
1655 if( !mach->in(0) ) | |
1656 mach->set_req(0,s->_leaf->in(0)); | |
1657 else { | |
1658 assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" ); | |
1659 } | |
1660 } | |
1661 | |
1662 for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) { // binary tree | |
1663 int newrule; | |
1664 if( i == 0 ) | |
1665 newrule = kid->_rule[_leftOp[rule]]; | |
1666 else | |
1667 newrule = kid->_rule[_rightOp[rule]]; | |
1668 | |
1669 if( newrule < _LAST_MACH_OPER ) { // Operand or instruction? | |
1670 // Internal operand; recurse but do nothing else | |
1671 ReduceOper( kid, newrule, mem, mach ); | |
1672 | |
1673 } else { // Child is a new instruction | |
1674 // Reduce the instruction, and add a direct pointer from this | |
1675 // machine instruction to the newly reduced one. | |
1676 Node *mem1 = (Node*)1; | |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1677 debug_only(Node *save_mem_node = _mem_node;) |
0 | 1678 mach->add_req( ReduceInst( kid, newrule, mem1 ) ); |
216
8d191a7697e2
6715633: when matching a memory node the adr_type should not change
kvn
parents:
169
diff
changeset
|
1679 debug_only(_mem_node = save_mem_node;) |
0 | 1680 } |
1681 } | |
1682 } | |
1683 | |
1684 | |
1685 // ------------------------------------------------------------------------- | |
1686 // Java-Java calling convention | |
1687 // (what you use when Java calls Java) | |
1688 | |
1689 //------------------------------find_receiver---------------------------------- | |
1690 // For a given signature, return the OptoReg for parameter 0. | |
1691 OptoReg::Name Matcher::find_receiver( bool is_outgoing ) { | |
1692 VMRegPair regs; | |
1693 BasicType sig_bt = T_OBJECT; | |
1694 calling_convention(&sig_bt, ®s, 1, is_outgoing); | |
1695 // Return argument 0 register. In the LP64 build pointers | |
1696 // take 2 registers, but the VM wants only the 'main' name. | |
1697 return OptoReg::as_OptoReg(regs.first()); | |
1698 } | |
1699 | |
1700 // A method-klass-holder may be passed in the inline_cache_reg | |
1701 // and then expanded into the inline_cache_reg and a method_oop register | |
1702 // defined in ad_<arch>.cpp | |
1703 | |
1704 | |
1705 //------------------------------find_shared------------------------------------ | |
1706 // Set bits if Node is shared or otherwise a root | |
1707 void Matcher::find_shared( Node *n ) { | |
1708 // Allocate stack of size C->unique() * 2 to avoid frequent realloc | |
1709 MStack mstack(C->unique() * 2); | |
1710 mstack.push(n, Visit); // Don't need to pre-visit root node | |
1711 while (mstack.is_nonempty()) { | |
1712 n = mstack.node(); // Leave node on stack | |
1713 Node_State nstate = mstack.state(); | |
1714 if (nstate == Pre_Visit) { | |
1715 if (is_visited(n)) { // Visited already? | |
1716 // Node is shared and has no reason to clone. Flag it as shared. | |
1717 // This causes it to match into a register for the sharing. | |
1718 set_shared(n); // Flag as shared and | |
1719 mstack.pop(); // remove node from stack | |
1720 continue; | |
1721 } | |
1722 nstate = Visit; // Not already visited; so visit now | |
1723 } | |
1724 if (nstate == Visit) { | |
1725 mstack.set_state(Post_Visit); | |
1726 set_visited(n); // Flag as visited now | |
1727 bool mem_op = false; | |
1728 | |
1729 switch( n->Opcode() ) { // Handle some opcodes special | |
1730 case Op_Phi: // Treat Phis as shared roots | |
1731 case Op_Parm: | |
1732 case Op_Proj: // All handled specially during matching | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
0
diff
changeset
|
1733 case Op_SafePointScalarObject: |
0 | 1734 set_shared(n); |
1735 set_dontcare(n); | |
1736 break; | |
1737 case Op_If: | |
1738 case Op_CountedLoopEnd: | |
1739 mstack.set_state(Alt_Post_Visit); // Alternative way | |
1740 // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)). Helps | |
1741 // with matching cmp/branch in 1 instruction. The Matcher needs the | |
1742 // Bool and CmpX side-by-side, because it can only get at constants | |
1743 // that are at the leaves of Match trees, and the Bool's condition acts | |
1744 // as a constant here. | |
1745 mstack.push(n->in(1), Visit); // Clone the Bool | |
1746 mstack.push(n->in(0), Pre_Visit); // Visit control input | |
1747 continue; // while (mstack.is_nonempty()) | |
1748 case Op_ConvI2D: // These forms efficiently match with a prior | |
1749 case Op_ConvI2F: // Load but not a following Store | |
1750 if( n->in(1)->is_Load() && // Prior load | |
1751 n->outcnt() == 1 && // Not already shared | |
1752 n->unique_out()->is_Store() ) // Following store | |
1753 set_shared(n); // Force it to be a root | |
1754 break; | |
1755 case Op_ReverseBytesI: | |
1756 case Op_ReverseBytesL: | |
1757 if( n->in(1)->is_Load() && // Prior load | |
1758 n->outcnt() == 1 ) // Not already shared | |
1759 set_shared(n); // Force it to be a root | |
1760 break; | |
1761 case Op_BoxLock: // Cant match until we get stack-regs in ADLC | |
1762 case Op_IfFalse: | |
1763 case Op_IfTrue: | |
1764 case Op_MachProj: | |
1765 case Op_MergeMem: | |
1766 case Op_Catch: | |
1767 case Op_CatchProj: | |
1768 case Op_CProj: | |
1769 case Op_JumpProj: | |
1770 case Op_JProj: | |
1771 case Op_NeverBranch: | |
1772 set_dontcare(n); | |
1773 break; | |
1774 case Op_Jump: | |
1775 mstack.push(n->in(1), Visit); // Switch Value | |
1776 mstack.push(n->in(0), Pre_Visit); // Visit Control input | |
1777 continue; // while (mstack.is_nonempty()) | |
1778 case Op_StrComp: | |
169
9148c65abefc
6695049: (coll) Create an x86 intrinsic for Arrays.equals
rasbold
parents:
168
diff
changeset
|
1779 case Op_AryEq: |
0 | 1780 set_shared(n); // Force result into register (it will be anyways) |
1781 break; | |
1782 case Op_ConP: { // Convert pointers above the centerline to NUL | |
1783 TypeNode *tn = n->as_Type(); // Constants derive from type nodes | |
1784 const TypePtr* tp = tn->type()->is_ptr(); | |
1785 if (tp->_ptr == TypePtr::AnyNull) { | |
1786 tn->set_type(TypePtr::NULL_PTR); | |
1787 } | |
1788 break; | |
1789 } | |
163 | 1790 case Op_ConN: { // Convert narrow pointers above the centerline to NUL |
1791 TypeNode *tn = n->as_Type(); // Constants derive from type nodes | |
221
1e026f8da827
6710487: More than half of JDI Regression tests hang with COOPs in -Xcomp mode
kvn
parents:
216
diff
changeset
|
1792 const TypePtr* tp = tn->type()->make_ptr(); |
1e026f8da827
6710487: More than half of JDI Regression tests hang with COOPs in -Xcomp mode
kvn
parents:
216
diff
changeset
|
1793 if (tp && tp->_ptr == TypePtr::AnyNull) { |
163 | 1794 tn->set_type(TypeNarrowOop::NULL_PTR); |
1795 } | |
1796 break; | |
1797 } | |
0 | 1798 case Op_Binary: // These are introduced in the Post_Visit state. |
1799 ShouldNotReachHere(); | |
1800 break; | |
1801 case Op_StoreB: // Do match these, despite no ideal reg | |
1802 case Op_StoreC: | |
1803 case Op_StoreCM: | |
1804 case Op_StoreD: | |
1805 case Op_StoreF: | |
1806 case Op_StoreI: | |
1807 case Op_StoreL: | |
1808 case Op_StoreP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
1809 case Op_StoreN: |
0 | 1810 case Op_Store16B: |
1811 case Op_Store8B: | |
1812 case Op_Store4B: | |
1813 case Op_Store8C: | |
1814 case Op_Store4C: | |
1815 case Op_Store2C: | |
1816 case Op_Store4I: | |
1817 case Op_Store2I: | |
1818 case Op_Store2L: | |
1819 case Op_Store4F: | |
1820 case Op_Store2F: | |
1821 case Op_Store2D: | |
1822 case Op_ClearArray: | |
1823 case Op_SafePoint: | |
1824 mem_op = true; | |
1825 break; | |
1826 case Op_LoadB: | |
1827 case Op_LoadC: | |
1828 case Op_LoadD: | |
1829 case Op_LoadF: | |
1830 case Op_LoadI: | |
1831 case Op_LoadKlass: | |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
1832 case Op_LoadNKlass: |
0 | 1833 case Op_LoadL: |
1834 case Op_LoadS: | |
1835 case Op_LoadP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
1836 case Op_LoadN: |
0 | 1837 case Op_LoadRange: |
1838 case Op_LoadD_unaligned: | |
1839 case Op_LoadL_unaligned: | |
1840 case Op_Load16B: | |
1841 case Op_Load8B: | |
1842 case Op_Load4B: | |
1843 case Op_Load4C: | |
1844 case Op_Load2C: | |
1845 case Op_Load8C: | |
1846 case Op_Load8S: | |
1847 case Op_Load4S: | |
1848 case Op_Load2S: | |
1849 case Op_Load4I: | |
1850 case Op_Load2I: | |
1851 case Op_Load2L: | |
1852 case Op_Load4F: | |
1853 case Op_Load2F: | |
1854 case Op_Load2D: | |
1855 mem_op = true; | |
1856 // Must be root of match tree due to prior load conflict | |
1857 if( C->subsume_loads() == false ) { | |
1858 set_shared(n); | |
1859 } | |
1860 // Fall into default case | |
1861 default: | |
1862 if( !n->ideal_reg() ) | |
1863 set_dontcare(n); // Unmatchable Nodes | |
1864 } // end_switch | |
1865 | |
1866 for(int i = n->req() - 1; i >= 0; --i) { // For my children | |
1867 Node *m = n->in(i); // Get ith input | |
1868 if (m == NULL) continue; // Ignore NULLs | |
1869 uint mop = m->Opcode(); | |
1870 | |
1871 // Must clone all producers of flags, or we will not match correctly. | |
1872 // Suppose a compare setting int-flags is shared (e.g., a switch-tree) | |
1873 // then it will match into an ideal Op_RegFlags. Alas, the fp-flags | |
1874 // are also there, so we may match a float-branch to int-flags and | |
1875 // expect the allocator to haul the flags from the int-side to the | |
1876 // fp-side. No can do. | |
1877 if( _must_clone[mop] ) { | |
1878 mstack.push(m, Visit); | |
1879 continue; // for(int i = ...) | |
1880 } | |
1881 | |
1882 // Clone addressing expressions as they are "free" in most instructions | |
1883 if( mem_op && i == MemNode::Address && mop == Op_AddP ) { | |
309
eaf496ad4a14
6732698: crash with dead code from compressed oops in gcm
never
parents:
235
diff
changeset
|
1884 if (m->in(AddPNode::Base)->Opcode() == Op_DecodeN) { |
eaf496ad4a14
6732698: crash with dead code from compressed oops in gcm
never
parents:
235
diff
changeset
|
1885 // Bases used in addresses must be shared but since |
eaf496ad4a14
6732698: crash with dead code from compressed oops in gcm
never
parents:
235
diff
changeset
|
1886 // they are shared through a DecodeN they may appear |
eaf496ad4a14
6732698: crash with dead code from compressed oops in gcm
never
parents:
235
diff
changeset
|
1887 // to have a single use so force sharing here. |
eaf496ad4a14
6732698: crash with dead code from compressed oops in gcm
never
parents:
235
diff
changeset
|
1888 set_shared(m->in(AddPNode::Base)->in(1)); |
eaf496ad4a14
6732698: crash with dead code from compressed oops in gcm
never
parents:
235
diff
changeset
|
1889 } |
0 | 1890 Node *off = m->in(AddPNode::Offset); |
1891 if( off->is_Con() ) { | |
1892 set_visited(m); // Flag as visited now | |
1893 Node *adr = m->in(AddPNode::Address); | |
1894 | |
1895 // Intel, ARM and friends can handle 2 adds in addressing mode | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1896 if( clone_shift_expressions && adr->is_AddP() && |
0 | 1897 // AtomicAdd is not an addressing expression. |
1898 // Cheap to find it by looking for screwy base. | |
1899 !adr->in(AddPNode::Base)->is_top() ) { | |
1900 set_visited(adr); // Flag as visited now | |
1901 Node *shift = adr->in(AddPNode::Offset); | |
1902 // Check for shift by small constant as well | |
1903 if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && | |
1904 shift->in(2)->get_int() <= 3 ) { | |
1905 set_visited(shift); // Flag as visited now | |
1906 mstack.push(shift->in(2), Visit); | |
1907 #ifdef _LP64 | |
1908 // Allow Matcher to match the rule which bypass | |
1909 // ConvI2L operation for an array index on LP64 | |
1910 // if the index value is positive. | |
1911 if( shift->in(1)->Opcode() == Op_ConvI2L && | |
1912 shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) { | |
1913 set_visited(shift->in(1)); // Flag as visited now | |
1914 mstack.push(shift->in(1)->in(1), Pre_Visit); | |
1915 } else | |
1916 #endif | |
1917 mstack.push(shift->in(1), Pre_Visit); | |
1918 } else { | |
1919 mstack.push(shift, Pre_Visit); | |
1920 } | |
1921 mstack.push(adr->in(AddPNode::Address), Pre_Visit); | |
1922 mstack.push(adr->in(AddPNode::Base), Pre_Visit); | |
1923 } else { // Sparc, Alpha, PPC and friends | |
1924 mstack.push(adr, Pre_Visit); | |
1925 } | |
1926 | |
1927 // Clone X+offset as it also folds into most addressing expressions | |
1928 mstack.push(off, Visit); | |
1929 mstack.push(m->in(AddPNode::Base), Pre_Visit); | |
1930 continue; // for(int i = ...) | |
1931 } // if( off->is_Con() ) | |
1932 } // if( mem_op && | |
1933 mstack.push(m, Pre_Visit); | |
1934 } // for(int i = ...) | |
1935 } | |
1936 else if (nstate == Alt_Post_Visit) { | |
1937 mstack.pop(); // Remove node from stack | |
1938 // We cannot remove the Cmp input from the Bool here, as the Bool may be | |
1939 // shared and all users of the Bool need to move the Cmp in parallel. | |
1940 // This leaves both the Bool and the If pointing at the Cmp. To | |
1941 // prevent the Matcher from trying to Match the Cmp along both paths | |
1942 // BoolNode::match_edge always returns a zero. | |
1943 | |
1944 // We reorder the Op_If in a pre-order manner, so we can visit without | |
1945 // accidently sharing the Cmp (the Bool and the If make 2 users). | |
1946 n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool | |
1947 } | |
1948 else if (nstate == Post_Visit) { | |
1949 mstack.pop(); // Remove node from stack | |
1950 | |
1951 // Now hack a few special opcodes | |
1952 switch( n->Opcode() ) { // Handle some opcodes special | |
1953 case Op_StorePConditional: | |
420
a1980da045cc
6462850: generate biased locking code in C2 ideal graph
kvn
parents:
368
diff
changeset
|
1954 case Op_StoreIConditional: |
0 | 1955 case Op_StoreLConditional: |
1956 case Op_CompareAndSwapI: | |
1957 case Op_CompareAndSwapL: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
1958 case Op_CompareAndSwapP: |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
1959 case Op_CompareAndSwapN: { // Convert trinary to binary-tree |
0 | 1960 Node *newval = n->in(MemNode::ValueIn ); |
1961 Node *oldval = n->in(LoadStoreNode::ExpectedIn); | |
1962 Node *pair = new (C, 3) BinaryNode( oldval, newval ); | |
1963 n->set_req(MemNode::ValueIn,pair); | |
1964 n->del_req(LoadStoreNode::ExpectedIn); | |
1965 break; | |
1966 } | |
1967 case Op_CMoveD: // Convert trinary to binary-tree | |
1968 case Op_CMoveF: | |
1969 case Op_CMoveI: | |
1970 case Op_CMoveL: | |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
1971 case Op_CMoveN: |
0 | 1972 case Op_CMoveP: { |
1973 // Restructure into a binary tree for Matching. It's possible that | |
1974 // we could move this code up next to the graph reshaping for IfNodes | |
1975 // or vice-versa, but I do not want to debug this for Ladybird. | |
1976 // 10/2/2000 CNC. | |
1977 Node *pair1 = new (C, 3) BinaryNode(n->in(1),n->in(1)->in(1)); | |
1978 n->set_req(1,pair1); | |
1979 Node *pair2 = new (C, 3) BinaryNode(n->in(2),n->in(3)); | |
1980 n->set_req(2,pair2); | |
1981 n->del_req(3); | |
1982 break; | |
1983 } | |
1984 default: | |
1985 break; | |
1986 } | |
1987 } | |
1988 else { | |
1989 ShouldNotReachHere(); | |
1990 } | |
1991 } // end of while (mstack.is_nonempty()) | |
1992 } | |
1993 | |
1994 #ifdef ASSERT | |
// machine-independent root to machine-dependent root
// Debug helper: dumps the map from ideal (old-space) nodes to the
// machine (new-space) nodes the matcher produced for them.
void Matcher::dump_old2new_map() {
  _old2new_map.dump();
}
1999 #endif | |
2000 | |
2001 //---------------------------collect_null_checks------------------------------- | |
2002 // Find null checks in the ideal graph; write a machine-specific node for | |
2003 // it. Used by later implicit-null-check handling. Actually collects | |
2004 // either an IfTrue or IfFalse for the common NOT-null path, AND the ideal | |
2005 // value being tested. | |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2006 void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) { |
0 | 2007 Node *iff = proj->in(0); |
2008 if( iff->Opcode() == Op_If ) { | |
2009 // During matching If's have Bool & Cmp side-by-side | |
2010 BoolNode *b = iff->in(1)->as_Bool(); | |
2011 Node *cmp = iff->in(2); | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2012 int opc = cmp->Opcode(); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2013 if (opc != Op_CmpP && opc != Op_CmpN) return; |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2014 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2015 const Type* ct = cmp->in(2)->bottom_type(); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2016 if (ct == TypePtr::NULL_PTR || |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2017 (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) { |
0 | 2018 |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2019 bool push_it = false; |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2020 if( proj->Opcode() == Op_IfTrue ) { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2021 extern int all_null_checks_found; |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2022 all_null_checks_found++; |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2023 if( b->_test._test == BoolTest::ne ) { |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2024 push_it = true; |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2025 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2026 } else { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2027 assert( proj->Opcode() == Op_IfFalse, "" ); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
63
diff
changeset
|
2028 if( b->_test._test == BoolTest::eq ) { |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2029 push_it = true; |
0 | 2030 } |
2031 } | |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2032 if( push_it ) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2033 _null_check_tests.push(proj); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2034 Node* val = cmp->in(1); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2035 #ifdef _LP64 |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2036 if (UseCompressedOops && !Matcher::clone_shift_expressions && |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2037 val->bottom_type()->isa_narrowoop()) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2038 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2039 // Look for DecodeN node which should be pinned to orig_proj. |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2040 // On platforms (Sparc) which can not handle 2 adds |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2041 // in addressing mode we have to keep a DecodeN node and |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2042 // use it to do implicit NULL check in address. |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2043 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2044 // DecodeN node was pinned to non-null path (orig_proj) during |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2045 // CastPP transformation in final_graph_reshaping_impl(). |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2046 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2047 uint cnt = orig_proj->outcnt(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2048 for (uint i = 0; i < orig_proj->outcnt(); i++) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2049 Node* d = orig_proj->raw_out(i); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2050 if (d->is_DecodeN() && d->in(1) == val) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2051 val = d; |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2052 val->set_req(0, NULL); // Unpin now. |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2053 break; |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2054 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2055 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2056 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2057 #endif |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2058 _null_check_tests.push(val); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2059 } |
0 | 2060 } |
2061 } | |
2062 } | |
2063 | |
2064 //---------------------------validate_null_checks------------------------------ | |
2065 // Its possible that the value being NULL checked is not the root of a match | |
2066 // tree. If so, I cannot use the value in an implicit null check. | |
2067 void Matcher::validate_null_checks( ) { | |
2068 uint cnt = _null_check_tests.size(); | |
2069 for( uint i=0; i < cnt; i+=2 ) { | |
2070 Node *test = _null_check_tests[i]; | |
2071 Node *val = _null_check_tests[i+1]; | |
2072 if (has_new_node(val)) { | |
2073 // Is a match-tree root, so replace with the matched value | |
2074 _null_check_tests.map(i+1, new_node(val)); | |
2075 } else { | |
2076 // Yank from candidate list | |
2077 _null_check_tests.map(i+1,_null_check_tests[--cnt]); | |
2078 _null_check_tests.map(i,_null_check_tests[--cnt]); | |
2079 _null_check_tests.pop(); | |
2080 _null_check_tests.pop(); | |
2081 i-=2; | |
2082 } | |
2083 } | |
2084 } | |
2085 | |
2086 | |
2087 // Used by the DFA in dfa_sparc.cpp. Check for a prior FastLock | |
2088 // acting as an Acquire and thus we don't need an Acquire here. We | |
2089 // retain the Node to act as a compiler ordering barrier. | |
2090 bool Matcher::prior_fast_lock( const Node *acq ) { | |
2091 Node *r = acq->in(0); | |
2092 if( !r->is_Region() || r->req() <= 1 ) return false; | |
2093 Node *proj = r->in(1); | |
2094 if( !proj->is_Proj() ) return false; | |
2095 Node *call = proj->in(0); | |
2096 if( !call->is_Call() || call->as_Call()->entry_point() != OptoRuntime::complete_monitor_locking_Java() ) | |
2097 return false; | |
2098 | |
2099 return true; | |
2100 } | |
2101 | |
// Used by the DFA in dfa_sparc.cpp.  Check for a following FastUnLock
// acting as a Release and thus we don't need a Release here.  We
// retain the Node to act as a compiler ordering barrier.
bool Matcher::post_fast_unlock( const Node *rel ) {
  Compile *C = Compile::current();
  assert( rel->Opcode() == Op_MemBarRelease, "" );
  const MemBarReleaseNode *mem = (const MemBarReleaseNode*)rel;
  // Find the membar's unmatched (old-space) control projection.  The loop
  // deliberately walks past imax if none exists, relying on the
  // DUIterator_Fast bounds assert to fire.
  DUIterator_Fast imax, i = mem->fast_outs(imax);
  Node *ctrl = NULL;
  while( true ) {
    ctrl = mem->fast_out(i);            // Throw out-of-bounds if proj not found
    assert( ctrl->is_Proj(), "only projections here" );
    ProjNode *proj = (ProjNode*)ctrl;
    if( proj->_con == TypeFunc::Control &&
        !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
      break;
    i++;
  }
  // Look among the control users for an unmatched If.
  Node *iff = NULL;
  for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
    Node *x = ctrl->fast_out(j);
    if( x->is_If() && x->req() > 1 &&
        !C->node_arena()->contains(x) ) { // Unmatched old-space only
      iff = x;
      break;
    }
  }
  if( !iff ) return false;
  Node *bol = iff->in(1);
  // The iff might be some random subclass of If or bol might be Con-Top
  if (!bol->is_Bool()) return false;
  assert( bol->req() > 1, "" );
  // The Release is redundant only when the If tests a FastUnlock result.
  return (bol->in(1)->Opcode() == Op_FastUnlock);
}
2136 | |
// Used by the DFA in dfa_xxx.cpp.  Check for a following barrier or
// atomic instruction acting as a store_load barrier without any
// intervening volatile load, and thus we don't need a barrier here.
// We retain the Node to act as a compiler ordering barrier.
bool Matcher::post_store_load_barrier(const Node *vmb) {
  Compile *C = Compile::current();
  assert( vmb->is_MemBar(), "" );
  assert( vmb->Opcode() != Op_MemBarAcquire, "" );
  const MemBarNode *mem = (const MemBarNode*)vmb;

  // Get the Proj node, ctrl, that can be used to iterate forward
  Node *ctrl = NULL;
  DUIterator_Fast imax, i = mem->fast_outs(imax);
  while( true ) {
    ctrl = mem->fast_out(i);            // Throw out-of-bounds if proj not found
    assert( ctrl->is_Proj(), "only projections here" );
    ProjNode *proj = (ProjNode*)ctrl;
    if( proj->_con == TypeFunc::Control &&
        !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
      break;
    i++;
  }

  // Scan the control users for something that makes this barrier redundant.
  for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
    Node *x = ctrl->fast_out(j);
    int xop = x->Opcode();

    // We don't need current barrier if we see another or a lock
    // before seeing volatile load.
    //
    // Op_Fastunlock previously appeared in the Op_* list below.
    // With the advent of 1-0 lock operations we're no longer guaranteed
    // that a monitor exit operation contains a serializing instruction.

    if (xop == Op_MemBarVolatile ||
        xop == Op_FastLock ||
        xop == Op_CompareAndSwapL ||
        xop == Op_CompareAndSwapP ||
        xop == Op_CompareAndSwapN ||
        xop == Op_CompareAndSwapI)
      return true;

    if (x->is_MemBar()) {
      // We must retain this membar if there is an upcoming volatile
      // load, which will be preceded by acquire membar.
      if (xop == Op_MemBarAcquire)
        return false;
      // For other kinds of barriers, check by pretending we
      // are them, and seeing if we can be removed.
      else
        return post_store_load_barrier((const MemBarNode*)x);
    }

    // Delicate code to detect case of an upcoming fastlock block
    if( x->is_If() && x->req() > 1 &&
        !C->node_arena()->contains(x) ) { // Unmatched old-space only
      Node *iff = x;
      Node *bol = iff->in(1);
      // The iff might be some random subclass of If or bol might be Con-Top
      if (!bol->is_Bool()) return false;
      assert( bol->req() > 1, "" );
      return (bol->in(1)->Opcode() == Op_FastUnlock);
    }
    // probably not necessary to check for these
    if (x->is_Call() || x->is_SafePoint() || x->is_block_proj())
      return false;
  }
  return false;
}
2206 | |
//=============================================================================
//---------------------------State---------------------------------------------
State::State(void) {
#ifdef ASSERT
  _id = 0;
  // Poison child pointers and leaf so use before initialization is obvious
  // in a debugger.
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  //memset(_cost, -1, sizeof(_cost));
  //memset(_rule, -1, sizeof(_rule));
#endif
  // Start with no valid entries; _cost/_rule slots are only read where
  // valid(i) is set (see State::dump).
  memset(_valid, 0, sizeof(_valid));
}
2219 | |
2220 #ifdef ASSERT | |
State::~State() {
  // Poison every field so any use of a destroyed State is obvious in a
  // debugger.
  _id = 99;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  memset(_cost, -3, sizeof(_cost));
  memset(_rule, -3, sizeof(_rule));
}
2228 #endif | |
2229 | |
2230 #ifndef PRODUCT | |
//---------------------------dump----------------------------------------------
// Print this State tree on a fresh line, starting at depth 0.
void State::dump() {
  tty->print("\n");
  dump(0);
}
2236 | |
void State::dump(int depth) {
  // Indent proportional to depth in the match tree.
  for( int j = 0; j < depth; j++ )
    tty->print(" ");
  tty->print("--N: ");
  _leaf->dump();
  uint i;
  // Print every valid (operand, cost, rule) entry at this node.
  for( i = 0; i < _LAST_MACH_OPER; i++ )
    // Check for valid entry
    if( valid(i) ) {
      for( int j = 0; j < depth; j++ )
        tty->print(" ");
      assert(_cost[i] != max_juint, "cost must be a valid value");
      assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
      tty->print_cr("%s %d %s",
                    ruleName[i], _cost[i], ruleName[_rule[i]] );
    }
  tty->print_cr("");

  // Recursively dump matched children one level deeper.
  for( i=0; i<2; i++ )
    if( _kids[i] )
      _kids[i]->dump(depth+1);
}
2259 #endif |