annotate src/share/vm/opto/output.cpp @ 1994:6cd6d394f280
7001033: assert(gch->gc_cause() == GCCause::_scavenge_alot || !gch->incremental_collection_failed())
7002546: regression on SpecJbb2005 on 7b118 comparing to 7b117 on small heaps
Summary: Relaxed assertion checking related to the incremental_collection_failed flag to allow for ExplicitGCInvokesConcurrent behaviour, where we do not want a failing scavenge to bail to a stop-world collection. Parameterized incremental_collection_will_fail() so we can selectively use, or not use, as appropriate, the statistical prediction at specific use sites. This essentially reverts the scavenge bail-out logic to what it was prior to some recent changes that had inadvertently started using the statistical prediction, which can be noisy in the presence of bursty loads. Added some associated verbose non-product debugging messages.
Reviewed-by: johnc, tonyp
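A minimal sketch of the parameterization the summary describes (GC code, not part of the output.cpp listing below; the GenCollectedHeap names are assumed from that era's API and reconstructed as an illustration, not quoted from the changeset):

    // Callers that must not react to the noisy statistical prediction pass
    // consult_young = false and rely only on the sticky failure flag, so the
    // scavenge bail-out path ignores burst-induced false positives.
    bool GenCollectedHeap::incremental_collection_will_fail(bool consult_young) {
      // First disjunct: an incremental collection has actually failed.
      // Second disjunct: the statistical prediction, consulted only on request.
      return incremental_collection_failed() ||
             (consult_young && !get_gen(0)->collection_attempt_is_safe());
    }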
author    ysr
date      Tue, 07 Dec 2010 21:55:53 -0800
parents   f95d63e2154a
children  2f644f85485d
/*
 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "code/debugInfo.hpp"
#include "code/debugInfoRec.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/oopMap.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/locknode.hpp"
#include "opto/machnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "opto/type.hpp"
#include "runtime/handles.inline.hpp"
#include "utilities/xmlstream.hpp"

extern uint size_java_to_interp();
extern uint reloc_java_to_interp();
extern uint size_exception_handler();
extern uint size_deopt_handler();

#ifndef PRODUCT
#define DEBUG_ARG(x) , x
#else
#define DEBUG_ARG(x)
#endif

extern int emit_exception_handler(CodeBuffer &cbuf);
extern int emit_deopt_handler(CodeBuffer &cbuf);

//------------------------------Output-----------------------------------------
// Convert Nodes to instruction bits and pass off to the VM
void Compile::Output() {
  // RootNode goes
  assert( _cfg->_broot->_nodes.size() == 0, "" );

  // Initialize the space for the BufferBlob used to find and verify
  // instruction size in MachNode::emit_size()
  init_scratch_buffer_blob();
  if (failing())  return; // Out of memory

  // The number of new nodes (mostly MachNop) is proportional to
  // the number of java calls and inner loops which are aligned.
  if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
                            C->inner_loops()*(OptoLoopAlignment-1)),
                           "out of nodes before code generation" ) ) {
    return;
  }
  // Make sure I can find the Start Node
  Block_Array& bbs = _cfg->_bbs;
  Block *entry = _cfg->_blocks[1];
  Block *broot = _cfg->_broot;

  const StartNode *start = entry->_nodes[0]->as_Start();

  // Replace StartNode with prolog
  MachPrologNode *prolog = new (this) MachPrologNode();
  entry->_nodes.map( 0, prolog );
  bbs.map( prolog->_idx, entry );
  bbs.map( start->_idx, NULL ); // start is no longer in any block

  // Virtual methods need an unverified entry point

  if( is_osr_compilation() ) {
    if( PoisonOSREntry ) {
      // TODO: Should use a ShouldNotReachHereNode...
      _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
    }
  } else {
    if( _method && !_method->flags().is_static() ) {
      // Insert unvalidated entry point
      _cfg->insert( broot, 0, new (this) MachUEPNode() );
    }

  }


  // Break before main entry point
  if( (_method && _method->break_at_execute())
#ifndef PRODUCT
    ||(OptoBreakpoint && is_method_compilation())
    ||(OptoBreakpointOSR && is_osr_compilation())
    ||(OptoBreakpointC2R && !_method)
#endif
    ) {
    // checking for _method means that OptoBreakpoint does not apply to
    // runtime stubs or frame converters
    _cfg->insert( entry, 1, new (this) MachBreakpointNode() );
  }

  // Insert epilogs before every return
  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
    Block *b = _cfg->_blocks[i];
    if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
      Node *m = b->end();
      if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
        MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
        b->add_inst( epilog );
        bbs.map(epilog->_idx, b);
        //_regalloc->set_bad(epilog->_idx); // Already initialized this way.
      }
    }
  }

# ifdef ENABLE_ZAP_DEAD_LOCALS
  if ( ZapDeadCompiledLocals )  Insert_zap_nodes();
# endif

  ScheduleAndBundle();

#ifndef PRODUCT
  if (trace_opto_output()) {
    tty->print("\n---- After ScheduleAndBundle ----\n");
    for (uint i = 0; i < _cfg->_num_blocks; i++) {
      tty->print("\nBB#%03d:\n", i);
      Block *bb = _cfg->_blocks[i];
      for (uint j = 0; j < bb->_nodes.size(); j++) {
        Node *n = bb->_nodes[j];
        OptoReg::Name reg = _regalloc->get_reg_first(n);
        tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
        n->dump();
      }
    }
  }
#endif

  if (failing())  return;

  BuildOopMaps();

  if (failing())  return;

  Fill_buffer();
}

bool Compile::need_stack_bang(int frame_size_in_bytes) const {
  // Determine if we need to generate a stack overflow check.
  // Do it if the method is not a stub function and
  // has java calls or has frame size > vm_page_size/8.
  return (stub_function() == NULL &&
          (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3));
}
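// (Annotation, not in the original source: with a typical 4K page,
// os::vm_page_size()>>3 is 512 bytes, so even a method without Java calls
// gets a stack-overflow bang once its frame exceeds 512 bytes.)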

bool Compile::need_register_stack_bang() const {
  // Determine if we need to generate a register stack overflow check.
  // This is only used on architectures which have split register
  // and memory stacks (i.e. IA64).
  // Bang if the method is not a stub function and has java calls
  return (stub_function() == NULL && has_java_calls());
}

# ifdef ENABLE_ZAP_DEAD_LOCALS


// In order to catch compiler oop-map bugs, we have implemented
// a debugging mode called ZapDeadCompiledLocals.
// This mode causes the compiler to insert a call to a runtime routine,
// "zap_dead_locals", right before each place in compiled code
// that could potentially be a gc-point (i.e., a safepoint or oop map point).
// The runtime routine checks that locations mapped as oops are really
// oops, that locations mapped as values do not look like oops,
// and that locations mapped as dead are not used later
// (by zapping them to an invalid address).

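// (Annotation, not in the original source: conceptually, the runtime check
// walks the frame using the recorded OopMap --
//   slots mapped as oop   : must contain a valid oop,
//   slots mapped as value : must not look like an oop,
//   slots mapped as dead  : overwritten with a poison address so that any
//                           later use fails loudly.)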
int Compile::_CompiledZap_count = 0;

void Compile::Insert_zap_nodes() {
  bool skip = false;


  // Dink with static counts because code without the extra
  // runtime calls is MUCH faster for debugging purposes

  if ( CompileZapFirst  == 0  ) ; // nothing special
  else if ( CompileZapFirst  >  CompiledZap_count() )  skip = true;
  else if ( CompileZapFirst  == CompiledZap_count() )
    warning("starting zap compilation after skipping");

  if ( CompileZapLast  ==  -1  ) ; // nothing special
  else if ( CompileZapLast  <   CompiledZap_count() )  skip = true;
  else if ( CompileZapLast  ==  CompiledZap_count() )
    warning("about to compile last zap");

  ++_CompiledZap_count; // counts skipped zaps, too

  if ( skip )  return;


  if ( _method == NULL )
    return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care

  // Insert call to zap runtime stub before every node with an oop map
  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
    Block *b = _cfg->_blocks[i];
    for ( uint j = 0;  j < b->_nodes.size();  ++j ) {
      Node *n = b->_nodes[j];

      // Determining if we should insert a zap-a-lot node in output.
      // We do that for all nodes that have oopmap info, except for calls
      // to allocation.  Calls to allocation pass in the old top-of-eden pointer
      // and expect the C code to reset it.  Hence, there can be no safepoints between
      // the inlined-allocation and the call to new_Java, etc.
      // We also cannot zap monitor calls, as they must hold the microlock
      // during the call to Zap, which also wants to grab the microlock.
      bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL);
      if ( insert ) { // it is MachSafePoint
        if ( !n->is_MachCall() ) {
          insert = false;
        } else if ( n->is_MachCall() ) {
          MachCallNode* call = n->as_MachCall();
          if (call->entry_point() == OptoRuntime::new_instance_Java() ||
              call->entry_point() == OptoRuntime::new_array_Java() ||
              call->entry_point() == OptoRuntime::multianewarray2_Java() ||
              call->entry_point() == OptoRuntime::multianewarray3_Java() ||
              call->entry_point() == OptoRuntime::multianewarray4_Java() ||
              call->entry_point() == OptoRuntime::multianewarray5_Java() ||
              call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
              call->entry_point() == OptoRuntime::complete_monitor_locking_Java()
              ) {
            insert = false;
          }
        }
        if (insert) {
          Node *zap = call_zap_node(n->as_MachSafePoint(), i);
          b->_nodes.insert( j, zap );
          _cfg->_bbs.map( zap->_idx, b );
          ++j;
        }
      }
    }
  }
}


Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
  const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type();
  CallStaticJavaNode* ideal_node =
    new (this, tf->domain()->cnt()) CallStaticJavaNode( tf,
         OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
                            "call zap dead locals stub", 0, TypePtr::BOTTOM);
  // We need to copy the OopMap from the site we're zapping at.
  // We have to make a copy, because the zap site might not be
  // a call site, and zap_dead is a call site.
  OopMap* clone = node_to_check->oop_map()->deep_copy();

  // Add the cloned OopMap to the zap node
  ideal_node->set_oop_map(clone);
  return _matcher->match_sfpt(ideal_node);
}

//------------------------------is_node_getting_a_safepoint--------------------
bool Compile::is_node_getting_a_safepoint( Node* n) {
  // This code duplicates the logic prior to the call of add_safepoint
  // below in this file.
  if( n->is_MachSafePoint() ) return true;
  return false;
}

# endif // ENABLE_ZAP_DEAD_LOCALS

//------------------------------compute_loop_first_inst_sizes------------------
// Compute the size of first NumberOfLoopInstrToAlign instructions at the top
// of a loop. When aligning a loop we need to provide enough instructions
// in cpu's fetch buffer to feed decoders. The loop alignment could be
// avoided if we have enough instructions in fetch buffer at the head of a loop.
// By default, the size is set to 999999 by Block's constructor so that
// a loop will be aligned if the size is not reset here.
//
// Note: Mach instructions could contain several HW instructions
// so the size is estimated only.
//
void Compile::compute_loop_first_inst_sizes() {
  // The next condition is used to gate the loop alignment optimization.
  // Don't align a loop if there are enough instructions at the head of a loop
  // or alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
  // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
  // equal to 11 bytes which is the largest address NOP instruction.
  if( MaxLoopPad < OptoLoopAlignment-1 ) {
    uint last_block = _cfg->_num_blocks-1;
    for( uint i=1; i <= last_block; i++ ) {
      Block *b = _cfg->_blocks[i];
      // Check the first loop's block which requires an alignment.
      if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
        uint sum_size = 0;
        uint inst_cnt = NumberOfLoopInstrToAlign;
        inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);

        // Check subsequent fallthrough blocks if the loop's first
        // block(s) does not have enough instructions.
        Block *nb = b;
        while( inst_cnt > 0 &&
               i < last_block &&
               !_cfg->_blocks[i+1]->has_loop_alignment() &&
               !nb->has_successor(b) ) {
          i++;
          nb = _cfg->_blocks[i];
          inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
        } // while( inst_cnt > 0 && i < last_block )

        b->set_first_inst_size(sum_size);
      } // if( b->loop_alignment() > (uint)relocInfo::addr_unit() )
    } // for( i <= last_block )
  } // if( MaxLoopPad < OptoLoopAlignment-1 )
}

//----------------------Shorten_branches---------------------------------------
// The architecture description provides short branch variants for some long
// branch instructions. Replace eligible long branches with short branches.
void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size) {

  // fill in the nop array for bundling computations
  MachNode *_nop_list[Bundle::_nop_count];
  Bundle::initialize_nops(_nop_list, this);

  // ------------------
  // Compute size of each block, method size, and relocation information size
  uint *jmp_end    = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
  uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
  DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
  DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
  blk_starts[0] = 0;

  // Initialize the sizes to 0
  code_size  = 0;          // Size in bytes of generated code
  stub_size  = 0;          // Size in bytes of all stub entries
  // Size in bytes of all relocation entries, including those in local stubs.
  // Start with 2-bytes of reloc info for the unvalidated entry point
  reloc_size = 1;          // Number of relocation entries
  const_size = 0;          // size of fp constants in words

  // Make three passes. The first computes pessimistic blk_starts,
  // relative jmp_end, reloc_size and const_size information.
  // The second performs short branch substitution using the pessimistic
  // sizing. The third inserts nops where needed.

  Node *nj; // tmp

  // Step one, perform a pessimistic sizing pass.
  uint i;
  uint min_offset_from_last_call = 1;  // init to a positive value
  uint nop_size = (new (this) MachNopNode())->size(_regalloc);
  for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
    Block *b = _cfg->_blocks[i];

    // Sum all instruction sizes to compute block size
    uint last_inst = b->_nodes.size();
    uint blk_size = 0;
    for( uint j = 0; j<last_inst; j++ ) {
      nj = b->_nodes[j];
      uint inst_size = nj->size(_regalloc);
      blk_size += inst_size;
      // Handle machine instruction nodes
      if( nj->is_Mach() ) {
        MachNode *mach = nj->as_Mach();
        blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
        reloc_size += mach->reloc();
        const_size += mach->const_size();
        if( mach->is_MachCall() ) {
          MachCallNode *mcall = mach->as_MachCall();
          // This destination address is NOT PC-relative

          mcall->method_set((intptr_t)mcall->entry_point());

          if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
            stub_size  += size_java_to_interp();
            reloc_size += reloc_java_to_interp();
          }
        } else if (mach->is_MachSafePoint()) {
          // If call/safepoint are adjacent, account for possible
          // nop to disambiguate the two safepoints.
          if (min_offset_from_last_call == 0) {
            blk_size += nop_size;
          }
        } else if (mach->ideal_Opcode() == Op_Jump) {
          const_size += b->_num_succs; // Address table size
          // The size is valid even for 64 bit since it is
          // multiplied by 2*jintSize on this method exit.
        }
      }
      min_offset_from_last_call += inst_size;
      // Remember end of call offset
      if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
        min_offset_from_last_call = 0;
      }
    }

    // During short branch replacement, we store the relative (to blk_starts)
    // end of jump in jmp_end, rather than the absolute end of jump. This
    // is so that we do not need to recompute sizes of all nodes when we compute
    // correct blk_starts in our next sizing pass.
    jmp_end[i] = blk_size;
    DEBUG_ONLY( jmp_target[i] = 0; )

    // When the next block starts a loop, we may insert pad NOP
    // instructions. Since we cannot know our future alignment,
    // assume the worst.
    if( i<_cfg->_num_blocks-1 ) {
      Block *nb = _cfg->_blocks[i+1];
      int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
      if( max_loop_pad > 0 ) {
        assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
        blk_size += max_loop_pad;
      }
    }

    // Save block size; update total method size
    blk_starts[i+1] = blk_starts[i]+blk_size;
  }

  // Step two, replace eligible long jumps.

  // Note: this will only get the long branches within short branch
  // range. Another pass might detect more branches that became
  // candidates because the shortening in the first pass exposed
  // more opportunities. Unfortunately, this would require
  // recomputing the starting and ending positions for the blocks
  for( i=0; i<_cfg->_num_blocks; i++ ) {
    Block *b = _cfg->_blocks[i];

    int j;
    // Find the branch; ignore trailing NOPs.
    for( j = b->_nodes.size()-1; j>=0; j-- ) {
      nj = b->_nodes[j];
      if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con )
        break;
    }

    if (j >= 0) {
      if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) {
        MachNode *mach = nj->as_Mach();
        // This requires the TRUE branch target be in succs[0]
        uint bnum = b->non_connector_successor(0)->_pre_order;
        uintptr_t target = blk_starts[bnum];
        if( mach->is_pc_relative() ) {
          int offset = target-(blk_starts[i] + jmp_end[i]);
          if (_matcher->is_short_branch_offset(mach->rule(), offset)) {
            // We've got a winner. Replace this branch.
            MachNode* replacement = mach->short_branch_version(this);
            b->_nodes.map(j, replacement);
            mach->subsume_by(replacement);

            // Update the jmp_end size to save time in our
            // next pass.
            jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc));
            DEBUG_ONLY( jmp_target[i] = bnum; );
            DEBUG_ONLY( jmp_rule[i] = mach->rule(); );
          }
        } else {
#ifndef PRODUCT
          mach->dump(3);
#endif
          Unimplemented();
        }
      }
    }
  }

  // Compute the size of first NumberOfLoopInstrToAlign instructions at head
  // of a loop. It is used to determine the padding for loop alignment.
  compute_loop_first_inst_sizes();

  // Step 3, compute the offsets of all the labels
  uint last_call_adr = max_uint;
  for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
    // copy the offset of the beginning to the corresponding label
    assert(labels[i].is_unused(), "cannot patch at this point");
    labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS);

    // insert padding for any instructions that need it
    Block *b = _cfg->_blocks[i];
    uint last_inst = b->_nodes.size();
    uint adr = blk_starts[i];
    for( uint j = 0; j<last_inst; j++ ) {
      nj = b->_nodes[j];
      if( nj->is_Mach() ) {
        int padding = nj->as_Mach()->compute_padding(adr);
        // If call/safepoint are adjacent insert a nop (5010568)
        if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() &&
            adr == last_call_adr ) {
          padding = nop_size;
        }
        if(padding > 0) {
          assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
          int nops_cnt = padding / nop_size;
          MachNode *nop = new (this) MachNopNode(nops_cnt);
          b->_nodes.insert(j++, nop);
          _cfg->_bbs.map( nop->_idx, b );
          adr += padding;
          last_inst++;
        }
      }
      adr += nj->size(_regalloc);

      // Remember end of call offset
      if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
        last_call_adr = adr;
      }
    }

    if ( i != _cfg->_num_blocks-1) {
      // Get the size of the block
      uint blk_size = adr - blk_starts[i];

      // When the next block is the top of a loop, we may insert pad NOP
      // instructions.
      Block *nb = _cfg->_blocks[i+1];
      int current_offset = blk_starts[i] + blk_size;
      current_offset += nb->alignment_padding(current_offset);
      // Save block size; update total method size
      blk_starts[i+1] = current_offset;
    }
  }

#ifdef ASSERT
  for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
    if( jmp_target[i] != 0 ) {
      int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]);
      if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) {
        tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]);
      }
      assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp");
    }
  }
#endif

  // ------------------
  // Compute size for code buffer
  code_size = blk_starts[i-1] + jmp_end[i-1];

  // Relocation records
  reloc_size += 1;          // Relo entry for exception handler

  // Adjust reloc_size to number of records of relocation info
  // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
  // a relocation index.
  // The CodeBuffer will expand the locs array if this estimate is too low.
  reloc_size *= 10 / sizeof(relocInfo);
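  // (Annotation, not in the original source: reloc_size was accumulated above
  // as a count of relocation entries; assuming sizeof(relocInfo) == 2, the
  // multiply by 10/sizeof(relocInfo) budgets roughly 10 bytes per entry,
  // expressed in relocInfo units, matching the 2..8-byte-plus-index estimate
  // in the comment above.)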

  // Adjust const_size to number of bytes
  const_size *= 2*jintSize; // both float and double take two words per entry

}

//------------------------------FillLocArray-----------------------------------
// Create a bit of debug info and append it to the array. The mapping is from
// Java local or expression stack to constant, register or stack-slot. For
// doubles, insert 2 mappings and return 1 (to tell the caller that the next
// entry has been taken care of and caller should skip it).
static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
  // This should never have accepted Bad before
  assert(OptoReg::is_valid(regnum), "location must be valid");
  return (OptoReg::is_reg(regnum))
         ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
         : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
}


ObjectValue*
Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
  for (int i = 0; i < objs->length(); i++) {
    assert(objs->at(i)->is_object(), "corrupt object cache");
    ObjectValue* sv = (ObjectValue*) objs->at(i);
    if (sv->id() == id) {
      return sv;
    }
  }
  // Otherwise..
  return NULL;
}

void Compile::set_sv_for_object_node(GrowableArray<ScopeValue*> *objs,
                                     ObjectValue* sv ) {
  assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition");
  objs->append(sv);
}


void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
                            GrowableArray<ScopeValue*> *array,
                            GrowableArray<ScopeValue*> *objs ) {
  assert( local, "use _top instead of null" );
  if (array->length() != idx) {
    assert(array->length() == idx + 1, "Unexpected array count");
    // Old functionality:
    //   return
    // New functionality:
    //   Assert if the local is not top. In product mode let the new node
    //   override the old entry.
    assert(local == top(), "LocArray collision");
    if (local == top()) {
      return;
    }
    array->pop();
  }
  const Type *t = local->bottom_type();

  // Is it a safepoint scalar object node?
  if (local->is_SafePointScalarObject()) {
    SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject();

    ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx);
    if (sv == NULL) {
      ciKlass* cik = t->is_oopptr()->klass();
      assert(cik->is_instance_klass() ||
             cik->is_array_klass(), "Not supported allocation.");
      sv = new ObjectValue(spobj->_idx,
                           new ConstantOopWriteValue(cik->constant_encoding()));
      Compile::set_sv_for_object_node(objs, sv);

      uint first_ind = spobj->first_index();
      for (uint i = 0; i < spobj->n_fields(); i++) {
        Node* fld_node = sfpt->in(first_ind+i);
        (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs);
      }
    }
    array->append(sv);
    return;
  }

  // Grab the register number for the local
  OptoReg::Name regnum = _regalloc->get_reg_first(local);
  if( OptoReg::is_valid(regnum) ) {// Got a register/stack?
    // Record the double as two float registers.
    // The register mask for such a value always specifies two adjacent
    // float registers, with the lower register number even.
    // Normally, the allocation of high and low words to these registers
    // is irrelevant, because nearly all operations on register pairs
    // (e.g., StoreD) treat them as a single unit.
    // Here, we assume in addition that the words in these two registers are
    // stored "naturally" (by operations like StoreD and double stores
    // within the interpreter) such that the lower-numbered register
    // is written to the lower memory address. This may seem like
    // a machine dependency, but it is not--it is a requirement on
    // the author of the <arch>.ad file to ensure that, for every
    // even/odd double-register pair to which a double may be allocated,
    // the word in the even single-register is stored to the first
    // memory word. (Note that register numbers are completely
    // arbitrary, and are not tied to any machine-level encodings.)
#ifdef _LP64
    if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) {
      array->append(new ConstantIntValue(0));
      array->append(new_loc_value( _regalloc, regnum, Location::dbl ));
    } else if ( t->base() == Type::Long ) {
      array->append(new ConstantIntValue(0));
      array->append(new_loc_value( _regalloc, regnum, Location::lng ));
    } else if ( t->base() == Type::RawPtr ) {
      // jsr/ret return address which must be restored into the full-
      // width 64-bit stack slot.
      array->append(new_loc_value( _regalloc, regnum, Location::lng ));
    }
#else //_LP64
#ifdef SPARC
    if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
      // For SPARC we have to swap high and low words for
      // long values stored in a single-register (g0-g7).
      array->append(new_loc_value( _regalloc,              regnum   , Location::normal ));
      array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
    } else
#endif //SPARC
    if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) {
      // Repack the double/long as two jints.
      // The convention the interpreter uses is that the second local
      // holds the first raw word of the native double representation.
      // This is actually reasonable, since locals and stack arrays
      // grow downwards in all implementations.
      // (If, on some machine, the interpreter's Java locals or stack
      // were to grow upwards, the embedded doubles would be word-swapped.)
      array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
      array->append(new_loc_value( _regalloc,              regnum   , Location::normal ));
    }
#endif //_LP64
    else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
               OptoReg::is_reg(regnum) ) {
      array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
                                                      ? Location::float_in_dbl : Location::normal ));
    } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
      array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
                                                      ? Location::int_in_long : Location::normal ));
    } else if( t->base() == Type::NarrowOop ) {
      array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
    } else {
      array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
    }
    return;
  }

  // No register. It must be constant data.
  switch (t->base()) {
  case Type::Half:              // Second half of a double
    ShouldNotReachHere();       // Caller should skip 2nd halves
    break;
  case Type::AnyPtr:
    array->append(new ConstantOopWriteValue(NULL));
    break;
  case Type::AryPtr:
  case Type::InstPtr:
  case Type::KlassPtr:          // fall through
    array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
    break;
  case Type::NarrowOop:
    if (t == TypeNarrowOop::NULL_PTR) {
      array->append(new ConstantOopWriteValue(NULL));
    } else {
      array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
    }
    break;
  case Type::Int:
    array->append(new ConstantIntValue(t->is_int()->get_con()));
    break;
  case Type::RawPtr:
    // A return address (T_ADDRESS).
    assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
#ifdef _LP64
    // Must be restored to the full-width 64-bit stack slot.
    array->append(new ConstantLongValue(t->is_ptr()->get_con()));
#else
    array->append(new ConstantIntValue(t->is_ptr()->get_con()));
#endif
    break;
  case Type::FloatCon: {
    float f = t->is_float_constant()->getf();
    array->append(new ConstantIntValue(jint_cast(f)));
    break;
  }
  case Type::DoubleCon: {
    jdouble d = t->is_double_constant()->getd();
#ifdef _LP64
    array->append(new ConstantIntValue(0));
    array->append(new ConstantDoubleValue(d));
#else
    // Repack the double as two jints.
    // The convention the interpreter uses is that the second local
    // holds the first raw word of the native double representation.
    // This is actually reasonable, since locals and stack arrays
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
    jint *dp = (jint*)&d;
    array->append(new ConstantIntValue(dp[1]));
    array->append(new ConstantIntValue(dp[0]));
#endif
    break;
  }
  case Type::Long: {
    jlong d = t->is_long()->get_con();
#ifdef _LP64
    array->append(new ConstantIntValue(0));
    array->append(new ConstantLongValue(d));
#else
    // Repack the long as two jints.
    // The convention the interpreter uses is that the second local
    // holds the first raw word of the native double representation.
    // This is actually reasonable, since locals and stack arrays
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
    jint *dp = (jint*)&d;
    array->append(new ConstantIntValue(dp[1]));
    array->append(new ConstantIntValue(dp[0]));
#endif
    break;
  }
  case Type::Top:               // Add an illegal value here
    array->append(new LocationValue(Location()));
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
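// (Annotation, not in the original source: as a concrete reading of the
// Type::Long case above, on an _LP64 build a constant long 5 is recorded as
// the pair [ConstantIntValue(0), ConstantLongValue(5)], preserving the
// interpreter's two-slot convention for longs and doubles.)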

// Determine if this node starts a bundle
bool Compile::starts_bundle(const Node *n) const {
  return (_node_bundling_limit > n->_idx &&
          _node_bundling_base[n->_idx].starts_bundle());
}

//--------------------------Process_OopMap_Node--------------------------------
void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {

  // Handle special safepoint nodes for synchronization
  MachSafePointNode *sfn = mach->as_MachSafePoint();
  MachCallNode      *mcall;

#ifdef ENABLE_ZAP_DEAD_LOCALS
  assert( is_node_getting_a_safepoint(mach), "logic does not match; false negative");
#endif

  int safepoint_pc_offset = current_offset;
  bool is_method_handle_invoke = false;
  bool return_oop = false;

  // Add the safepoint in the DebugInfoRecorder
  if( !mach->is_MachCall() ) {
    mcall = NULL;
    debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map);
  } else {
    mcall = mach->as_MachCall();

    // Is the call a MethodHandle call?
    if (mcall->is_MachCallJava()) {
      if (mcall->as_MachCallJava()->_method_handle_invoke) {
        assert(has_method_handle_invokes(), "must have been set during call generation");
        is_method_handle_invoke = true;
      }
    }

    // Check if a call returns an object.
    if (mcall->return_value_is_used() &&
        mcall->tf()->range()->field_at(TypeFunc::Parms)->isa_ptr()) {
      return_oop = true;
    }
    safepoint_pc_offset += mcall->ret_addr_offset();
    debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
  }

  // Loop over the JVMState list to add scope information
  // Do not skip safepoints with a NULL method, they need monitor info
  JVMState* youngest_jvms = sfn->jvms();
  int max_depth = youngest_jvms->depth();

  // Allocate the object pool for scalar-replaced objects -- the map from
  // small-integer keys (which can be recorded in the local and ostack
  // arrays) to descriptions of the object state.
  GrowableArray<ScopeValue*> *objs = new GrowableArray<ScopeValue*>();

  // Visit scopes from oldest to youngest.
  for (int depth = 1; depth <= max_depth; depth++) {
    JVMState* jvms = youngest_jvms->of_depth(depth);
    int idx;
    ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
    // Safepoints that do not have method() set only provide oop-map and monitor info
    // to support GC; these do not support deoptimization.
    int num_locs = (method == NULL) ? 0 : jvms->loc_size();
    int num_exps = (method == NULL) ? 0 : jvms->stk_size();
    int num_mon  = jvms->nof_monitors();
    assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(),
           "JVMS local count must match that of the method");

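    // (Annotation, not in the original source: for an inlined chain a() -> b(),
    // depth 1 describes the outermost frame a() and depth == max_depth the
    // innermost frame b(), so deoptimization can rebuild the interpreter
    // frames outermost-first.)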
    // Add Local and Expression Stack Information

    // Insert locals into the locarray
    GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs);
    for( idx = 0; idx < num_locs; idx++ ) {
      FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs );
    }

    // Insert expression stack entries into the exparray
    GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps);
    for( idx = 0; idx < num_exps; idx++ ) {
      FillLocArray( idx, sfn, sfn->stack(jvms, idx), exparray, objs );
    }

    // Add in mappings of the monitors
    assert( !method ||
            !method->is_synchronized() ||
            method->is_native() ||
            num_mon > 0 ||
            !GenerateSynchronizationCode,
            "monitors must always exist for synchronized methods");

    // Build the growable array of MonitorValues for the monitors
    GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon);

    // Loop over monitors and insert into array
    for(idx = 0; idx < num_mon; idx++) {
      // Grab the node that defines this monitor
      Node* box_node = sfn->monitor_box(jvms, idx);
      Node* obj_node = sfn->monitor_obj(jvms, idx);

      // Create ScopeValue for object
      ScopeValue *scval = NULL;

      if( obj_node->is_SafePointScalarObject() ) {
        SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject();
        scval = Compile::sv_for_node_id(objs, spobj->_idx);
        if (scval == NULL) {
          const Type *t = obj_node->bottom_type();
          ciKlass* cik = t->is_oopptr()->klass();
          assert(cik->is_instance_klass() ||
                 cik->is_array_klass(), "Not supported allocation.");
          ObjectValue* sv = new ObjectValue(spobj->_idx,
                                            new ConstantOopWriteValue(cik->constant_encoding()));
          Compile::set_sv_for_object_node(objs, sv);

          uint first_ind = spobj->first_index();
          for (uint i = 0; i < spobj->n_fields(); i++) {
            Node* fld_node = sfn->in(first_ind+i);
            (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs);
          }
          scval = sv;
        }
      } else if( !obj_node->is_Con() ) {
0 | 921 OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node); |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
922 if( obj_node->bottom_type()->base() == Type::NarrowOop ) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
923 scval = new_loc_value( _regalloc, obj_reg, Location::narrowoop ); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
924 } else { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
925 scval = new_loc_value( _regalloc, obj_reg, Location::oop ); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
926 } |
0 | 927 } else { |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
928 const TypePtr *tp = obj_node->bottom_type()->make_ptr(); |
989
148e5441d916
6863023: need non-perm oops in code cache for JSR 292
jrose
parents:
903
diff
changeset
|
929 scval = new ConstantOopWriteValue(tp->is_instptr()->const_oop()->constant_encoding()); |
0 | 930 } |
931 | |
932 OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node); | |
66
6dbf1a175d6b
6672848: (Escape Analysis) improve lock elimination with EA
kvn
parents:
63
diff
changeset
|
933 Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg)); |
460
424f9bfe6b96
6775880: EA +DeoptimizeALot: assert(mon_info->owner()->is_locked(),"object must be locked now")
kvn
parents:
418
diff
changeset
|
934 while( !box_node->is_BoxLock() ) box_node = box_node->in(1); |
66
6dbf1a175d6b
6672848: (Escape Analysis) improve lock elimination with EA
kvn
parents:
63
diff
changeset
|
935 monarray->append(new MonitorValue(scval, basic_lock, box_node->as_BoxLock()->is_eliminated())); |
0 | 936 } |
937 | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
938 // We dump the object pool first, since deoptimization reads it in first. |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
939 debug_info()->dump_object_pool(objs); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
940 |
0 | 941 // Build first class objects to pass to scope |
942 DebugToken *locvals = debug_info()->create_scope_values(locarray); | |
943 DebugToken *expvals = debug_info()->create_scope_values(exparray); | |
944 DebugToken *monvals = debug_info()->create_monitor_values(monarray); | |
945 | |
946 // Make method available for all Safepoints | |
947 ciMethod* scope_method = method ? method : _method; | |
948 // Describe the scope here | |
949 assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI"); | |
1135
e66fd840cb6b
6893081: method handle & invokedynamic code needs additional cleanup (post 6815692, 6858164)
twisti
parents:
989
diff
changeset
|
950 assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest"); |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
951 // Now we can describe the scope. |
1253
f70b0d9ab095
6910618: C2: Error: assert(d->is_oop(),"JVM_ArrayCopy: dst not an oop")
kvn
parents:
1204
diff
changeset
|
952 debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, return_oop, locvals, expvals, monvals); |
0 | 953 } // End jvms loop |
954 | |
955 // Mark the end of the scope set. | |
956 debug_info()->end_safepoint(safepoint_pc_offset); | |
957 } | |
958 | |
959 | |
960 | |
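// Note on the layout above (explanatory, not new behavior): the record is
// replayed by the deoptimization reader in write order, so the ObjectValue
// pool is dumped before any scope that refers into it, and scopes run from
// the oldest caller frame down to the youngest callee.
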
// A simplified version of Process_OopMap_Node, to handle non-safepoints.
class NonSafepointEmitter {
  Compile*  C;
  JVMState* _pending_jvms;
  int       _pending_offset;

  void emit_non_safepoint();

 public:
  NonSafepointEmitter(Compile* compile) {
    this->C = compile;
    _pending_jvms = NULL;
    _pending_offset = 0;
  }

  void observe_instruction(Node* n, int pc_offset) {
    if (!C->debug_info()->recording_non_safepoints())  return;

    Node_Notes* nn = C->node_notes_at(n->_idx);
    if (nn == NULL || nn->jvms() == NULL)  return;
    if (_pending_jvms != NULL &&
        _pending_jvms->same_calls_as(nn->jvms())) {
      // Repeated JVMS?  Stretch it up here.
      _pending_offset = pc_offset;
    } else {
      if (_pending_jvms != NULL &&
          _pending_offset < pc_offset) {
        emit_non_safepoint();
      }
      _pending_jvms = NULL;
      if (pc_offset > C->debug_info()->last_pc_offset()) {
        // This is the only way _pending_jvms can become non-NULL:
        _pending_jvms = nn->jvms();
        _pending_offset = pc_offset;
      }
    }
  }

  // Stay out of the way of real safepoints:
  void observe_safepoint(JVMState* jvms, int pc_offset) {
    if (_pending_jvms != NULL &&
        !_pending_jvms->same_calls_as(jvms) &&
        _pending_offset < pc_offset) {
      emit_non_safepoint();
    }
    _pending_jvms = NULL;
  }

  void flush_at_end() {
    if (_pending_jvms != NULL) {
      emit_non_safepoint();
    }
    _pending_jvms = NULL;
  }
};

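// Illustrative driver sketch (this mirrors how Fill_buffer below uses the
// class; nothing here is new API):
//
//   NonSafepointEmitter non_safepoints(this);
//   for each emitted node n at offset off {
//     if (n is a real safepoint)  non_safepoints.observe_safepoint(jvms, off);
//     else                        non_safepoints.observe_instruction(n, off);
//   }
//   non_safepoints.flush_at_end();
//
// Stretching a repeated JVMState instead of re-emitting it keeps the
// debug-info tables small when many consecutive instructions share the
// same bci information.
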
void NonSafepointEmitter::emit_non_safepoint() {
  JVMState* youngest_jvms = _pending_jvms;
  int       pc_offset     = _pending_offset;

  // Clear it now:
  _pending_jvms = NULL;

  DebugInformationRecorder* debug_info = C->debug_info();
  assert(debug_info->recording_non_safepoints(), "sanity");

  debug_info->add_non_safepoint(pc_offset);
  int max_depth = youngest_jvms->depth();

  // Visit scopes from oldest to youngest.
  for (int depth = 1; depth <= max_depth; depth++) {
    JVMState* jvms = youngest_jvms->of_depth(depth);
    ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
    assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest");
    debug_info->describe_scope(pc_offset, method, jvms->bci(), jvms->should_reexecute());
  }

  // Mark the end of the scope set.
  debug_info->end_non_safepoint(pc_offset);
}


// Helper for Fill_buffer bailout logic.
static void turn_off_compiler(Compile* C) {
  if (CodeCache::unallocated_capacity() >= CodeCacheMinimumFreeSpace*10) {
    // Do not turn off compilation if a single giant method has
    // blown the code cache size.
    C->record_failure("excessive request to CodeCache");
  } else {
    // Let CompileBroker disable further compilations.
    C->record_failure("CodeCache is full");
  }
}

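// Illustrative arithmetic for the heuristic above (threshold value is an
// assumption about typical flag defaults): if CodeCacheMinimumFreeSpace is
// 500K, a request that fails while 5M or more of the code cache remains
// unallocated is blamed on one oversized method ("excessive request"), so
// other compilations may continue; below that, the cache is declared full.
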
//------------------------------Fill_buffer------------------------------------
void Compile::Fill_buffer() {

  // Set the initially allocated size
  int  code_req   = initial_code_capacity;
  int  locs_req   = initial_locs_capacity;
  int  stub_req   = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
  int  const_req  = initial_const_capacity;
  bool labels_not_set = true;

  int  pad_req    = NativeCall::instruction_size;
  // The extra spacing after the code is necessary on some platforms.
  // Sometimes we need to patch in a jump after the last instruction,
  // if the nmethod has been deoptimized.  (See 4932387, 4894843.)

  uint i;
  // Compute the byte offset where we can store the deopt pc.
  if (fixed_slots() != 0) {
    _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
  }

  // Compute prolog code size
  _method_size = 0;
  _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
#ifdef IA64
  if (save_argument_registers()) {
    // 4815101: this is a stub with implicit and unknown precision fp args.
    // The usual spill mechanism can only generate stfd's in this case, which
    // doesn't work if the fp reg to spill contains a single-precision denorm.
    // Instead, we hack around the normal spill mechanism using stfspill's and
    // ldffill's in the MachProlog and MachEpilog emit methods.  We allocate
    // space here for the fp arg regs (f8-f15) we're going to thusly spill.
    //
    // If we ever implement 16-byte 'registers' == stack slots, we can
    // get rid of this hack and have SpillCopy generate stfspill/ldffill
    // instead of stfd/stfs/ldfd/ldfs.
    _frame_slots += 8*(16/BytesPerInt);
  }
#endif
  assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );

  // Create an array of unused labels, one for each basic block
  Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);

  for( i=0; i <= _cfg->_num_blocks; i++ ) {
    blk_labels[i].init();
  }

  // If this machine supports different size branch offsets, then pre-compute
  // the length of the blocks
  if( _matcher->is_short_branch_offset(-1, 0) ) {
    Shorten_branches(blk_labels, code_req, locs_req, stub_req, const_req);
    labels_not_set = false;
  }

  // nmethod and CodeBuffer count stubs & constants as part of method's code.
  int exception_handler_req = size_exception_handler();
  int deopt_handler_req     = size_deopt_handler();
  exception_handler_req += MAX_stubs_size; // add marginal slop for handler
  deopt_handler_req     += MAX_stubs_size; // add marginal slop for handler
  stub_req += MAX_stubs_size;              // ensure per-stub margin
  code_req += MAX_inst_size;               // ensure per-instruction margin

  if (StressCodeBuffers)
    code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion

  int total_req =
    code_req +
    pad_req +
    stub_req +
    exception_handler_req +
    deopt_handler_req +   // deopt handler
    const_req;

  if (has_method_handle_invokes())
    total_req += deopt_handler_req;  // deopt MH handler

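  // Illustrative budget arithmetic (summary of the sums above, no new
  // behavior): total_req = code_req + pad_req + stub_req +
  // exception_handler_req + deopt_handler_req + const_req, with
  // deopt_handler_req counted twice when MethodHandle invokes need their own
  // deopt entry point.  Each term already carries MAX_inst_size or
  // MAX_stubs_size slop, so the later maybe_expand_to_ensure_remaining()
  // calls should rarely need to fire.
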
  CodeBuffer* cb = code_buffer();
  cb->initialize(total_req, locs_req);

  // Have we run out of code space?
  if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
    turn_off_compiler(this);
    return;
  }
  // Configure the code buffer.
  cb->initialize_consts_size(const_req);
  cb->initialize_stubs_size(stub_req);
  cb->initialize_oop_recorder(env()->oop_recorder());

  // Fill in the nop array for bundling computations
  MachNode *_nop_list[Bundle::_nop_count];
  Bundle::initialize_nops(_nop_list, this);

  // Create oopmap set.
  _oop_map_set = new OopMapSet();

  // !!!!! This preserves old handling of oopmaps for now
  debug_info()->set_oopmaps(_oop_map_set);

  // Count and start of implicit null check instructions
  uint inct_cnt = 0;
  uint *inct_starts = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);

  // Count and start of calls
  uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);

  uint return_offset = 0;
  int nop_size = (new (this) MachNopNode())->size(_regalloc);

  int previous_offset = 0;
  int current_offset  = 0;
  int last_call_offset = -1;

  // Create an array of unused labels, one for each basic block, if printing is enabled
#ifndef PRODUCT
  int *node_offsets = NULL;
  uint node_offset_limit = unique();

  if ( print_assembly() )
    node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit);
#endif

  NonSafepointEmitter non_safepoints(this);  // emit non-safepoints lazily

  // ------------------
  // Now fill in the code buffer
  Node *delay_slot = NULL;

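  // Overview of the emission loop below (summary, not new behavior):
  // current_offset is kept equal to cb->insts_size() after every emit, and
  // delay_slot parks a node for one iteration so it can be placed into the
  // branch's delay slot on the following pass.
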
  for( i=0; i < _cfg->_num_blocks; i++ ) {
    Block *b = _cfg->_blocks[i];

    Node *head = b->head();

    // If this block needs to start aligned (i.e., can be reached other
    // than by falling-thru from the previous block), then force the
    // start of a new bundle.
    if( Pipeline::requires_bundling() && starts_bundle(head) )
      cb->flush_bundle(true);

    // Define the label at the beginning of the basic block
    if( labels_not_set )
      MacroAssembler(cb).bind( blk_labels[b->_pre_order] );
    else
      assert( blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
              "label position does not match code offset" );

    uint last_inst = b->_nodes.size();

    // Emit block normally, except for last instruction.
    // Emit means "dump code bits into code buffer".
    for( uint j = 0; j<last_inst; j++ ) {

      // Get the node
      Node* n = b->_nodes[j];

      // See if delay slots are supported
      if (valid_bundle_info(n) &&
          node_bundling(n)->used_in_unconditional_delay()) {
        assert(delay_slot == NULL, "no use of delay slot node");
        assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");

        delay_slot = n;
        continue;
      }

      // If this starts a new instruction group, then flush the current one
      // (but allow split bundles)
      if( Pipeline::requires_bundling() && starts_bundle(n) )
        cb->flush_bundle(false);

      // The following logic is duplicated in the code ifdeffed for
      // ENABLE_ZAP_DEAD_LOCALS which appears above in this file.  It
      // should be factored out.  Or maybe dispersed to the nodes?

      // Special handling for SafePoint/Call Nodes
      bool is_mcall = false;
      if( n->is_Mach() ) {
        MachNode *mach = n->as_Mach();
        is_mcall = n->is_MachCall();
        bool is_sfn = n->is_MachSafePoint();

        // If this requires all previous instructions be flushed, then do so
        if( is_sfn || is_mcall || mach->alignment_required() != 1) {
          cb->flush_bundle(true);
          current_offset = cb->insts_size();
        }

        // align the instruction if necessary
        int padding = mach->compute_padding(current_offset);
        // Make sure safepoint node for polling is distinct from a call's
        // return by adding a nop if needed.
        if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) {
          padding = nop_size;
        }
        assert( labels_not_set || padding == 0, "instruction should already be aligned");

        if(padding > 0) {
          assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
          int nops_cnt = padding / nop_size;
          MachNode *nop = new (this) MachNopNode(nops_cnt);
          b->_nodes.insert(j++, nop);
          last_inst++;
          _cfg->_bbs.map( nop->_idx, b );
          nop->emit(*cb, _regalloc);
          cb->flush_bundle(true);
          current_offset = cb->insts_size();
        }

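        // Explanatory note: the nop padding above serves two masters -- the
        // alignment mach->compute_padding() asked for, and the rule that a
        // safepoint poll must not share a PC with the preceding call's
        // return address (otherwise two PcDesc sites would collide; see the
        // 5010568 note further below).
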
        // Remember the start of the last call in a basic block
        if (is_mcall) {
          MachCallNode *mcall = mach->as_MachCall();

          // This destination address is NOT PC-relative
          mcall->method_set((intptr_t)mcall->entry_point());

          // Save the return address
          call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();

          if (!mcall->is_safepoint_node()) {
            is_mcall = false;
            is_sfn   = false;
          }
        }

        // is_sfn is now set whenever is_mcall is, because MachCall inherits
        // from MachSafePoint
        if( is_sfn || is_mcall ) {

          // Handle special safepoint nodes for synchronization
          if( !is_mcall ) {
            MachSafePointNode *sfn = mach->as_MachSafePoint();
            // !!!!! Stubs only need an oopmap right now, so bail out
            if( sfn->jvms()->method() == NULL) {
              // Write the oopmap directly to the code blob??!!
#             ifdef ENABLE_ZAP_DEAD_LOCALS
              assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive");
#             endif
              continue;
            }
          } // End synchronization

          non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
                                           current_offset);
          Process_OopMap_Node(mach, current_offset);
        } // End if safepoint

        // If this is a null check, then add the start of the previous instruction to the list
        else if( mach->is_MachNullCheck() ) {
          inct_starts[inct_cnt++] = previous_offset;
        }

        // If this is a branch, then fill in the label with the target BB's label
        else if ( mach->is_Branch() ) {

          if ( mach->ideal_Opcode() == Op_Jump ) {
            for (uint h = 0; h < b->_num_succs; h++ ) {
              Block* succs_block = b->_succs[h];
              for (uint j = 1; j < succs_block->num_preds(); j++) {
                Node* jpn = succs_block->pred(j);
                if ( jpn->is_JumpProj() && jpn->in(0) == mach ) {
                  uint block_num = succs_block->non_connector()->_pre_order;
                  Label *blkLabel = &blk_labels[block_num];
                  mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
                }
              }
            }
          } else {
            // For Branches
            // This requires the TRUE branch target be in succs[0]
            uint block_num = b->non_connector_successor(0)->_pre_order;
            mach->label_set( blk_labels[block_num], block_num );
          }
        }

#ifdef ASSERT
        // Check that oop-store precedes the card-mark
        else if( mach->ideal_Opcode() == Op_StoreCM ) {
          uint storeCM_idx = j;
          Node *oop_store = mach->in(mach->_cnt);  // First precedence edge
          assert( oop_store != NULL, "storeCM expects a precedence edge");
          uint i4;
          for( i4 = 0; i4 < last_inst; ++i4 ) {
            if( b->_nodes[i4] == oop_store ) break;
          }
          // Note: This test can provide a false failure if other precedence
          // edges have been added to the storeCMNode.
          assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
        }
#endif

        else if( !n->is_Proj() ) {
          // Remember the beginning of the previous instruction, in case
          // it's followed by a flag-kill and a null-check.  Happens on
          // Intel all the time, with add-to-memory kind of opcodes.
          previous_offset = current_offset;
        }
      }

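      // Illustrative wiring for the branch cases above (block numbers are
      // hypothetical): a MachIf in B3 whose true successor is B7 gets
      // label_set(blk_labels[7], 7), while an Op_Jump feeding a jump table
      // gets one add_case_label(proj_no, &blk_labels[target]) per JumpProj
      // found among its successors' predecessors.
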
      // Verify that there is sufficient space remaining
      cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
      if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
        turn_off_compiler(this);
        return;
      }

      // Save the offset for the listing
#ifndef PRODUCT
      if( node_offsets && n->_idx < node_offset_limit )
        node_offsets[n->_idx] = cb->insts_size();
#endif

      // "Normal" instruction case
      n->emit(*cb, _regalloc);
      current_offset = cb->insts_size();
      non_safepoints.observe_instruction(n, current_offset);

      // mcall is the last "call" that can be a safepoint.  Record it so we
      // can see if a poll will directly follow it, in which case we'll need
      // a pad to make the PcDesc sites unique.  See 5010568.  This can be
      // slightly inaccurate but conservative in the case that the return
      // address is not actually at current_offset.  This is a small price
      // to pay.
      if (is_mcall) {
        last_call_offset = current_offset;
      }

      // See if this instruction has a delay slot
      if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
        assert(delay_slot != NULL, "expecting delay slot node");

        // Back up 1 instruction
        cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size());

        // Save the offset for the listing
#ifndef PRODUCT
        if( node_offsets && delay_slot->_idx < node_offset_limit )
          node_offsets[delay_slot->_idx] = cb->insts_size();
#endif

        // Support a SafePoint in the delay slot
        if( delay_slot->is_MachSafePoint() ) {
          MachNode *mach = delay_slot->as_Mach();
          // !!!!! Stubs only need an oopmap right now, so bail out
          if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) {
            // Write the oopmap directly to the code blob??!!
#           ifdef ENABLE_ZAP_DEAD_LOCALS
            assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive");
#           endif
            delay_slot = NULL;
            continue;
          }

          int adjusted_offset = current_offset - Pipeline::instr_unit_size();
          non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
                                           adjusted_offset);
          // Generate an OopMap entry
          Process_OopMap_Node(mach, adjusted_offset);
        }

        // Insert the delay slot instruction
        delay_slot->emit(*cb, _regalloc);

        // Don't reuse it
        delay_slot = NULL;
      }

    } // End for all instructions in block

1429 // the loop top a little. Helps prevent pipe stalls at loop back branches. | |
0 | 1430 if( i<_cfg->_num_blocks-1 ) { |
1431 Block *nb = _cfg->_blocks[i+1]; | |
1432 uint padding = nb->alignment_padding(current_offset); | |
1433 if( padding > 0 ) { | |
1434 MachNode *nop = new (this) MachNopNode(padding / nop_size); | |
1435 b->_nodes.insert( b->_nodes.size(), nop ); | |
1436 _cfg->_bbs.map( nop->_idx, b ); | |
1437 nop->emit(*cb, _regalloc); | |
1748 | 1438 current_offset = cb->insts_size(); |
0 | 1439 } |
1440 } | |
1441 | |
1442 } // End of for all blocks | |
1443 | |
  non_safepoints.flush_at_end();

  // Offset too large?
  if (failing())  return;

  // Define a pseudo-label at the end of the code
  MacroAssembler(cb).bind( blk_labels[_cfg->_num_blocks] );

  // Compute the size of the first block
  _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();

  assert(cb->insts_size() < 500000, "method is unreasonably large");

  // ------------------

#ifndef PRODUCT
  // Information on the size of the method, without the extraneous code
  Scheduling::increment_method_size(cb->insts_size());
#endif

  // ------------------
  // Fill in exception table entries.
  FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels);

  // Only java methods have exception handlers and deopt handlers
  if (_method) {
    // Emit the exception handler code.
    _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb));
    // Emit the deopt handler code.
    _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb));

    // Emit the MethodHandle deopt handler code (if required).
    if (has_method_handle_invokes()) {
      // We can use the same code as for the normal deopt handler, we
      // just need a different entry point address.
      _code_offsets.set_value(CodeOffsets::DeoptMH, emit_deopt_handler(*cb));
    }
  }

  // One last check for failed CodeBuffer::expand:
  if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
    turn_off_compiler(this);
    return;
  }

#ifndef PRODUCT
  // Dump the assembly code, including basic-block numbers
  if (print_assembly()) {
    ttyLocker ttyl;  // keep the following output all in one block
    if (!VMThread::should_terminate()) {  // test this under the tty lock
      // This output goes directly to the tty, not the compiler log.
      // To enable tools to match it up with the compilation activity,
      // be sure to tag this tty output with the compile ID.
      if (xtty != NULL) {
        xtty->head("opto_assembly compile_id='%d'%s", compile_id(),
                   is_osr_compilation() ? " compile_kind='osr'" :
                   "");
      }
      if (method() != NULL) {
        method()->print_oop();
        print_codes();
      }
      dump_asm(node_offsets, node_offset_limit);
      if (xtty != NULL) {
        xtty->tail("opto_assembly");
      }
    }
  }
#endif

}

void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) {
  _inc_table.set_size(cnt);

  uint inct_cnt = 0;
  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
    Block *b = _cfg->_blocks[i];
    Node *n = NULL;
    int j;

    // Find the branch; ignore trailing NOPs.
    for( j = b->_nodes.size()-1; j>=0; j-- ) {
      n = b->_nodes[j];
      if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con )
        break;
    }

    // If we didn't find anything, continue
    if( j < 0 ) continue;

    // Compute ExceptionHandlerTable subtable entry and add it
    // (skip empty blocks)
    if( n->is_Catch() ) {

      // Get the offset of the return from the call
      uint call_return = call_returns[b->_pre_order];
#ifdef ASSERT
      assert( call_return > 0, "no call seen for this basic block" );
      while( b->_nodes[--j]->Opcode() == Op_MachProj ) ;
      assert( b->_nodes[j]->is_Call(), "CatchProj must follow call" );
#endif
      // The last instruction is a CatchNode; find its CatchProjNodes
      int nof_succs = b->_num_succs;
      // allocate space
      GrowableArray<intptr_t> handler_bcis(nof_succs);
      GrowableArray<intptr_t> handler_pcos(nof_succs);
      // iterate through all successors
      for (int j = 0; j < nof_succs; j++) {
        Block* s = b->_succs[j];
        bool found_p = false;
        for( uint k = 1; k < s->num_preds(); k++ ) {
          Node *pk = s->pred(k);
          if( pk->is_CatchProj() && pk->in(0) == n ) {
            const CatchProjNode* p = pk->as_CatchProj();
            found_p = true;
            // add the corresponding handler bci & pco information
            if( p->_con != CatchProjNode::fall_through_index ) {
              // p leads to an exception handler (and is not fall through)
              assert(s == _cfg->_blocks[s->_pre_order],"bad numbering");
              // no duplicates, please
              if( !handler_bcis.contains(p->handler_bci()) ) {
                uint block_num = s->non_connector()->_pre_order;
                handler_bcis.append(p->handler_bci());
                handler_pcos.append(blk_labels[block_num].loc_pos());
              }
            }
          }
        }
        assert(found_p, "no matching predecessor found");
        // Note:  Due to empty block removal, one block may have
        // several CatchProj inputs, from the same Catch.
      }

      // Set the offset of the return from the call
      _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos);
      continue;
    }

    // Handle implicit null exception table updates
    if( n->is_MachNullCheck() ) {
      uint block_num = b->non_connector_successor(0)->_pre_order;
      _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
      continue;
    }
  } // End of for all blocks fill in exception table entries
}

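// Resulting table shape (illustrative, all offsets hypothetical): each Catch
// block contributes a subtable keyed by its call's return pco, e.g.
// add_subtable(0x48, {bci 12, bci 30}, NULL, {pco 0x90, pco 0xc4}) -- one
// bci/pco pair per distinct CatchProj handler, with fall-through successors
// deliberately omitted.
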
// Static Variables
#ifndef PRODUCT
uint Scheduling::_total_nop_size = 0;
uint Scheduling::_total_method_size = 0;
uint Scheduling::_total_branches = 0;
uint Scheduling::_total_unconditional_delays = 0;
uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
#endif

// Initializer for class Scheduling

Scheduling::Scheduling(Arena *arena, Compile &compile)
  : _arena(arena),
    _cfg(compile.cfg()),
    _bbs(compile.cfg()->_bbs),
    _regalloc(compile.regalloc()),
    _reg_node(arena),
    _bundle_instr_count(0),
    _bundle_cycle_number(0),
    _scheduled(arena),
    _available(arena),
    _next_node(NULL),
    _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]),
    _pinch_free_list(arena)
#ifndef PRODUCT
  , _branches(0)
  , _unconditional_delays(0)
#endif
{
  // Create a MachNopNode
  _nop = new (&compile) MachNopNode();

  // Now that the nops are in the array, save the count
  // (but allow entries for the nops)
  _node_bundling_limit = compile.unique();
  uint node_max = _regalloc->node_regs_max_index();

  compile.set_node_bundling_limit(_node_bundling_limit);

  // This one is persistent within the Compile class
  _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max);

  // Allocate space for fixed-size arrays
  _node_latency    = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
  _uses            = NEW_ARENA_ARRAY(arena, short,          node_max);
  _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);

  // Clear the arrays
  memset(_node_bundling_base, 0, node_max * sizeof(Bundle));
  memset(_node_latency,       0, node_max * sizeof(unsigned short));
  memset(_uses,               0, node_max * sizeof(short));
  memset(_current_latency,    0, node_max * sizeof(unsigned short));

  // Clear the bundling information
  memcpy(_bundle_use_elements,
         Pipeline_Use::elaborated_elements,
         sizeof(Pipeline_Use::elaborated_elements));

  // Get the last node
  Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1];

  _next_node = bb->_nodes[bb->_nodes.size()-1];
}

#ifndef PRODUCT
// Scheduling destructor
Scheduling::~Scheduling() {
  _total_branches             += _branches;
  _total_unconditional_delays += _unconditional_delays;
}
#endif

// Step ahead "i" cycles
void Scheduling::step(uint i) {

  Bundle *bundle = node_bundling(_next_node);
  bundle->set_starts_bundle();

  // Update the bundle record, but leave the flags information alone
  if (_bundle_instr_count > 0) {
    bundle->set_instr_count(_bundle_instr_count);
    bundle->set_resources_used(_bundle_use.resourcesUsed());
  }

  // Update the state information
  _bundle_instr_count = 0;
  _bundle_cycle_number += i;
  _bundle_use.step(i);
}

void Scheduling::step_and_clear() {
  Bundle *bundle = node_bundling(_next_node);
  bundle->set_starts_bundle();

  // Update the bundle record
  if (_bundle_instr_count > 0) {
    bundle->set_instr_count(_bundle_instr_count);
    bundle->set_resources_used(_bundle_use.resourcesUsed());

    _bundle_cycle_number += 1;
  }

  // Clear the bundling information
  _bundle_instr_count = 0;
  _bundle_use.reset();

  memcpy(_bundle_use_elements,
         Pipeline_Use::elaborated_elements,
         sizeof(Pipeline_Use::elaborated_elements));
}

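// step(i) vs step_and_clear(), summarized: step(i) advances the pipeline
// model i cycles and ages resource usage via _bundle_use.step(i), so
// partially-used resources still constrain the next bundle; step_and_clear()
// is for block boundaries where no cross-block bundling is allowed, so it
// resets _bundle_use back to the pristine elaborated_elements state instead.
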
//------------------------------ScheduleAndBundle------------------------------
// Perform instruction scheduling and bundling over the sequence of
// instructions in backwards order.
void Compile::ScheduleAndBundle() {

  // Don't optimize this if it isn't a method
  if (!_method)
    return;

  // Don't optimize this if scheduling is disabled
  if (!do_scheduling())
    return;

  NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )

  // Create a data structure for all the scheduling information
  Scheduling scheduling(Thread::current()->resource_area(), *this);

  // Walk backwards over each basic block, computing the needed alignment
  // Walk over all the basic blocks
  scheduling.DoScheduling();
}

//------------------------------ComputeLocalLatenciesForward-------------------
// Compute the latency of all the instructions.  This is fairly simple,
// because we already have a legal ordering.  Walk over the instructions
// from first to last, and compute the latency of the instruction based
// on the latency of the preceding instruction(s).
void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# -> ComputeLocalLatenciesForward\n");
#endif

  // Walk over all the schedulable instructions
  for( uint j=_bb_start; j < _bb_end; j++ ) {

    // This is a kludge, forcing all latency calculations to start at 1.
    // Used to allow latency 0 to force an instruction to the beginning
    // of the bb
    uint latency = 1;
    Node *use = bb->_nodes[j];
    uint nlen = use->len();

    // Walk over all the inputs
    for ( uint k=0; k < nlen; k++ ) {
      Node *def = use->in(k);
      if (!def)
        continue;

      uint l = _node_latency[def->_idx] + use->latency(k);
      if (latency < l)
        latency = l;
    }

    _node_latency[use->_idx] = latency;

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("# latency %4d: ", latency);
      use->dump();
    }
#endif
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# <- ComputeLocalLatenciesForward\n");
#endif

} // end ComputeLocalLatenciesForward

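// Worked example (hypothetical latencies): for c = a + b where a is ready at
// latency 3 with use->latency(k) == 1, and b at latency 5 with latency(k) == 2,
// the loop computes max(1, 3+1, 5+2) = 7, so _node_latency[c] becomes 7.  The
// floor of 1 is the kludge mentioned above: latency 0 is reserved for forcing
// a node to the head of the block.
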
// See if this node fits into the present instruction bundle
bool Scheduling::NodeFitsInBundle(Node *n) {
  uint n_idx = n->_idx;

  // If this is the unconditional delay instruction, then it fits
  if (n == _unconditional_delay_slot) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx);
#endif
    return (true);
  }

  // If the node cannot be scheduled this cycle, skip it
  if (_current_latency[n_idx] > _bundle_cycle_number) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
                 n->_idx, _current_latency[n_idx], _bundle_cycle_number);
#endif
    return (false);
  }

  const Pipeline *node_pipeline = n->pipeline();

  uint instruction_count = node_pipeline->instructionCount();
  if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
    instruction_count = 0;
  else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
    instruction_count++;

  if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
                 n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
#endif
    return (false);
  }

  // Don't allow non-machine nodes to be handled this way
  if (!n->is_Mach() && instruction_count == 0)
    return (false);

  // See if there is any overlap
  uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse());

  if (delay > 0) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx);
#endif
    return false;
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx);
#endif

  return true;
}

Node * Scheduling::ChooseNodeToBundle() {
  uint siz = _available.size();

  if (siz == 0) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("# ChooseNodeToBundle: NULL\n");
#endif
    return (NULL);
  }

  // Fast path, if only 1 instruction in the bundle
  if (siz == 1) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("# ChooseNodeToBundle (only 1): ");
      _available[0]->dump();
    }
#endif
    return (_available[0]);
  }

  // Don't bother searching if the bundle is already full
  if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) {
    for ( uint i = 0; i < siz; i++ ) {
      Node *n = _available[i];

      // Skip projections, we'll handle them another way
      if (n->is_Proj())
        continue;

      // This presupposes that instructions are inserted into the
      // available list in a legality order; i.e. instructions that
      // must be inserted first are at the head of the list
      if (NodeFitsInBundle(n)) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output()) {
          tty->print("# ChooseNodeToBundle: ");
          n->dump();
        }
#endif
        return (n);
      }
    }
  }

  // Nothing fits in this bundle, choose the highest priority
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output()) {
    tty->print("# ChooseNodeToBundle: ");
    _available[0]->dump();
  }
#endif

  return _available[0];
}

//------------------------------AddNodeToAvailableList-------------------------
void Scheduling::AddNodeToAvailableList(Node *n) {
  assert( !n->is_Proj(), "projections never directly made available" );
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output()) {
    tty->print("# AddNodeToAvailableList: ");
    n->dump();
  }
#endif

  int latency = _current_latency[n->_idx];

  // Insert in latency order (insertion sort)
  uint i;
  for ( i=0; i < _available.size(); i++ )
    if (_current_latency[_available[i]->_idx] > latency)
      break;

  // Special check for compares following branches
  if( n->is_Mach() && _scheduled.size() > 0 ) {
    int op = n->as_Mach()->ideal_Opcode();
    Node *last = _scheduled[0];
    if( last->is_MachIf() && last->in(1) == n &&
        ( op == Op_CmpI ||
          op == Op_CmpU ||
          op == Op_CmpP ||
          op == Op_CmpF ||
          op == Op_CmpD ||
          op == Op_CmpL ) ) {

      // Recalculate position, moving to front of same latency
      for ( i=0 ; i < _available.size(); i++ )
        if (_current_latency[_available[i]->_idx] >= latency)
          break;
    }
  }

  // Insert the node in the available list
  _available.insert(i, n);

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    dump_available();
#endif
}

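// Insertion example (hypothetical latencies): with _available holding
// latencies {2, 4, 4, 9}, a new node of latency 4 lands after the existing
// 4s, i.e. at index 3 -- except for a compare feeding the just-scheduled
// MachIf, which the second scan moves to the front of its latency class
// (index 1 here) so it stays close to the branch that consumes its flags.
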
//------------------------------DecrementUseCounts-----------------------------
void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
  for ( uint i=0; i < n->len(); i++ ) {
    Node *def = n->in(i);
    if (!def) continue;
    if( def->is_Proj() )        // If this is a machine projection, then
      def = def->in(0);         // propagate usage thru to the base instruction

    if( _bbs[def->_idx] != bb ) // Ignore if not block-local
      continue;

    // Compute the latency
    uint l = _bundle_cycle_number + n->latency(i);
    if (_current_latency[def->_idx] < l)
      _current_latency[def->_idx] = l;

    // If this does not have uses then schedule it
    if ((--_uses[def->_idx]) == 0)
      AddNodeToAvailableList(def);
  }
}

//------------------------------AddNodeToBundle--------------------------------
void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output()) {
    tty->print("# AddNodeToBundle: ");
    n->dump();
  }
#endif

  // Remove this from the available list
  uint i;
  for (i = 0; i < _available.size(); i++)
    if (_available[i] == n)
      break;
  assert(i < _available.size(), "entry in _available list not found");
  _available.remove(i);

  // See if this fits in the current bundle
  const Pipeline *node_pipeline = n->pipeline();
  const Pipeline_Use& node_usage = node_pipeline->resourceUse();

  // Check for instructions to be placed in the delay slot.  We
  // do this before we actually schedule the current instruction,
  // because the delay slot follows the current instruction.
  if (Pipeline::_branch_has_delay_slot &&
      node_pipeline->hasBranchDelay() &&
      !_unconditional_delay_slot) {

    uint siz = _available.size();

    // Conditional branches can support an instruction that
    // is unconditionally executed and not dependent on the
    // branch, OR a conditionally executed instruction if
    // the branch is taken.  In practice, this means that
    // the first instruction at the branch target is
    // copied to the delay slot, and the branch goes to
    // the instruction after that at the branch target
    if ( n->is_Mach() && n->is_Branch() ) {

      assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
      assert( !n->is_Catch(),         "should not look for delay slot for Catch" );

#ifndef PRODUCT
      _branches++;
#endif

      // At least 1 instruction is on the available list
      // that is not dependent on the branch
      for (uint i = 0; i < siz; i++) {
        Node *d = _available[i];
        const Pipeline *avail_pipeline = d->pipeline();

        // Don't allow safepoints in the branch shadow, that will
        // cause a number of difficulties
        if ( avail_pipeline->instructionCount() == 1 &&
             !avail_pipeline->hasMultipleBundles() &&
             !avail_pipeline->hasBranchDelay() &&
             Pipeline::instr_has_unit_size() &&
             d->size(_regalloc) == Pipeline::instr_unit_size() &&
             NodeFitsInBundle(d) &&
             !node_bundling(d)->used_in_delay()) {

          if (d->is_Mach() && !d->is_MachSafePoint()) {
            // A node that fits in the delay slot was found, so we need to
            // set the appropriate bits in the bundle pipeline information so
            // that it correctly indicates resource usage.  Later, when we
            // attempt to add this instruction to the bundle, we will skip
            // setting the resource usage.
            _unconditional_delay_slot = d;
            node_bundling(n)->set_use_unconditional_delay();
            node_bundling(d)->set_used_in_unconditional_delay();
            _bundle_use.add_usage(avail_pipeline->resourceUse());
            _current_latency[d->_idx] = _bundle_cycle_number;
            _next_node = d;
            ++_bundle_instr_count;
#ifndef PRODUCT
            _unconditional_delays++;
#endif
            break;
          }
        }
      }
    }

    // No delay slot, add a nop to the usage
    if (!_unconditional_delay_slot) {
      // See if adding an instruction in the delay slot will overflow
      // the bundle.
      if (!NodeFitsInBundle(_nop)) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("# *** STEP(1 instruction for delay slot) ***\n");
#endif
        step(1);
      }

      _bundle_use.add_usage(_nop->pipeline()->resourceUse());
      _next_node = _nop;
      ++_bundle_instr_count;
    }

    // See if the instruction in the delay slot requires a
    // step of the bundles
    if (!NodeFitsInBundle(n)) {
#ifndef PRODUCT
      if (_cfg->C->trace_opto_output())
        tty->print("# *** STEP(branch won't fit) ***\n");
#endif
      // Update the state information
      _bundle_instr_count = 0;
      _bundle_cycle_number += 1;
      _bundle_use.step(1);
    }
  }

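  // Decision summary for the branch-delay logic above: prefer a real,
  // single-unit, non-safepoint instruction from _available (counted in
  // _unconditional_delays); otherwise charge a nop against the bundle, and
  // in either case step the pipeline if the branch itself no longer fits.
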
  // Get the number of instructions
  uint instruction_count = node_pipeline->instructionCount();
  if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
    instruction_count = 0;

  // Compute the latency information
  uint delay = 0;

  if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) {
    int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number;
    if (relative_latency < 0)
      relative_latency = 0;

    delay = _bundle_use.full_latency(relative_latency, node_usage);

    // Does not fit in this bundle, start a new one
    if (delay > 0) {
      step(delay);

#ifndef PRODUCT
      if (_cfg->C->trace_opto_output())
        tty->print("# *** STEP(%d) ***\n", delay);
#endif
    }
  }

  // If this was placed in the delay slot, ignore it
  if (n != _unconditional_delay_slot) {

    if (delay == 0) {
      if (node_pipeline->hasMultipleBundles()) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("# *** STEP(multiple instructions) ***\n");
#endif
        step(1);
      }

      else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("# *** STEP(%d >= %d instructions) ***\n",
                     instruction_count + _bundle_instr_count,
                     Pipeline::_max_instrs_per_cycle);
#endif
        step(1);
      }
    }

    if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
      _bundle_instr_count++;

    // Set the node's latency
    _current_latency[n->_idx] = _bundle_cycle_number;

    // Now merge the functional unit information
    if (instruction_count > 0 || !node_pipeline->mayHaveNoCode())
      _bundle_use.add_usage(node_usage);

    // Increment the number of instructions in this bundle
    _bundle_instr_count += instruction_count;

    // Remember this node for later
    if (n->is_Mach())
      _next_node = n;
  }

  // It's possible to have a BoxLock in the graph and in the _bbs mapping but
  // not in the bb->_nodes array.  This happens for debug-info-only BoxLocks.
  // 'Schedule' them (basically ignore in the schedule) but do not insert them
  // into the block.  All other scheduled nodes get put in the schedule here.
  int op = n->Opcode();
  if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR
      (op != Op_Node &&                   // not an unused antidependence node and
                                          // not an unallocated boxlock
       (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {

    // Push any trailing projections
    if( bb->_nodes[bb->_nodes.size()-1] != n ) {
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        Node *foi = n->fast_out(i);
        if( foi->is_Proj() )
          _scheduled.push(foi);
      }
    }

    // Put the instruction in the schedule list
    _scheduled.push(n);
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    dump_available();
#endif

  // Walk all the definitions, decrementing use counts, and
  // if a definition has a 0 use count, place it in the available list.
  DecrementUseCounts(n,bb);
}

//------------------------------ComputeUseCount--------------------------------
// This method sets the use count within a basic block.  We will ignore all
// uses outside the current basic block.  As we are doing a backwards walk,
// any node we reach that has a use count of 0 may be scheduled.  This also
// avoids the problem of cyclic references from phi nodes, as long as phi
// nodes are at the front of the basic block.  This method also initializes
// the available list to the set of instructions that have no uses within this
// basic block.
void Scheduling::ComputeUseCount(const Block *bb) {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# -> ComputeUseCount\n");
#endif

  // Clear the list of available and scheduled instructions, just in case
  _available.clear();
  _scheduled.clear();

  // No delay slot specified
  _unconditional_delay_slot = NULL;

#ifdef ASSERT
  for( uint i=0; i < bb->_nodes.size(); i++ )
    assert( _uses[bb->_nodes[i]->_idx] == 0, "_use array not clean" );
#endif

  // Force the _uses count to never go to zero for unschedulable pieces
  // of the block
  for( uint k = 0; k < _bb_start; k++ )
    _uses[bb->_nodes[k]->_idx] = 1;
  for( uint l = _bb_end; l < bb->_nodes.size(); l++ )
    _uses[bb->_nodes[l]->_idx] = 1;

  // Iterate backwards over the instructions in the block.  Don't count the
  // branch projections at end or the block header instructions.
  for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
    Node *n = bb->_nodes[j];
    if( n->is_Proj() ) continue; // Projections handled another way

    // Account for all uses
    for ( uint k = 0; k < n->len(); k++ ) {
      Node *inp = n->in(k);
      if (!inp) continue;
      assert(inp != n, "no cycles allowed" );
      if( _bbs[inp->_idx] == bb ) { // Block-local use?
        if( inp->is_Proj() )        // Skip through Proj's
          inp = inp->in(0);
        ++_uses[inp->_idx];         // Count 1 block-local use
      }
    }

    // If this instruction has a 0 use count, then it is available
    if (!_uses[n->_idx]) {
      _current_latency[n->_idx] = _bundle_cycle_number;
      AddNodeToAvailableList(n);
    }

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("# uses: %3d: ", _uses[n->_idx]);
      n->dump();
    }
#endif
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# <- ComputeUseCount\n");
#endif
}

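// Example of the seeding (hypothetical block): for c = a + b; d = c + 1,
// with all four nodes block-local and d's value unused in this block, the
// counts become {a:1, b:1, c:1, d:0}, so only d enters _available -- exactly
// the nodes whose results no unscheduled instruction in this block consumes.
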
2250 // This routine performs scheduling on each basic block in reverse order, | |
2251 // using instruction latencies and taking into account function unit | |
2252 // availability. | |
2253 void Scheduling::DoScheduling() { | |
2254 #ifndef PRODUCT | |
2255 if (_cfg->C->trace_opto_output()) | |
2256 tty->print("# -> DoScheduling\n"); | |
2257 #endif | |
2258 | |
2259 Block *succ_bb = NULL; | |
2260 Block *bb; | |
2261 | |
2262 // Walk over all the basic blocks in reverse order | |
2263 for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) { | |
2264 bb = _cfg->_blocks[i]; | |
2265 | |
2266 #ifndef PRODUCT | |
2267 if (_cfg->C->trace_opto_output()) { | |
2268 tty->print("# Schedule BB#%03d (initial)\n", i); | |
2269 for (uint j = 0; j < bb->_nodes.size(); j++) | |
2270 bb->_nodes[j]->dump(); | |
2271 } | |
2272 #endif | |
2273 | |
2274 // On the head node, skip processing | |
2275 if( bb == _cfg->_broot ) | |
2276 continue; | |
2277 | |
2278 // Skip empty, connector blocks | |
2279 if (bb->is_connector()) | |
2280 continue; | |
2281 | |
2282 // If the following block is not the sole successor of | |
2283 // this one, then reset the pipeline information | |
2284 if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) { | |
2285 #ifndef PRODUCT | |
2286 if (_cfg->C->trace_opto_output()) { | |
2287 tty->print("*** bundle start of next BB, node %d, for %d instructions\n", | |
2288 _next_node->_idx, _bundle_instr_count); | |
2289 } | |
2290 #endif | |
2291 step_and_clear(); | |
2292 } | |
2293 | |
2294 // Leave untouched the starting instruction, any Phis, a CreateEx node | |
2295 // or Top. bb->_nodes[_bb_start] is the first schedulable instruction. | |
2296 _bb_end = bb->_nodes.size()-1; | |
2297 for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) { | |
2298 Node *n = bb->_nodes[_bb_start]; | |
2299 // Things not matched, like Phinodes and ProjNodes don't get scheduled. | |
2300 // Also, MachIdealNodes do not get scheduled | |
2301 if( !n->is_Mach() ) continue; // Skip non-machine nodes | |
2302 MachNode *mach = n->as_Mach(); | |
2303 int iop = mach->ideal_Opcode(); | |
2304 if( iop == Op_CreateEx ) continue; // CreateEx is pinned | |
2305 if( iop == Op_Con ) continue; // Do not schedule Top | |
2306 if( iop == Op_Node && // Do not schedule PhiNodes, ProjNodes | |
2307 mach->pipeline() == MachNode::pipeline_class() && | |
2308 !n->is_SpillCopy() ) // Breakpoints, Prolog, etc | |
2309 continue; | |
2310 break; // Found the first schedulable instruction | |
2311 } | |
2312 // Compute last "interesting" instruction in block - last instruction we | |
2313 // might schedule. _bb_end points just after last schedulable inst. We | |
2314 // normally schedule conditional branches (despite them being forced last | |
2315 // in the block), because they have delay slots we can fill. Calls all | |
2316 // have their delay slots filled in the template expansions, so we don't | |
2317 // bother scheduling them. | |
2318 Node *last = bb->_nodes[_bb_end]; | |
2319 if( last->is_Catch() || | |
707 | 2320 // Exclude unreachable path case when Halt node is in a separate block. | |
2321 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { | |
0 | 2322 // There must be a prior call. Skip it. |
2323 while( !bb->_nodes[--_bb_end]->is_Call() ) { | |
2324 assert( bb->_nodes[_bb_end]->is_Proj(), "skipping projections after expected call" ); | |
2325 } | |
2326 } else if( last->is_MachNullCheck() ) { | |
2327 // Backup so the last null-checked memory instruction is | |
2328 // outside the schedulable range. Skip over the nullcheck, | |
2329 // projection, and the memory nodes. | |
2330 Node *mem = last->in(1); | |
2331 do { | |
2332 _bb_end--; | |
2333 } while (mem != bb->_nodes[_bb_end]); | |
2334 } else { | |
2335 // Set _bb_end to point after last schedulable inst. | |
2336 _bb_end++; | |
2337 } | |
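// Schematic examples (illustrative only) of the three tail shapes above:
//   ... call proj proj Catch      -> back up over the Catch and the
//                                    projections; _bb_end lands on the
//                                    call, which stays unscheduled (its
//                                    delay slot was filled at expansion)
//   ... mem ... proj NullCheck    -> back up until _bb_end lands on the
//                                    null-checked memory op, leaving it
//                                    and everything after it outside the
//                                    schedulable range
//   ... conditional branch        -> _bb_end++ so the branch itself can
//                                    be scheduled and its delay slot
//                                    filled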
2338 | |
2339 assert( _bb_start <= _bb_end, "inverted block ends" ); | |
2340 | |
2341 // Compute the register antidependencies for the basic block | |
2342 ComputeRegisterAntidependencies(bb); | |
2343 if (_cfg->C->failing()) return; // too many D-U pinch points | |
2344 | |
2345 // Compute intra-bb latencies for the nodes | |
2346 ComputeLocalLatenciesForward(bb); | |
2347 | |
2348 // Compute the usage within the block, and set the list of all nodes | |
2349 // in the block that have no uses within the block. | |
2350 ComputeUseCount(bb); | |
2351 | |
2352 // Schedule the remaining instructions in the block | |
2353 while ( _available.size() > 0 ) { | |
2354 Node *n = ChooseNodeToBundle(); | |
2355 AddNodeToBundle(n,bb); | |
2356 } | |
2357 | |
2358 assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" ); | |
2359 #ifdef ASSERT | |
2360 for( uint l = _bb_start; l < _bb_end; l++ ) { | |
2361 Node *n = bb->_nodes[l]; | |
2362 uint m; | |
2363 for( m = 0; m < _bb_end-_bb_start; m++ ) | |
2364 if( _scheduled[m] == n ) | |
2365 break; | |
2366 assert( m < _bb_end-_bb_start, "instruction missing in schedule" ); | |
2367 } | |
2368 #endif | |
2369 | |
2370 // Now copy the instructions (in reverse order) back to the block | |
2371 for ( uint k = _bb_start; k < _bb_end; k++ ) | |
2372 bb->_nodes.map(k, _scheduled[_bb_end-k-1]); | |
2373 | |
2374 #ifndef PRODUCT | |
2375 if (_cfg->C->trace_opto_output()) { | |
2376 tty->print("# Schedule BB#%03d (final)\n", i); | |
2377 uint current = 0; | |
2378 for (uint j = 0; j < bb->_nodes.size(); j++) { | |
2379 Node *n = bb->_nodes[j]; | |
2380 if( valid_bundle_info(n) ) { | |
2381 Bundle *bundle = node_bundling(n); | |
2382 if (bundle->instr_count() > 0 || bundle->flags() > 0) { | |
2383 tty->print("*** Bundle: "); | |
2384 bundle->dump(); | |
2385 } | |
2386 n->dump(); | |
2387 } | |
2388 } | |
2389 } | |
2390 #endif | |
2391 #ifdef ASSERT | |
2392 verify_good_schedule(bb,"after block local scheduling"); | |
2393 #endif | |
2394 } | |
2395 | |
2396 #ifndef PRODUCT | |
2397 if (_cfg->C->trace_opto_output()) | |
2398 tty->print("# <- DoScheduling\n"); | |
2399 #endif | |
2400 | |
2401 // Record final node-bundling array location | |
2402 _regalloc->C->set_node_bundling_base(_node_bundling_base); | |
2403 | |
2404 } // end DoScheduling | |
2405 | |
2406 //------------------------------verify_good_schedule--------------------------- | |
2407 // Verify that no live-range used in the block is killed in the block by a | |
2408 // wrong DEF. This doesn't verify live-ranges that span blocks. | |
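// Illustrative trace (made-up nodes): with program order
//   n1: R1 := x;   n2: y := R1;   n3: R1 := z;
// the backwards walk sees n3 (def of R1, nothing live, ok), n2 (R1 goes
// live, owned by n2), then n1 (def of R1; live owner n2 has an input
// edge from n1, ok). Had scheduling slid n3 between n1 and n2, the walk
// would reach n3 with R1 live for n2 and no edge from n2 to n3, and
// verify_do_def() would assert.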
2409 | |
2410 // Check for edge existence. Used to avoid adding redundant precedence edges. | |
2411 static bool edge_from_to( Node *from, Node *to ) { | |
2412 for( uint i=0; i<from->len(); i++ ) | |
2413 if( from->in(i) == to ) | |
2414 return true; | |
2415 return false; | |
2416 } | |
2417 | |
2418 #ifdef ASSERT | |
2419 //------------------------------verify_do_def---------------------------------- | |
2420 void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) { | |
2421 // Check for bad kills | |
2422 if( OptoReg::is_valid(def) ) { // Ignore stores & control flow | |
2423 Node *prior_use = _reg_node[def]; | |
2424 if( prior_use && !edge_from_to(prior_use,n) ) { | |
2425 tty->print("%s = ",OptoReg::as_VMReg(def)->name()); | |
2426 n->dump(); | |
2427 tty->print_cr("..."); | |
2428 prior_use->dump(); | |
1490 | 2429 assert(edge_from_to(prior_use,n),msg); | |
0 | 2430 } |
2431 _reg_node.map(def,NULL); // Kill live USEs | |
2432 } | |
2433 } | |
2434 | |
2435 //------------------------------verify_good_schedule--------------------------- | |
2436 void Scheduling::verify_good_schedule( Block *b, const char *msg ) { | |
2437 | |
2438 // Zap to something reasonable for the verify code | |
2439 _reg_node.clear(); | |
2440 | |
2441 // Walk over the block backwards. Check to make sure each DEF doesn't | |
2442 // kill a live value (other than the one it's supposed to). Add each | |
2443 // USE to the live set. | |
2444 for( uint i = b->_nodes.size()-1; i >= _bb_start; i-- ) { | |
2445 Node *n = b->_nodes[i]; | |
2446 int n_op = n->Opcode(); | |
2447 if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) { | |
2448 // Fat-proj kills a slew of registers | |
2449 RegMask rm = n->out_RegMask();// Make local copy | |
2450 while( rm.is_NotEmpty() ) { | |
2451 OptoReg::Name kill = rm.find_first_elem(); | |
2452 rm.Remove(kill); | |
2453 verify_do_def( n, kill, msg ); | |
2454 } | |
2455 } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes | |
2456 // Get DEF'd registers the normal way | |
2457 verify_do_def( n, _regalloc->get_reg_first(n), msg ); | |
2458 verify_do_def( n, _regalloc->get_reg_second(n), msg ); | |
2459 } | |
2460 | |
2461 // Now make all USEs live | |
2462 for( uint i=1; i<n->req(); i++ ) { | |
2463 Node *def = n->in(i); | |
2464 assert(def != 0, "input edge required"); | |
2465 OptoReg::Name reg_lo = _regalloc->get_reg_first(def); | |
2466 OptoReg::Name reg_hi = _regalloc->get_reg_second(def); | |
2467 if( OptoReg::is_valid(reg_lo) ) { | |
1490 | 2468 assert(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg); | |
0 | 2469 _reg_node.map(reg_lo,n); |
2470 } | |
2471 if( OptoReg::is_valid(reg_hi) ) { | |
1490 | 2472 assert(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg); | |
0 | 2473 _reg_node.map(reg_hi,n); |
2474 } | |
2475 } | |
2476 | |
2477 } | |
2478 | |
2479 // Zap to something reasonable for the Antidependence code | |
2480 _reg_node.clear(); | |
2481 } | |
2482 #endif | |
2483 | |
2484 // Conditionally add precedence edges. Avoid putting edges on Projs. | |
2485 static void add_prec_edge_from_to( Node *from, Node *to ) { | |
2486 if( from->is_Proj() ) { // Put precedence edge on Proj's input | |
2487 assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" ); | |
2488 from = from->in(0); | |
2489 } | |
2490 if( from != to && // No cycles (for things like LD L0,[L0+4] ) | |
2491 !edge_from_to( from, to ) ) // Avoid duplicate edge | |
2492 from->add_prec(to); | |
2493 } | |
2494 | |
2495 //------------------------------anti_do_def------------------------------------ | |
2496 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) { | |
2497 if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow | |
2498 return; | |
2499 | |
2500 Node *pinch = _reg_node[def_reg]; // Get pinch point | |
2501 if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet? | |
2502 is_def ) { // Check for a true def (not a kill) | |
2503 _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point | |
2504 return; | |
2505 } | |
2506 | |
2507 Node *kill = def; // Rename 'def' to more descriptive 'kill' | |
2508 debug_only( def = (Node*)0xdeadbeef; ) | |
2509 | |
2510 // After some number of kills there _may_ be a later def | |
2511 Node *later_def = NULL; | |
2512 | |
2513 // Finding a kill requires a real pinch-point. | |
2514 // Check for not already having a pinch-point. | |
2515 // Pinch points are Op_Node's. | |
2516 if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point? | |
2517 later_def = pinch; // Must be def/kill as optimistic pinch-point | |
2518 if ( _pinch_free_list.size() > 0) { | |
2519 pinch = _pinch_free_list.pop(); | |
2520 } else { | |
2521 pinch = new (_cfg->C, 1) Node(1); // Pinch point to-be | |
2522 } | |
2523 if (pinch->_idx >= _regalloc->node_regs_max_index()) { | |
2524 _cfg->C->record_method_not_compilable("too many D-U pinch points"); | |
2525 return; | |
2526 } | |
2527 _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init) | |
2528 _reg_node.map(def_reg,pinch); // Record pinch-point | |
2529 //_regalloc->set_bad(pinch->_idx); // Already initialized this way. | |
2530 if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill | |
2531 pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call | |
2532 add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch | |
2533 later_def = NULL; // and no later def | |
2534 } | |
2535 pinch->set_req(0,later_def); // Hook later def so we can find it | |
2536 } else { // Else have valid pinch point | |
2537 if( pinch->in(0) ) // If there is a later-def | |
2538 later_def = pinch->in(0); // Get it | |
2539 } | |
2540 | |
2541 // Add output-dependence edge from later def to kill | |
2542 if( later_def ) // If there is some original def | |
2543 add_prec_edge_from_to(later_def,kill); // Add edge from def to kill | |
2544 | |
2545 // See if current kill is also a use, and so is forced to be the pinch-point. | |
2546 if( pinch->Opcode() == Op_Node ) { | |
2547 Node *uses = kill->is_Proj() ? kill->in(0) : kill; | |
2548 for( uint i=1; i<uses->req(); i++ ) { | |
2549 if( _regalloc->get_reg_first(uses->in(i)) == def_reg || | |
2550 _regalloc->get_reg_second(uses->in(i)) == def_reg ) { | |
2551 // Yes, found a use/kill pinch-point | |
2552 pinch->set_req(0,NULL); // Clear the later-def hook | |
2553 pinch->replace_by(kill); // Move anti-dep edges up | |
2554 pinch = kill; | |
2555 _reg_node.map(def_reg,pinch); | |
2556 return; | |
2557 } | |
2558 } | |
2559 } | |
2560 | |
2561 // Add edge from kill to pinch-point | |
2562 add_prec_edge_from_to(kill,pinch); | |
2563 } | |
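//
// A sketch of how anti_do_def/anti_do_use cooperate on a hypothetical
// block, walked bottom-up:
//   u:  ... := R1      (use)
//   k1: call           (kills R1)
//   k2: call           (kills R1)
// k2 is seen first and is recorded as the optimistic pinch point for R1.
// At k1 the entry is upgraded: a fresh Op_Node pinch p replaces it in
// _reg_node, and p becomes a precedence input of both kills. Later,
// anti_do_use(u) makes u a precedence input of p. Net ordering: u before
// p, p before k1 and k2, so the use can never slide past either kill.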
2564 | |
2565 //------------------------------anti_do_use------------------------------------ | |
2566 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) { | |
2567 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow | |
2568 return; | |
2569 Node *pinch = _reg_node[use_reg]; // Get pinch point | |
2570 // Check for no later def_reg/kill in block | |
2571 if( pinch && _bbs[pinch->_idx] == b && | |
2572 // Use has to be block-local as well | |
2573 _bbs[use->_idx] == b ) { | |
2574 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?) | |
2575 pinch->req() == 1 ) { // pinch not yet in block? | |
2576 pinch->del_req(0); // yank pointer to later-def, also set flag | |
2577 // Insert the pinch-point in the block just after the last use | |
2578 b->_nodes.insert(b->find_node(use)+1,pinch); | |
2579 _bb_end++; // Increase size scheduled region in block | |
2580 } | |
2581 | |
2582 add_prec_edge_from_to(pinch,use); | |
2583 } | |
2584 } | |
2585 | |
2586 //------------------------------ComputeRegisterAntidependences----------------- | |
2587 // We insert antidependences between the reads and following write of | |
2588 // allocated registers to prevent illegal code motion. The number of | |
2589 // added edges should be fairly small, especially as we only add edges | |
2590 // within the current basic block. | |
2591 void Scheduling::ComputeRegisterAntidependencies(Block *b) { | |
2592 | |
2593 #ifdef ASSERT | |
2594 verify_good_schedule(b,"before block local scheduling"); | |
2595 #endif | |
2596 | |
2597 // A valid schedule, for each register independently, is an endless cycle | |
2598 // of: a def, then some uses (connected to the def by true dependencies), | |
2599 // then some kills (defs with no uses), finally the cycle repeats with a new | |
2600 // def. The uses are allowed to float relative to each other, as are the | |
2601 // kills. No use is allowed to slide past a kill (or def). This requires | |
2602 // antidependencies between all uses of a single def and all kills that | |
2603 // follow, up to the next def. More edges are redundant, because later defs | |
2604 // & kills are already serialized with true or antidependencies. To keep | |
2605 // the edge count down, we add a 'pinch point' node if there's more than | |
2606 // one use or more than one kill/def. | |
2607 | |
2608 // We add dependencies in one bottom-up pass. | |
2609 | |
2610 // For each instruction we handle its DEFs/KILLs, then its USEs. | |
2611 | |
2612 // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this | |
2613 // register. If not, we record the DEF/KILL in _reg_node, the | |
2614 // register-to-def mapping. If there is a prior DEF/KILL, we insert a | |
2615 // "pinch point", a new Node that's in the graph but not in the block. | |
2616 // We put edges from the prior and current DEF/KILLs to the pinch point. | |
2617 // We put the pinch point in _reg_node. If there's already a pinch point | |
2618 // we merely add an edge from the current DEF/KILL to the pinch point. | |
2619 | |
2620 // After doing the DEF/KILLs, we handle USEs. For each used register, we | |
2621 // put an edge from the pinch point to the USE. | |
2622 | |
2623 // To be expedient, the _reg_node array is pre-allocated for the whole | |
2624 // compilation. _reg_node is lazily initialized; it either contains a NULL, | |
2625 // or a valid def/kill/pinch-point, or a leftover node from some prior | |
2626 // block. A leftover node from a prior block is treated like a NULL (no | |
2627 // prior def, so no anti-dependence is needed). A valid def is | |
2628 // distinguished by being in the current block. | |
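//
// Worked example (made up for illustration): for program order
//   d: R2 := ...;  u1,u2,u3: uses of R2;  k1,k2: calls killing R2
// the single pinch point for R2 schedules after each use and before each
// kill, so 3+2 = 5 precedence edges enforce the ordering instead of the
// 3*2 = 6 pairwise use-to-kill edges, a saving that grows with the number
// of uses and kills.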
2629 bool fat_proj_seen = false; | |
2630 uint last_safept = _bb_end-1; | |
2631 Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL; | |
2632 Node* last_safept_node = end_node; | |
2633 for( uint i = _bb_end-1; i >= _bb_start; i-- ) { | |
2634 Node *n = b->_nodes[i]; | |
2635 int is_def = n->outcnt(); // a true def (not just a kill) if it had uses before precedence edges were added | |
2636 if( n->Opcode() == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) { | |
2637 // Fat-proj kills a slew of registers | |
2638 // This can add edges to 'n' and obscure whether or not it was a def, | |
2639 // hence the is_def flag. | |
2640 fat_proj_seen = true; | |
2641 RegMask rm = n->out_RegMask();// Make local copy | |
2642 while( rm.is_NotEmpty() ) { | |
2643 OptoReg::Name kill = rm.find_first_elem(); | |
2644 rm.Remove(kill); | |
2645 anti_do_def( b, n, kill, is_def ); | |
2646 } | |
2647 } else { | |
2648 // Get DEF'd registers the normal way | |
2649 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def ); | |
2650 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); | |
2651 } | |
2652 | |
2653 // Check each register used by this instruction for a following DEF/KILL | |
2654 // that must occur afterward and requires an anti-dependence edge. | |
2655 for( uint j=0; j<n->req(); j++ ) { | |
2656 Node *def = n->in(j); | |
2657 if( def ) { | |
2658 assert( def->Opcode() != Op_MachProj || def->ideal_reg() != MachProjNode::fat_proj, "" ); | |
2659 anti_do_use( b, n, _regalloc->get_reg_first(def) ); | |
2660 anti_do_use( b, n, _regalloc->get_reg_second(def) ); | |
2661 } | |
2662 } | |
2663 // Do not allow defs of new derived values to float above GC | |
2664 // points unless the base is definitely available at the GC point. | |
2665 | |
2666 Node *m = b->_nodes[i]; | |
2667 | |
2668 // Add precedence edge from following safepoint to use of derived pointer | |
2669 if( last_safept_node != end_node && | |
2670 m != last_safept_node) { | |
2671 for (uint k = 1; k < m->req(); k++) { | |
2672 const Type *t = m->in(k)->bottom_type(); | |
2673 if( t->isa_oop_ptr() && | |
2674 t->is_ptr()->offset() != 0 ) { | |
2675 last_safept_node->add_prec( m ); | |
2676 break; | |
2677 } | |
2678 } | |
2679 } | |
2680 | |
2681 if( n->jvms() ) { // Precedence edge from derived to safept | |
2682 // Check if last_safept_node was moved by pinch-point insertion in anti_do_use() | |
2683 if( b->_nodes[last_safept] != last_safept_node ) { | |
2684 last_safept = b->find_node(last_safept_node); | |
2685 } | |
2686 for( uint j=last_safept; j > i; j-- ) { | |
2687 Node *mach = b->_nodes[j]; | |
2688 if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP ) | |
2689 mach->add_prec( n ); | |
2690 } | |
2691 last_safept = i; | |
2692 last_safept_node = m; | |
2693 } | |
2694 } | |
2695 | |
2696 if (fat_proj_seen) { | |
2697 // Garbage collect pinch nodes that were not consumed. | |
2698 // They are usually created by a fat kill MachProj for a call. | |
2699 garbage_collect_pinch_nodes(); | |
2700 } | |
2701 } | |
2702 | |
2703 //------------------------------garbage_collect_pinch_nodes------------------------------- | |
2704 | |
2705 // Garbage collect pinch nodes for reuse by other blocks. | |
2706 // | |
2707 // The block scheduler's insertion of anti-dependence | |
2708 // edges creates many pinch nodes when the block contains | |
2709 // 2 or more Calls. A pinch node is used to prevent a | |
2710 // combinatorial explosion of edges. If a set of kills for a | |
2711 // register is anti-dependent on a set of uses (or defs), rather | |
2712 // than adding an edge in the graph between each pair of kill | |
2713 // and use (or def), a pinch node is inserted between them: | |
2714 // | |
2715 // use1 use2 use3 | |
2716 // \ | / | |
2717 // \ | / | |
2718 // pinch | |
2719 // / | \ | |
2720 // / | \ | |
2721 // kill1 kill2 kill3 | |
2722 // | |
2723 // One pinch node is created per register killed when | |
2724 // the second call is encountered during a backwards pass | |
2725 // over the block. Most of these pinch nodes are never | |
2726 // wired into the graph because the register is never | |
2727 // used or def'ed in the block. | |
2728 // | |
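// Together with anti_do_def(), this closes a small reuse loop: blocks
// with calls pop pinch nodes from _pinch_free_list when it is non-empty,
// and any pinch that ends the block with no precedence inputs is scrubbed
// by cleanup_pinch() and pushed back onto the free list for later blocks.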
2729 void Scheduling::garbage_collect_pinch_nodes() { | |
2730 #ifndef PRODUCT | |
2731 if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:"); | |
2732 #endif | |
2733 int trace_cnt = 0; | |
2734 for (uint k = 0; k < _reg_node.Size(); k++) { | |
2735 Node* pinch = _reg_node[k]; | |
2736 if (pinch != NULL && pinch->Opcode() == Op_Node && | |
2737 // no precedence input edges | |
2738 (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { | |
2739 cleanup_pinch(pinch); | |
2740 _pinch_free_list.push(pinch); | |
2741 _reg_node.map(k, NULL); | |
2742 #ifndef PRODUCT | |
2743 if (_cfg->C->trace_opto_output()) { | |
2744 trace_cnt++; | |
2745 if (trace_cnt > 40) { | |
2746 tty->print("\n"); | |
2747 trace_cnt = 0; | |
2748 } | |
2749 tty->print(" %d", pinch->_idx); | |
2750 } | |
2751 #endif | |
2752 } | |
2753 } | |
2754 #ifndef PRODUCT | |
2755 if (_cfg->C->trace_opto_output()) tty->print("\n"); | |
2756 #endif | |
2757 } | |
2758 | |
2759 // Clean up a pinch node for reuse. | |
2760 void Scheduling::cleanup_pinch( Node *pinch ) { | |
2761 assert (pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking"); | |
2762 | |
2763 for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) { | |
2764 Node* use = pinch->last_out(i); | |
2765 uint uses_found = 0; | |
2766 for (uint j = use->req(); j < use->len(); j++) { | |
2767 if (use->in(j) == pinch) { | |
2768 use->rm_prec(j); | |
2769 uses_found++; | |
2770 } | |
2771 } | |
2772 assert(uses_found > 0, "must be a precedence edge"); | |
2773 i -= uses_found; // we deleted 1 or more copies of this edge | |
2774 } | |
2775 // May have a later_def entry | |
2776 pinch->set_req(0, NULL); | |
2777 } | |
2778 | |
2779 //------------------------------print_statistics------------------------------- | |
2780 #ifndef PRODUCT | |
2781 | |
2782 void Scheduling::dump_available() const { | |
2783 tty->print("#Availist "); | |
2784 for (uint i = 0; i < _available.size(); i++) | |
2785 tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]); | |
2786 tty->cr(); | |
2787 } | |
2788 | |
2789 // Print Scheduling Statistics | |
2790 void Scheduling::print_statistics() { | |
2791 // Print the size added by nops for bundling | |
2792 tty->print("Nops added %d bytes to total of %d bytes", | |
2793 _total_nop_size, _total_method_size); | |
2794 if (_total_method_size > 0) | |
2795 tty->print(", for %.2f%%", | |
2796 ((double)_total_nop_size) / ((double) _total_method_size) * 100.0); | |
2797 tty->print("\n"); | |
2798 | |
2799 // Print the number of branch shadows filled | |
2800 if (Pipeline::_branch_has_delay_slot) { | |
2801 tty->print("Of %d branches, %d had unconditional delay slots filled", | |
2802 _total_branches, _total_unconditional_delays); | |
2803 if (_total_branches > 0) | |
2804 tty->print(", for %.2f%%", | |
2805 ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0); | |
2806 tty->print("\n"); | |
2807 } | |
2808 | |
2809 uint total_instructions = 0, total_bundles = 0; | |
2810 | |
2811 for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) { | |
2812 uint bundle_count = _total_instructions_per_bundle[i]; | |
2813 total_instructions += bundle_count * i; | |
2814 total_bundles += bundle_count; | |
2815 } | |
2816 | |
2817 if (total_bundles > 0) | |
2818 tty->print("Average ILP (excluding nops) is %.2f\n", | |
2819 ((double)total_instructions) / ((double)total_bundles)); | |
2820 } | |
2821 #endif |