Mercurial > hg > truffle
annotate src/share/vm/opto/output.cpp @ 9126:bc26f978b0ce
HotSpotResolvedObjectType: implement hasFinalizeSubclass() correctly
don't use the (wrong) cached value, but ask the runtime on each request.
Fixes regression on xml.* benchmarks @ specjvm2008. The problem was:
After the constructor of Object was deoptimized due to an assumption violation,
it was recompiled again after some time. However, on recompilation the value
of hasFinalizeSubclass for the class was not updated, and the method was
compiled again with a now-wrong assumption, which then triggered deoptimization
again. This was repeated until it hit the recompilation limit (defined by
PerMethodRecompilationCutoff), and was therefore only executed by the interpreter
from then on, causing the performance regression.
author | Bernhard Urban <bernhard.urban@jku.at> |
---|---|
date | Mon, 15 Apr 2013 19:54:58 +0200 |
parents | b9a918201d47 |
children | 836a62f43af9 |
rev | line source |
---|---|
0 | 1 /* |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6620
diff
changeset
|
2 * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1490
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1490
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1490
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "asm/assembler.inline.hpp" | |
27 #include "code/debugInfo.hpp" | |
28 #include "code/debugInfoRec.hpp" | |
29 #include "compiler/compileBroker.hpp" | |
30 #include "compiler/oopMap.hpp" | |
31 #include "memory/allocation.inline.hpp" | |
32 #include "opto/callnode.hpp" | |
33 #include "opto/cfgnode.hpp" | |
34 #include "opto/locknode.hpp" | |
35 #include "opto/machnode.hpp" | |
36 #include "opto/output.hpp" | |
37 #include "opto/regalloc.hpp" | |
38 #include "opto/runtime.hpp" | |
39 #include "opto/subnode.hpp" | |
40 #include "opto/type.hpp" | |
41 #include "runtime/handles.inline.hpp" | |
42 #include "utilities/xmlstream.hpp" | |
0 | 43 |
44 extern uint size_java_to_interp(); | |
45 extern uint reloc_java_to_interp(); | |
46 extern uint size_exception_handler(); | |
47 extern uint size_deopt_handler(); | |
48 | |
49 #ifndef PRODUCT | |
50 #define DEBUG_ARG(x) , x | |
51 #else | |
52 #define DEBUG_ARG(x) | |
53 #endif | |
54 | |
55 extern int emit_exception_handler(CodeBuffer &cbuf); | |
56 extern int emit_deopt_handler(CodeBuffer &cbuf); | |
57 | |
58 //------------------------------Output----------------------------------------- | |
59 // Convert Nodes to instruction bits and pass off to the VM | |
60 void Compile::Output() { | |
61 // RootNode goes | |
62 assert( _cfg->_broot->_nodes.size() == 0, "" ); | |
63 | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
707
diff
changeset
|
64 // The number of new nodes (mostly MachNop) is proportional to |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
707
diff
changeset
|
65 // the number of java calls and inner loops which are aligned. |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
707
diff
changeset
|
66 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 + |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
707
diff
changeset
|
67 C->inner_loops()*(OptoLoopAlignment-1)), |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
707
diff
changeset
|
68 "out of nodes before code generation" ) ) { |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
707
diff
changeset
|
69 return; |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
707
diff
changeset
|
70 } |
0 | 71 // Make sure I can find the Start Node |
72 Block_Array& bbs = _cfg->_bbs; | |
73 Block *entry = _cfg->_blocks[1]; | |
74 Block *broot = _cfg->_broot; | |
75 | |
76 const StartNode *start = entry->_nodes[0]->as_Start(); | |
77 | |
78 // Replace StartNode with prolog | |
79 MachPrologNode *prolog = new (this) MachPrologNode(); | |
80 entry->_nodes.map( 0, prolog ); | |
81 bbs.map( prolog->_idx, entry ); | |
82 bbs.map( start->_idx, NULL ); // start is no longer in any block | |
83 | |
84 // Virtual methods need an unverified entry point | |
85 | |
86 if( is_osr_compilation() ) { | |
87 if( PoisonOSREntry ) { | |
88 // TODO: Should use a ShouldNotReachHereNode... | |
89 _cfg->insert( broot, 0, new (this) MachBreakpointNode() ); | |
90 } | |
91 } else { | |
92 if( _method && !_method->flags().is_static() ) { | |
93 // Insert unvalidated entry point | |
94 _cfg->insert( broot, 0, new (this) MachUEPNode() ); | |
95 } | |
96 | |
97 } | |
98 | |
99 | |
100 // Break before main entry point | |
101 if( (_method && _method->break_at_execute()) | |
102 #ifndef PRODUCT | |
103 ||(OptoBreakpoint && is_method_compilation()) | |
104 ||(OptoBreakpointOSR && is_osr_compilation()) | |
105 ||(OptoBreakpointC2R && !_method) | |
106 #endif | |
107 ) { | |
108 // checking for _method means that OptoBreakpoint does not apply to | |
109 // runtime stubs or frame converters | |
110 _cfg->insert( entry, 1, new (this) MachBreakpointNode() ); | |
111 } | |
112 | |
113 // Insert epilogs before every return | |
114 for( uint i=0; i<_cfg->_num_blocks; i++ ) { | |
115 Block *b = _cfg->_blocks[i]; | |
116 if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point? | |
117 Node *m = b->end(); | |
118 if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) { | |
119 MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); | |
120 b->add_inst( epilog ); | |
121 bbs.map(epilog->_idx, b); | |
122 //_regalloc->set_bad(epilog->_idx); // Already initialized this way. | |
123 } | |
124 } | |
125 } | |
126 | |
127 # ifdef ENABLE_ZAP_DEAD_LOCALS | |
128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); | |
129 # endif | |
130 | |
3851 | 131 uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); |
132 blk_starts[0] = 0; | |
133 | |
134 // Initialize code buffer and process short branches. | |
135 CodeBuffer* cb = init_buffer(blk_starts); | |
136 | |
137 if (cb == NULL || failing()) return; | |
138 | |
0 | 139 ScheduleAndBundle(); |
140 | |
141 #ifndef PRODUCT | |
142 if (trace_opto_output()) { | |
143 tty->print("\n---- After ScheduleAndBundle ----\n"); | |
144 for (uint i = 0; i < _cfg->_num_blocks; i++) { | |
145 tty->print("\nBB#%03d:\n", i); | |
146 Block *bb = _cfg->_blocks[i]; | |
147 for (uint j = 0; j < bb->_nodes.size(); j++) { | |
148 Node *n = bb->_nodes[j]; | |
149 OptoReg::Name reg = _regalloc->get_reg_first(n); | |
150 tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); | |
151 n->dump(); | |
152 } | |
153 } | |
154 } | |
155 #endif | |
156 | |
157 if (failing()) return; | |
158 | |
159 BuildOopMaps(); | |
160 | |
161 if (failing()) return; | |
162 | |
3851 | 163 fill_buffer(cb, blk_starts); |
0 | 164 } |
165 | |
166 bool Compile::need_stack_bang(int frame_size_in_bytes) const { | |
167 // Determine if we need to generate a stack overflow check. | |
168 // Do it if the method is not a stub function and | |
169 // has java calls or has frame size > vm_page_size/8. | |
4947
fd8114661503
7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29
kvn
parents:
4777
diff
changeset
|
170 return (UseStackBanging && stub_function() == NULL && |
0 | 171 (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3)); |
172 } | |
173 | |
174 bool Compile::need_register_stack_bang() const { | |
175 // Determine if we need to generate a register stack overflow check. | |
176 // This is only used on architectures which have split register | |
177 // and memory stacks (ie. IA64). | |
178 // Bang if the method is not a stub function and has java calls | |
179 return (stub_function() == NULL && has_java_calls()); | |
180 } | |
181 | |
# ifdef ENABLE_ZAP_DEAD_LOCALS


// In order to catch compiler oop-map bugs, we have implemented
// a debugging mode called ZapDeadCompilerLocals.
// This mode causes the compiler to insert a call to a runtime routine,
// "zap_dead_locals", right before each place in compiled code
// that could potentially be a gc-point (i.e., a safepoint or oop map point).
// The runtime routine checks that locations mapped as oops are really
// oops, that locations mapped as values do not look like oops,
// and that locations mapped as dead are not used later
// (by zapping them to an invalid address).

// Running count of compiles considered for zapping; counts skipped ones too.
int Compile::_CompiledZap_count = 0;

// Insert a call to the zap runtime stub before every node carrying an oop
// map, subject to the CompileZapFirst/CompileZapLast windowing flags.
void Compile::Insert_zap_nodes() {
  bool skip = false;

  // Dink with static counts because code without the extra
  // runtime calls is MUCH faster for debugging purposes.
  if (CompileZapFirst == 0) {
    ; // nothing special
  } else if (CompileZapFirst > CompiledZap_count()) {
    skip = true;
  } else if (CompileZapFirst == CompiledZap_count()) {
    warning("starting zap compilation after skipping");
  }

  if (CompileZapLast == -1) {
    ; // nothing special
  } else if (CompileZapLast < CompiledZap_count()) {
    skip = true;
  } else if (CompileZapLast == CompiledZap_count()) {
    warning("about to compile last zap");
  }

  ++_CompiledZap_count; // counts skipped zaps, too

  if (skip) {
    return;
  }

  if (_method == NULL) {
    return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care
  }

  // Insert call to zap runtime stub before every node with an oop map.
  for (uint i = 0; i < _cfg->_num_blocks; i++) {
    Block* b = _cfg->_blocks[i];
    for (uint j = 0; j < b->_nodes.size(); ++j) {
      Node* n = b->_nodes[j];

      // We zap all nodes that have oopmap info, except for calls to
      // allocation. Calls to allocation pass in the old top-of-eden pointer
      // and expect the C code to reset it, so there can be no safepoints
      // between the inlined allocation and the call to new_Java, etc.
      // We also cannot zap monitor calls, as they must hold the microlock
      // during the call to Zap, which also wants to grab the microlock.
      bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL);
      if (insert) { // it is a MachSafePoint
        if (!n->is_MachCall()) {
          insert = false;
        } else {
          MachCallNode* call = n->as_MachCall();
          if (call->entry_point() == OptoRuntime::new_instance_Java() ||
              call->entry_point() == OptoRuntime::new_array_Java() ||
              call->entry_point() == OptoRuntime::multianewarray2_Java() ||
              call->entry_point() == OptoRuntime::multianewarray3_Java() ||
              call->entry_point() == OptoRuntime::multianewarray4_Java() ||
              call->entry_point() == OptoRuntime::multianewarray5_Java() ||
              call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
              call->entry_point() == OptoRuntime::complete_monitor_locking_Java()) {
            insert = false;
          }
        }
        if (insert) {
          Node* zap = call_zap_node(n->as_MachSafePoint(), i);
          b->_nodes.insert(j, zap);
          _cfg->_bbs.map(zap->_idx, b);
          ++j;  // step over the node we just inserted
        }
      }
    }
  }
}

// Build a matched call to the zap_dead_locals stub carrying a deep copy of
// the oop map of node_to_check.
Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
  const TypeFunc* tf = OptoRuntime::zap_dead_locals_Type();
  CallStaticJavaNode* ideal_node =
    new (this) CallStaticJavaNode( tf,
         OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
                            "call zap dead locals stub", 0, TypePtr::BOTTOM);
  // We need to copy the OopMap from the site we're zapping at.
  // We have to make a copy, because the zap site might not be
  // a call site, and zap_dead is a call site.
  OopMap* clone = node_to_check->oop_map()->deep_copy();

  // Add the cloned OopMap to the zap node.
  ideal_node->set_oop_map(clone);
  return _matcher->match_sfpt(ideal_node);
}

//------------------------------is_node_getting_a_safepoint--------------------
bool Compile::is_node_getting_a_safepoint(Node* n) {
  // This code duplicates the logic prior to the call of add_safepoint
  // below in this file.
  return n->is_MachSafePoint();
}

# endif // ENABLE_ZAP_DEAD_LOCALS
290 | |
291 //------------------------------compute_loop_first_inst_sizes------------------ | |
418 | 292 // Compute the size of first NumberOfLoopInstrToAlign instructions at the top |
0 | 293 // of a loop. When aligning a loop we need to provide enough instructions |
294 // in cpu's fetch buffer to feed decoders. The loop alignment could be | |
295 // avoided if we have enough instructions in fetch buffer at the head of a loop. | |
296 // By default, the size is set to 999999 by Block's constructor so that | |
297 // a loop will be aligned if the size is not reset here. | |
298 // | |
299 // Note: Mach instructions could contain several HW instructions | |
300 // so the size is estimated only. | |
301 // | |
302 void Compile::compute_loop_first_inst_sizes() { | |
303 // The next condition is used to gate the loop alignment optimization. | |
304 // Don't aligned a loop if there are enough instructions at the head of a loop | |
305 // or alignment padding is larger then MaxLoopPad. By default, MaxLoopPad | |
306 // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is | |
307 // equal to 11 bytes which is the largest address NOP instruction. | |
308 if( MaxLoopPad < OptoLoopAlignment-1 ) { | |
309 uint last_block = _cfg->_num_blocks-1; | |
310 for( uint i=1; i <= last_block; i++ ) { | |
311 Block *b = _cfg->_blocks[i]; | |
312 // Check the first loop's block which requires an alignment. | |
418 | 313 if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) { |
0 | 314 uint sum_size = 0; |
315 uint inst_cnt = NumberOfLoopInstrToAlign; | |
418 | 316 inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc); |
317 | |
318 // Check subsequent fallthrough blocks if the loop's first | |
319 // block(s) does not have enough instructions. | |
320 Block *nb = b; | |
321 while( inst_cnt > 0 && | |
322 i < last_block && | |
323 !_cfg->_blocks[i+1]->has_loop_alignment() && | |
324 !nb->has_successor(b) ) { | |
325 i++; | |
326 nb = _cfg->_blocks[i]; | |
327 inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc); | |
328 } // while( inst_cnt > 0 && i < last_block ) | |
329 | |
0 | 330 b->set_first_inst_size(sum_size); |
331 } // f( b->head()->is_Loop() ) | |
332 } // for( i <= last_block ) | |
333 } // if( MaxLoopPad < OptoLoopAlignment-1 ) | |
334 } | |
335 | |
3851 | 336 //----------------------shorten_branches--------------------------------------- |
0 | 337 // The architecture description provides short branch variants for some long |
338 // branch instructions. Replace eligible long branches with short branches. | |
3851 | 339 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { |
0 | 340 |
341 // ------------------ | |
342 // Compute size of each block, method size, and relocation information size | |
3851 | 343 uint nblocks = _cfg->_num_blocks; |
344 | |
345 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); | |
346 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); | |
347 int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks); | |
348 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); ) | |
349 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); ) | |
350 | |
351 bool has_short_branch_candidate = false; | |
0 | 352 |
353 // Initialize the sizes to 0 | |
354 code_size = 0; // Size in bytes of generated code | |
355 stub_size = 0; // Size in bytes of all stub entries | |
356 // Size in bytes of all relocation entries, including those in local stubs. | |
357 // Start with 2-bytes of reloc info for the unvalidated entry point | |
358 reloc_size = 1; // Number of relocation entries | |
359 | |
360 // Make three passes. The first computes pessimistic blk_starts, | |
3851 | 361 // relative jmp_offset and reloc_size information. The second performs |
2008 | 362 // short branch substitution using the pessimistic sizing. The |
363 // third inserts nops where needed. | |
0 | 364 |
365 // Step one, perform a pessimistic sizing pass. | |
3851 | 366 uint last_call_adr = max_uint; |
367 uint last_avoid_back_to_back_adr = max_uint; | |
0 | 368 uint nop_size = (new (this) MachNopNode())->size(_regalloc); |
3851 | 369 for (uint i = 0; i < nblocks; i++) { // For all blocks |
0 | 370 Block *b = _cfg->_blocks[i]; |
371 | |
3851 | 372 // During short branch replacement, we store the relative (to blk_starts) |
373 // offset of jump in jmp_offset, rather than the absolute offset of jump. | |
374 // This is so that we do not need to recompute sizes of all nodes when | |
375 // we compute correct blk_starts in our next sizing pass. | |
376 jmp_offset[i] = 0; | |
377 jmp_size[i] = 0; | |
378 jmp_nidx[i] = -1; | |
379 DEBUG_ONLY( jmp_target[i] = 0; ) | |
380 DEBUG_ONLY( jmp_rule[i] = 0; ) | |
381 | |
0 | 382 // Sum all instruction sizes to compute block size |
383 uint last_inst = b->_nodes.size(); | |
384 uint blk_size = 0; | |
3851 | 385 for (uint j = 0; j < last_inst; j++) { |
386 Node* nj = b->_nodes[j]; | |
0 | 387 // Handle machine instruction nodes |
3851 | 388 if (nj->is_Mach()) { |
0 | 389 MachNode *mach = nj->as_Mach(); |
390 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding | |
391 reloc_size += mach->reloc(); | |
392 if( mach->is_MachCall() ) { | |
393 MachCallNode *mcall = mach->as_MachCall(); | |
394 // This destination address is NOT PC-relative | |
395 | |
396 mcall->method_set((intptr_t)mcall->entry_point()); | |
397 | |
398 if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) { | |
399 stub_size += size_java_to_interp(); | |
400 reloc_size += reloc_java_to_interp(); | |
401 } | |
402 } else if (mach->is_MachSafePoint()) { | |
403 // If call/safepoint are adjacent, account for possible | |
404 // nop to disambiguate the two safepoints. | |
3851 | 405 // ScheduleAndBundle() can rearrange nodes in a block, |
406 // check for all offsets inside this block. | |
407 if (last_call_adr >= blk_starts[i]) { | |
408 blk_size += nop_size; | |
409 } | |
410 } | |
411 if (mach->avoid_back_to_back()) { | |
412 // Nop is inserted between "avoid back to back" instructions. | |
413 // ScheduleAndBundle() can rearrange nodes in a block, | |
414 // check for all offsets inside this block. | |
415 if (last_avoid_back_to_back_adr >= blk_starts[i]) { | |
0 | 416 blk_size += nop_size; |
417 } | |
418 } | |
3851 | 419 if (mach->may_be_short_branch()) { |
3853
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
420 if (!nj->is_MachBranch()) { |
3851 | 421 #ifndef PRODUCT |
422 nj->dump(3); | |
423 #endif | |
424 Unimplemented(); | |
425 } | |
426 assert(jmp_nidx[i] == -1, "block should have only one branch"); | |
427 jmp_offset[i] = blk_size; | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
428 jmp_size[i] = nj->size(_regalloc); |
3851 | 429 jmp_nidx[i] = j; |
430 has_short_branch_candidate = true; | |
431 } | |
0 | 432 } |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
433 blk_size += nj->size(_regalloc); |
0 | 434 // Remember end of call offset |
3842 | 435 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { |
3851 | 436 last_call_adr = blk_starts[i]+blk_size; |
437 } | |
438 // Remember end of avoid_back_to_back offset | |
439 if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) { | |
440 last_avoid_back_to_back_adr = blk_starts[i]+blk_size; | |
0 | 441 } |
442 } | |
443 | |
444 // When the next block starts a loop, we may insert pad NOP | |
445 // instructions. Since we cannot know our future alignment, | |
446 // assume the worst. | |
3851 | 447 if (i< nblocks-1) { |
0 | 448 Block *nb = _cfg->_blocks[i+1]; |
449 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); | |
3851 | 450 if (max_loop_pad > 0) { |
0 | 451 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); |
8874
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
452 // Adjust last_call_adr and/or last_avoid_back_to_back_adr. |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
453 // If either is the last instruction in this block, bump by |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
454 // max_loop_pad in lock-step with blk_size, so sizing |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
455 // calculations in subsequent blocks still can conservatively |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
456 // detect that it may the last instruction in this block. |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
457 if (last_call_adr == blk_starts[i]+blk_size) { |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
458 last_call_adr += max_loop_pad; |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
459 } |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
460 if (last_avoid_back_to_back_adr == blk_starts[i]+blk_size) { |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
461 last_avoid_back_to_back_adr += max_loop_pad; |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
462 } |
0 | 463 blk_size += max_loop_pad; |
464 } | |
465 } | |
466 | |
467 // Save block size; update total method size | |
468 blk_starts[i+1] = blk_starts[i]+blk_size; | |
469 } | |
470 | |
471 // Step two, replace eligible long jumps. | |
3851 | 472 bool progress = true; |
473 uint last_may_be_short_branch_adr = max_uint; | |
474 while (has_short_branch_candidate && progress) { | |
475 progress = false; | |
476 has_short_branch_candidate = false; | |
477 int adjust_block_start = 0; | |
478 for (uint i = 0; i < nblocks; i++) { | |
479 Block *b = _cfg->_blocks[i]; | |
480 int idx = jmp_nidx[i]; | |
481 MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach(); | |
482 if (mach != NULL && mach->may_be_short_branch()) { | |
483 #ifdef ASSERT | |
3853
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
484 assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity"); |
3851 | 485 int j; |
486 // Find the branch; ignore trailing NOPs. | |
487 for (j = b->_nodes.size()-1; j>=0; j--) { | |
488 Node* n = b->_nodes[j]; | |
489 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) | |
490 break; | |
491 } | |
492 assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity"); | |
493 #endif | |
494 int br_size = jmp_size[i]; | |
495 int br_offs = blk_starts[i] + jmp_offset[i]; | |
496 | |
0 | 497 // This requires the TRUE branch target be in succs[0] |
498 uint bnum = b->non_connector_successor(0)->_pre_order; | |
3851 | 499 int offset = blk_starts[bnum] - br_offs; |
500 if (bnum > i) { // adjust following block's offset | |
501 offset -= adjust_block_start; | |
502 } | |
503 // In the following code a nop could be inserted before | |
504 // the branch which will increase the backward distance. | |
505 bool needs_padding = ((uint)br_offs == last_may_be_short_branch_adr); | |
506 if (needs_padding && offset <= 0) | |
507 offset -= nop_size; | |
508 | |
509 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) { | |
510 // We've got a winner. Replace this branch. | |
3853
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
511 MachNode* replacement = mach->as_MachBranch()->short_branch_version(this); |
3851 | 512 |
513 // Update the jmp_size. | |
514 int new_size = replacement->size(_regalloc); | |
515 int diff = br_size - new_size; | |
516 assert(diff >= (int)nop_size, "short_branch size should be smaller"); | |
517 // Conservatively take into accound padding between | |
518 // avoid_back_to_back branches. Previous branch could be | |
519 // converted into avoid_back_to_back branch during next | |
520 // rounds. | |
521 if (needs_padding && replacement->avoid_back_to_back()) { | |
522 jmp_offset[i] += nop_size; | |
523 diff -= nop_size; | |
0 | 524 } |
3851 | 525 adjust_block_start += diff; |
526 b->_nodes.map(idx, replacement); | |
7196
2aff40cb4703
7092905: C2: Keep track of the number of dead nodes
bharadwaj
parents:
6804
diff
changeset
|
527 mach->subsume_by(replacement, C); |
3851 | 528 mach = replacement; |
529 progress = true; | |
530 | |
531 jmp_size[i] = new_size; | |
532 DEBUG_ONLY( jmp_target[i] = bnum; ); | |
533 DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); | |
0 | 534 } else { |
3851 | 535 // The jump distance is not short, try again during next iteration. |
536 has_short_branch_candidate = true; | |
0 | 537 } |
3851 | 538 } // (mach->may_be_short_branch()) |
539 if (mach != NULL && (mach->may_be_short_branch() || | |
540 mach->avoid_back_to_back())) { | |
541 last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i]; | |
0 | 542 } |
3851 | 543 blk_starts[i+1] -= adjust_block_start; |
0 | 544 } |
545 } | |
546 | |
547 #ifdef ASSERT | |
3851 | 548 for (uint i = 0; i < nblocks; i++) { // For all blocks |
549 if (jmp_target[i] != 0) { | |
550 int br_size = jmp_size[i]; | |
551 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]); | |
552 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) { | |
553 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]); | |
0 | 554 } |
3851 | 555 assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp"); |
0 | 556 } |
557 } | |
558 #endif | |
559 | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
560 // Step 3, compute the offsets of all blocks, will be done in fill_buffer() |
3851 | 561 // after ScheduleAndBundle(). |
562 | |
0 | 563 // ------------------ |
564 // Compute size for code buffer | |
3851 | 565 code_size = blk_starts[nblocks]; |
0 | 566 |
567 // Relocation records | |
568 reloc_size += 1; // Relo entry for exception handler | |
569 | |
570 // Adjust reloc_size to number of record of relocation info | |
571 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for | |
572 // a relocation index. | |
573 // The CodeBuffer will expand the locs array if this estimate is too low. | |
3851 | 574 reloc_size *= 10 / sizeof(relocInfo); |
575 } | |
576 | |
0 | 577 //------------------------------FillLocArray----------------------------------- |
578 // Create a bit of debug info and append it to the array. The mapping is from | |
579 // Java local or expression stack to constant, register or stack-slot. For | |
580 // doubles, insert 2 mappings and return 1 (to tell the caller that the next | |
581 // entry has been taken care of and caller should skip it). | |
582 static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) { | |
583 // This should never have accepted Bad before | |
584 assert(OptoReg::is_valid(regnum), "location must be valid"); | |
585 return (OptoReg::is_reg(regnum)) | |
586 ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) ) | |
587 : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum))); | |
588 } | |
589 | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
590 |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
591 ObjectValue* |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
592 Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
593 for (int i = 0; i < objs->length(); i++) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
594 assert(objs->at(i)->is_object(), "corrupt object cache"); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
595 ObjectValue* sv = (ObjectValue*) objs->at(i); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
596 if (sv->id() == id) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
597 return sv; |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
598 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
599 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
600 // Otherwise.. |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
601 return NULL; |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
602 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
603 |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
604 void Compile::set_sv_for_object_node(GrowableArray<ScopeValue*> *objs, |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
605 ObjectValue* sv ) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
606 assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition"); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
607 objs->append(sv); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
608 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
609 |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
610 |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
611 void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local, |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
612 GrowableArray<ScopeValue*> *array, |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
613 GrowableArray<ScopeValue*> *objs ) { |
0 | 614 assert( local, "use _top instead of null" ); |
615 if (array->length() != idx) { | |
616 assert(array->length() == idx + 1, "Unexpected array count"); | |
617 // Old functionality: | |
618 // return | |
619 // New functionality: | |
620 // Assert if the local is not top. In product mode let the new node | |
621 // override the old entry. | |
622 assert(local == top(), "LocArray collision"); | |
623 if (local == top()) { | |
624 return; | |
625 } | |
626 array->pop(); | |
627 } | |
628 const Type *t = local->bottom_type(); | |
629 | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
630 // Is it a safepoint scalar object node? |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
631 if (local->is_SafePointScalarObject()) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
632 SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject(); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
633 |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
634 ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
635 if (sv == NULL) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
636 ciKlass* cik = t->is_oopptr()->klass(); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
637 assert(cik->is_instance_klass() || |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
638 cik->is_array_klass(), "Not supported allocation."); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
639 sv = new ObjectValue(spobj->_idx, |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6620
diff
changeset
|
640 new ConstantOopWriteValue(cik->java_mirror()->constant_encoding())); |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
641 Compile::set_sv_for_object_node(objs, sv); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
642 |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
643 uint first_ind = spobj->first_index(); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
644 for (uint i = 0; i < spobj->n_fields(); i++) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
645 Node* fld_node = sfpt->in(first_ind+i); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
646 (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
647 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
648 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
649 array->append(sv); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
650 return; |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
651 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
652 |
0 | 653 // Grab the register number for the local |
654 OptoReg::Name regnum = _regalloc->get_reg_first(local); | |
655 if( OptoReg::is_valid(regnum) ) {// Got a register/stack? | |
656 // Record the double as two float registers. | |
657 // The register mask for such a value always specifies two adjacent | |
658 // float registers, with the lower register number even. | |
659 // Normally, the allocation of high and low words to these registers | |
660 // is irrelevant, because nearly all operations on register pairs | |
661 // (e.g., StoreD) treat them as a single unit. | |
662 // Here, we assume in addition that the words in these two registers | |
663 // stored "naturally" (by operations like StoreD and double stores | |
664 // within the interpreter) such that the lower-numbered register | |
665 // is written to the lower memory address. This may seem like | |
666 // a machine dependency, but it is not--it is a requirement on | |
667 // the author of the <arch>.ad file to ensure that, for every | |
668 // even/odd double-register pair to which a double may be allocated, | |
669 // the word in the even single-register is stored to the first | |
670 // memory word. (Note that register numbers are completely | |
671 // arbitrary, and are not tied to any machine-level encodings.) | |
672 #ifdef _LP64 | |
673 if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) { | |
674 array->append(new ConstantIntValue(0)); | |
675 array->append(new_loc_value( _regalloc, regnum, Location::dbl )); | |
676 } else if ( t->base() == Type::Long ) { | |
677 array->append(new ConstantIntValue(0)); | |
678 array->append(new_loc_value( _regalloc, regnum, Location::lng )); | |
679 } else if ( t->base() == Type::RawPtr ) { | |
680 // jsr/ret return address which must be restored into a the full | |
681 // width 64-bit stack slot. | |
682 array->append(new_loc_value( _regalloc, regnum, Location::lng )); | |
683 } | |
684 #else //_LP64 | |
685 #ifdef SPARC | |
686 if (t->base() == Type::Long && OptoReg::is_reg(regnum)) { | |
687 // For SPARC we have to swap high and low words for | |
688 // long values stored in a single-register (g0-g7). | |
689 array->append(new_loc_value( _regalloc, regnum , Location::normal )); | |
690 array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal )); | |
691 } else | |
692 #endif //SPARC | |
693 if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) { | |
694 // Repack the double/long as two jints. | |
695 // The convention the interpreter uses is that the second local | |
696 // holds the first raw word of the native double representation. | |
697 // This is actually reasonable, since locals and stack arrays | |
698 // grow downwards in all implementations. | |
699 // (If, on some machine, the interpreter's Java locals or stack | |
700 // were to grow upwards, the embedded doubles would be word-swapped.) | |
701 array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal )); | |
702 array->append(new_loc_value( _regalloc, regnum , Location::normal )); | |
703 } | |
704 #endif //_LP64 | |
705 else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) && | |
706 OptoReg::is_reg(regnum) ) { | |
1274
2883969d09e7
6910664: C2: java/util/Arrays/Sorting.java fails with DeoptimizeALot flag
kvn
parents:
1265
diff
changeset
|
707 array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double() |
0 | 708 ? Location::float_in_dbl : Location::normal )); |
709 } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) { | |
710 array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long | |
711 ? Location::int_in_long : Location::normal )); | |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
712 } else if( t->base() == Type::NarrowOop ) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
713 array->append(new_loc_value( _regalloc, regnum, Location::narrowoop )); |
0 | 714 } else { |
715 array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal )); | |
716 } | |
717 return; | |
718 } | |
719 | |
720 // No register. It must be constant data. | |
721 switch (t->base()) { | |
722 case Type::Half: // Second half of a double | |
723 ShouldNotReachHere(); // Caller should skip 2nd halves | |
724 break; | |
725 case Type::AnyPtr: | |
726 array->append(new ConstantOopWriteValue(NULL)); | |
727 break; | |
728 case Type::AryPtr: | |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6620
diff
changeset
|
729 case Type::InstPtr: // fall through |
989
148e5441d916
6863023: need non-perm oops in code cache for JSR 292
jrose
parents:
903
diff
changeset
|
730 array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding())); |
0 | 731 break; |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
732 case Type::NarrowOop: |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
733 if (t == TypeNarrowOop::NULL_PTR) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
734 array->append(new ConstantOopWriteValue(NULL)); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
735 } else { |
989
148e5441d916
6863023: need non-perm oops in code cache for JSR 292
jrose
parents:
903
diff
changeset
|
736 array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding())); |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
737 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
738 break; |
0 | 739 case Type::Int: |
740 array->append(new ConstantIntValue(t->is_int()->get_con())); | |
741 break; | |
742 case Type::RawPtr: | |
743 // A return address (T_ADDRESS). | |
744 assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI"); | |
745 #ifdef _LP64 | |
746 // Must be restored to the full-width 64-bit stack slot. | |
747 array->append(new ConstantLongValue(t->is_ptr()->get_con())); | |
748 #else | |
749 array->append(new ConstantIntValue(t->is_ptr()->get_con())); | |
750 #endif | |
751 break; | |
752 case Type::FloatCon: { | |
753 float f = t->is_float_constant()->getf(); | |
754 array->append(new ConstantIntValue(jint_cast(f))); | |
755 break; | |
756 } | |
757 case Type::DoubleCon: { | |
758 jdouble d = t->is_double_constant()->getd(); | |
759 #ifdef _LP64 | |
760 array->append(new ConstantIntValue(0)); | |
761 array->append(new ConstantDoubleValue(d)); | |
762 #else | |
763 // Repack the double as two jints. | |
764 // The convention the interpreter uses is that the second local | |
765 // holds the first raw word of the native double representation. | |
766 // This is actually reasonable, since locals and stack arrays | |
767 // grow downwards in all implementations. | |
768 // (If, on some machine, the interpreter's Java locals or stack | |
769 // were to grow upwards, the embedded doubles would be word-swapped.) | |
770 jint *dp = (jint*)&d; | |
771 array->append(new ConstantIntValue(dp[1])); | |
772 array->append(new ConstantIntValue(dp[0])); | |
773 #endif | |
774 break; | |
775 } | |
776 case Type::Long: { | |
777 jlong d = t->is_long()->get_con(); | |
778 #ifdef _LP64 | |
779 array->append(new ConstantIntValue(0)); | |
780 array->append(new ConstantLongValue(d)); | |
781 #else | |
782 // Repack the long as two jints. | |
783 // The convention the interpreter uses is that the second local | |
784 // holds the first raw word of the native double representation. | |
785 // This is actually reasonable, since locals and stack arrays | |
786 // grow downwards in all implementations. | |
787 // (If, on some machine, the interpreter's Java locals or stack | |
788 // were to grow upwards, the embedded doubles would be word-swapped.) | |
789 jint *dp = (jint*)&d; | |
790 array->append(new ConstantIntValue(dp[1])); | |
791 array->append(new ConstantIntValue(dp[0])); | |
792 #endif | |
793 break; | |
794 } | |
795 case Type::Top: // Add an illegal value here | |
796 array->append(new LocationValue(Location())); | |
797 break; | |
798 default: | |
799 ShouldNotReachHere(); | |
800 break; | |
801 } | |
802 } | |
803 | |
804 // Determine if this node starts a bundle | |
805 bool Compile::starts_bundle(const Node *n) const { | |
806 return (_node_bundling_limit > n->_idx && | |
807 _node_bundling_base[n->_idx].starts_bundle()); | |
808 } | |
809 | |
810 //--------------------------Process_OopMap_Node-------------------------------- | |
811 void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { | |
812 | |
813 // Handle special safepoint nodes for synchronization | |
814 MachSafePointNode *sfn = mach->as_MachSafePoint(); | |
815 MachCallNode *mcall; | |
816 | |
817 #ifdef ENABLE_ZAP_DEAD_LOCALS | |
818 assert( is_node_getting_a_safepoint(mach), "logic does not match; false negative"); | |
819 #endif | |
820 | |
821 int safepoint_pc_offset = current_offset; | |
1137
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1135
diff
changeset
|
822 bool is_method_handle_invoke = false; |
1253
f70b0d9ab095
6910618: C2: Error: assert(d->is_oop(),"JVM_ArrayCopy: dst not an oop")
kvn
parents:
1204
diff
changeset
|
823 bool return_oop = false; |
0 | 824 |
825 // Add the safepoint in the DebugInfoRecorder | |
826 if( !mach->is_MachCall() ) { | |
827 mcall = NULL; | |
828 debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); | |
829 } else { | |
830 mcall = mach->as_MachCall(); | |
1137
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1135
diff
changeset
|
831 |
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1135
diff
changeset
|
832 // Is the call a MethodHandle call? |
1265 | 833 if (mcall->is_MachCallJava()) { |
834 if (mcall->as_MachCallJava()->_method_handle_invoke) { | |
835 assert(has_method_handle_invokes(), "must have been set during call generation"); | |
836 is_method_handle_invoke = true; | |
837 } | |
838 } | |
1137
97125851f396
6829187: compiler optimizations required for JSR 292
twisti
parents:
1135
diff
changeset
|
839 |
1253
f70b0d9ab095
6910618: C2: Error: assert(d->is_oop(),"JVM_ArrayCopy: dst not an oop")
kvn
parents:
1204
diff
changeset
|
840 // Check if a call returns an object. |
f70b0d9ab095
6910618: C2: Error: assert(d->is_oop(),"JVM_ArrayCopy: dst not an oop")
kvn
parents:
1204
diff
changeset
|
841 if (mcall->return_value_is_used() && |
f70b0d9ab095
6910618: C2: Error: assert(d->is_oop(),"JVM_ArrayCopy: dst not an oop")
kvn
parents:
1204
diff
changeset
|
842 mcall->tf()->range()->field_at(TypeFunc::Parms)->isa_ptr()) { |
f70b0d9ab095
6910618: C2: Error: assert(d->is_oop(),"JVM_ArrayCopy: dst not an oop")
kvn
parents:
1204
diff
changeset
|
843 return_oop = true; |
f70b0d9ab095
6910618: C2: Error: assert(d->is_oop(),"JVM_ArrayCopy: dst not an oop")
kvn
parents:
1204
diff
changeset
|
844 } |
0 | 845 safepoint_pc_offset += mcall->ret_addr_offset(); |
846 debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map); | |
847 } | |
848 | |
849 // Loop over the JVMState list to add scope information | |
850 // Do not skip safepoints with a NULL method, they need monitor info | |
851 JVMState* youngest_jvms = sfn->jvms(); | |
852 int max_depth = youngest_jvms->depth(); | |
853 | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
854 // Allocate the object pool for scalar-replaced objects -- the map from |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
855 // small-integer keys (which can be recorded in the local and ostack |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
856 // arrays) to descriptions of the object state. |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
857 GrowableArray<ScopeValue*> *objs = new GrowableArray<ScopeValue*>(); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
858 |
0 | 859 // Visit scopes from oldest to youngest. |
860 for (int depth = 1; depth <= max_depth; depth++) { | |
861 JVMState* jvms = youngest_jvms->of_depth(depth); | |
862 int idx; | |
863 ciMethod* method = jvms->has_method() ? jvms->method() : NULL; | |
864 // Safepoints that do not have method() set only provide oop-map and monitor info | |
865 // to support GC; these do not support deoptimization. | |
866 int num_locs = (method == NULL) ? 0 : jvms->loc_size(); | |
867 int num_exps = (method == NULL) ? 0 : jvms->stk_size(); | |
868 int num_mon = jvms->nof_monitors(); | |
869 assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(), | |
870 "JVMS local count must match that of the method"); | |
871 | |
872 // Add Local and Expression Stack Information | |
873 | |
874 // Insert locals into the locarray | |
875 GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs); | |
876 for( idx = 0; idx < num_locs; idx++ ) { | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
877 FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs ); |
0 | 878 } |
879 | |
880 // Insert expression stack entries into the exparray | |
881 GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps); | |
882 for( idx = 0; idx < num_exps; idx++ ) { | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
883 FillLocArray( idx, sfn, sfn->stack(jvms, idx), exparray, objs ); |
0 | 884 } |
885 | |
886 // Add in mappings of the monitors | |
887 assert( !method || | |
888 !method->is_synchronized() || | |
889 method->is_native() || | |
890 num_mon > 0 || | |
891 !GenerateSynchronizationCode, | |
892 "monitors must always exist for synchronized methods"); | |
893 | |
894 // Build the growable array of ScopeValues for exp stack | |
895 GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon); | |
896 | |
897 // Loop over monitors and insert into array | |
898 for(idx = 0; idx < num_mon; idx++) { | |
899 // Grab the node that defines this monitor | |
460
424f9bfe6b96
6775880: EA +DeoptimizeALot: assert(mon_info->owner()->is_locked(),"object must be locked now")
kvn
parents:
418
diff
changeset
|
900 Node* box_node = sfn->monitor_box(jvms, idx); |
424f9bfe6b96
6775880: EA +DeoptimizeALot: assert(mon_info->owner()->is_locked(),"object must be locked now")
kvn
parents:
418
diff
changeset
|
901 Node* obj_node = sfn->monitor_obj(jvms, idx); |
0 | 902 |
903 // Create ScopeValue for object | |
904 ScopeValue *scval = NULL; | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
905 |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
906 if( obj_node->is_SafePointScalarObject() ) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
907 SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject(); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
908 scval = Compile::sv_for_node_id(objs, spobj->_idx); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
909 if (scval == NULL) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
910 const Type *t = obj_node->bottom_type(); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
911 ciKlass* cik = t->is_oopptr()->klass(); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
912 assert(cik->is_instance_klass() || |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
913 cik->is_array_klass(), "Not supported allocation."); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
914 ObjectValue* sv = new ObjectValue(spobj->_idx, |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6620
diff
changeset
|
915 new ConstantOopWriteValue(cik->java_mirror()->constant_encoding())); |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
916 Compile::set_sv_for_object_node(objs, sv); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
917 |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
918 uint first_ind = spobj->first_index(); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
919 for (uint i = 0; i < spobj->n_fields(); i++) { |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
920 Node* fld_node = sfn->in(first_ind+i); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
921 (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
922 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
923 scval = sv; |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
924 } |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
925 } else if( !obj_node->is_Con() ) { |
0 | 926 OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node); |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
927 if( obj_node->bottom_type()->base() == Type::NarrowOop ) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
928 scval = new_loc_value( _regalloc, obj_reg, Location::narrowoop ); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
929 } else { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
930 scval = new_loc_value( _regalloc, obj_reg, Location::oop ); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
931 } |
0 | 932 } else { |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
235
diff
changeset
|
933 const TypePtr *tp = obj_node->bottom_type()->make_ptr(); |
3400
7523488edce5
7047300: VM crashes with assert(_base == InstPtr) failed: Not an object pointer
kvn
parents:
3252
diff
changeset
|
934 scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding()); |
0 | 935 } |
936 | |
4777 | 937 OptoReg::Name box_reg = BoxLockNode::reg(box_node); |
66
6dbf1a175d6b
6672848: (Escape Analysis) improve lock elimination with EA
kvn
parents:
63
diff
changeset
|
938 Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg)); |
4777 | 939 bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated()); |
940 monarray->append(new MonitorValue(scval, basic_lock, eliminated)); | |
0 | 941 } |
942 | |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
943 // We dump the object pool first, since deoptimization reads it in first. |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
944 debug_info()->dump_object_pool(objs); |
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
945 |
0 | 946 // Build first class objects to pass to scope |
947 DebugToken *locvals = debug_info()->create_scope_values(locarray); | |
948 DebugToken *expvals = debug_info()->create_scope_values(exparray); | |
949 DebugToken *monvals = debug_info()->create_monitor_values(monarray); | |
950 | |
951 // Make method available for all Safepoints | |
952 ciMethod* scope_method = method ? method : _method; | |
953 // Describe the scope here | |
954 assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI"); | |
1135
e66fd840cb6b
6893081: method handle & invokedynamic code needs additional cleanup (post 6815692, 6858164)
twisti
parents:
989
diff
changeset
|
955 assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest"); |
63
eac007780a58
6671807: (Escape Analysis) Add new ideal node to represent the state of a scalarized object at a safepoint
kvn
parents:
28
diff
changeset
|
956 // Now we can describe the scope. |
4583
597bc897257d
Made DebugInformationRecorder::describe_scope() take both a methodHandle _and_ a ciMethod* parameter to avoid creating handles in scopes where it is not allowed.
Doug Simon <doug.simon@oracle.com>
parents:
4137
diff
changeset
|
957 methodHandle null_mh; |
597bc897257d
Made DebugInformationRecorder::describe_scope() take both a methodHandle _and_ a ciMethod* parameter to avoid creating handles in scopes where it is not allowed.
Doug Simon <doug.simon@oracle.com>
parents:
4137
diff
changeset
|
958 bool rethrow_exception = false; |
597bc897257d
Made DebugInformationRecorder::describe_scope() take both a methodHandle _and_ a ciMethod* parameter to avoid creating handles in scopes where it is not allowed.
Doug Simon <doug.simon@oracle.com>
parents:
4137
diff
changeset
|
959 debug_info()->describe_scope(safepoint_pc_offset, null_mh, scope_method, jvms->bci(), jvms->should_reexecute(), rethrow_exception, is_method_handle_invoke, return_oop, locvals, expvals, monvals); |
0 | 960 } // End jvms loop |
961 | |
962 // Mark the end of the scope set. | |
963 debug_info()->end_safepoint(safepoint_pc_offset); | |
964 } | |
965 | |
966 | |
967 | |
968 // A simplified version of Process_OopMap_Node, to handle non-safepoints. | |
969 class NonSafepointEmitter { | |
970 Compile* C; | |
971 JVMState* _pending_jvms; | |
972 int _pending_offset; | |
973 | |
974 void emit_non_safepoint(); | |
975 | |
976 public: | |
977 NonSafepointEmitter(Compile* compile) { | |
978 this->C = compile; | |
979 _pending_jvms = NULL; | |
980 _pending_offset = 0; | |
981 } | |
982 | |
983 void observe_instruction(Node* n, int pc_offset) { | |
984 if (!C->debug_info()->recording_non_safepoints()) return; | |
985 | |
986 Node_Notes* nn = C->node_notes_at(n->_idx); | |
987 if (nn == NULL || nn->jvms() == NULL) return; | |
988 if (_pending_jvms != NULL && | |
989 _pending_jvms->same_calls_as(nn->jvms())) { | |
990 // Repeated JVMS? Stretch it up here. | |
991 _pending_offset = pc_offset; | |
992 } else { | |
993 if (_pending_jvms != NULL && | |
994 _pending_offset < pc_offset) { | |
995 emit_non_safepoint(); | |
996 } | |
997 _pending_jvms = NULL; | |
998 if (pc_offset > C->debug_info()->last_pc_offset()) { | |
999 // This is the only way _pending_jvms can become non-NULL: | |
1000 _pending_jvms = nn->jvms(); | |
1001 _pending_offset = pc_offset; | |
1002 } | |
1003 } | |
1004 } | |
1005 | |
1006 // Stay out of the way of real safepoints: | |
1007 void observe_safepoint(JVMState* jvms, int pc_offset) { | |
1008 if (_pending_jvms != NULL && | |
1009 !_pending_jvms->same_calls_as(jvms) && | |
1010 _pending_offset < pc_offset) { | |
1011 emit_non_safepoint(); | |
1012 } | |
1013 _pending_jvms = NULL; | |
1014 } | |
1015 | |
1016 void flush_at_end() { | |
1017 if (_pending_jvms != NULL) { | |
1018 emit_non_safepoint(); | |
1019 } | |
1020 _pending_jvms = NULL; | |
1021 } | |
1022 }; | |
1023 | |
1024 void NonSafepointEmitter::emit_non_safepoint() { | |
1025 JVMState* youngest_jvms = _pending_jvms; | |
1026 int pc_offset = _pending_offset; | |
1027 | |
1028 // Clear it now: | |
1029 _pending_jvms = NULL; | |
1030 | |
1031 DebugInformationRecorder* debug_info = C->debug_info(); | |
1032 assert(debug_info->recording_non_safepoints(), "sanity"); | |
1033 | |
1034 debug_info->add_non_safepoint(pc_offset); | |
1035 int max_depth = youngest_jvms->depth(); | |
1036 | |
1037 // Visit scopes from oldest to youngest. | |
1038 for (int depth = 1; depth <= max_depth; depth++) { | |
1039 JVMState* jvms = youngest_jvms->of_depth(depth); | |
1040 ciMethod* method = jvms->has_method() ? jvms->method() : NULL; | |
900
9987d9d5eb0e
6833129: specjvm98 fails with NullPointerException in the compiler with -XX:DeoptimizeALot
cfang
parents:
859
diff
changeset
|
1041 assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest"); |
4583
597bc897257d
Made DebugInformationRecorder::describe_scope() take both a methodHandle _and_ a ciMethod* parameter to avoid creating handles in scopes where it is not allowed.
Doug Simon <doug.simon@oracle.com>
parents:
4137
diff
changeset
|
1042 methodHandle null_mh; |
597bc897257d
Made DebugInformationRecorder::describe_scope() take both a methodHandle _and_ a ciMethod* parameter to avoid creating handles in scopes where it is not allowed.
Doug Simon <doug.simon@oracle.com>
parents:
4137
diff
changeset
|
1043 debug_info->describe_scope(pc_offset, null_mh, method, jvms->bci(), jvms->should_reexecute()); |
0 | 1044 } |
1045 | |
1046 // Mark the end of the scope set. | |
1047 debug_info->end_non_safepoint(pc_offset); | |
1048 } | |
1049 | |
1050 | |
1051 | |
3851 | 1052 // helper for fill_buffer bailout logic |
0 | 1053 static void turn_off_compiler(Compile* C) { |
2353
1c0cf339481b
7025742: Can not use CodeCache::unallocated_capacity() with fragmented CodeCache
kvn
parents:
2091
diff
changeset
|
1054 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) { |
0 | 1055 // Do not turn off compilation if a single giant method has |
1056 // blown the code cache size. | |
1057 C->record_failure("excessive request to CodeCache"); | |
1058 } else { | |
28 | 1059 // Let CompilerBroker disable further compilations. |
0 | 1060 C->record_failure("CodeCache is full"); |
1061 } | |
1062 } | |
1063 | |
1064 | |
3851 | 1065 //------------------------------init_buffer------------------------------------ |
1066 CodeBuffer* Compile::init_buffer(uint* blk_starts) { | |
0 | 1067 |
1068 // Set the initially allocated size | |
1069 int code_req = initial_code_capacity; | |
1070 int locs_req = initial_locs_capacity; | |
1071 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity; | |
1072 int const_req = initial_const_capacity; | |
1073 | |
1074 int pad_req = NativeCall::instruction_size; | |
1075 // The extra spacing after the code is necessary on some platforms. | |
1076 // Sometimes we need to patch in a jump after the last instruction, | |
1077 // if the nmethod has been deoptimized. (See 4932387, 4894843.) | |
1078 | |
1079 // Compute the byte offset where we can store the deopt pc. | |
1080 if (fixed_slots() != 0) { | |
1081 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot)); | |
1082 } | |
1083 | |
1084 // Compute prolog code size | |
1085 _method_size = 0; | |
1086 _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize; | |
1087 #ifdef IA64 | |
1088 if (save_argument_registers()) { | |
1089 // 4815101: this is a stub with implicit and unknown precision fp args. | |
1090 // The usual spill mechanism can only generate stfd's in this case, which | |
1091 // doesn't work if the fp reg to spill contains a single-precision denorm. | |
1092 // Instead, we hack around the normal spill mechanism using stfspill's and | |
1093 // ldffill's in the MachProlog and MachEpilog emit methods. We allocate | |
1094 // space here for the fp arg regs (f8-f15) we're going to thusly spill. | |
1095 // | |
1096 // If we ever implement 16-byte 'registers' == stack slots, we can | |
1097 // get rid of this hack and have SpillCopy generate stfspill/ldffill | |
1098 // instead of stfd/stfs/ldfd/ldfs. | |
1099 _frame_slots += 8*(16/BytesPerInt); | |
1100 } | |
1101 #endif | |
3851 | 1102 assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check"); |
0 | 1103 |
2008 | 1104 if (has_mach_constant_base_node()) { |
1105 // Fill the constant table. | |
3851 | 1106 // Note: This must happen before shorten_branches. |
1107 for (uint i = 0; i < _cfg->_num_blocks; i++) { | |
2008 | 1108 Block* b = _cfg->_blocks[i]; |
1109 | |
1110 for (uint j = 0; j < b->_nodes.size(); j++) { | |
1111 Node* n = b->_nodes[j]; | |
1112 | |
1113 // If the node is a MachConstantNode evaluate the constant | |
1114 // value section. | |
1115 if (n->is_MachConstant()) { | |
1116 MachConstantNode* machcon = n->as_MachConstant(); | |
1117 machcon->eval_constant(C); | |
1118 } | |
1119 } | |
1120 } | |
1121 | |
1122 // Calculate the offsets of the constants and the size of the | |
1123 // constant table (including the padding to the next section). | |
1124 constant_table().calculate_offsets_and_size(); | |
1125 const_req = constant_table().size(); | |
1126 } | |
1127 | |
1128 // Initialize the space for the BufferBlob used to find and verify | |
1129 // instruction size in MachNode::emit_size() | |
1130 init_scratch_buffer_blob(const_req); | |
3851 | 1131 if (failing()) return NULL; // Out of memory |
1132 | |
1133 // Pre-compute the length of blocks and replace | |
1134 // long branches with short if machine supports it. | |
1135 shorten_branches(blk_starts, code_req, locs_req, stub_req); | |
0 | 1136 |
1137 // nmethod and CodeBuffer count stubs & constants as part of method's code. | |
1138 int exception_handler_req = size_exception_handler(); | |
1139 int deopt_handler_req = size_deopt_handler(); | |
1140 exception_handler_req += MAX_stubs_size; // add marginal slop for handler | |
1141 deopt_handler_req += MAX_stubs_size; // add marginal slop for handler | |
1142 stub_req += MAX_stubs_size; // ensure per-stub margin | |
1143 code_req += MAX_inst_size; // ensure per-instruction margin | |
1265 | 1144 |
0 | 1145 if (StressCodeBuffers) |
1146 code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion | |
1265 | 1147 |
1148 int total_req = | |
2008 | 1149 const_req + |
1265 | 1150 code_req + |
1151 pad_req + | |
1152 stub_req + | |
1153 exception_handler_req + | |
2008 | 1154 deopt_handler_req; // deopt handler |
1265 | 1155 |
1156 if (has_method_handle_invokes()) | |
1157 total_req += deopt_handler_req; // deopt MH handler | |
1158 | |
0 | 1159 CodeBuffer* cb = code_buffer(); |
1160 cb->initialize(total_req, locs_req); | |
1161 | |
1162 // Have we run out of code space? | |
1202 | 1163 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
0 | 1164 turn_off_compiler(this); |
3851 | 1165 return NULL; |
0 | 1166 } |
1167 // Configure the code buffer. | |
1168 cb->initialize_consts_size(const_req); | |
1169 cb->initialize_stubs_size(stub_req); | |
1170 cb->initialize_oop_recorder(env()->oop_recorder()); | |
1171 | |
1172 // fill in the nop array for bundling computations | |
1173 MachNode *_nop_list[Bundle::_nop_count]; | |
1174 Bundle::initialize_nops(_nop_list, this); | |
1175 | |
3851 | 1176 return cb; |
1177 } | |
1178 | |
1179 //------------------------------fill_buffer------------------------------------ | |
1180 void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1181 // blk_starts[] contains offsets calculated during short branches processing, |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1182 // offsets should not be increased during following steps. |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1183 |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1184 // Compute the size of first NumberOfLoopInstrToAlign instructions at head |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1185 // of a loop. It is used to determine the padding for loop alignment. |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1186 compute_loop_first_inst_sizes(); |
3851 | 1187 |
0 | 1188 // Create oopmap set. |
1189 _oop_map_set = new OopMapSet(); | |
1190 | |
1191 // !!!!! This preserves old handling of oopmaps for now | |
1192 debug_info()->set_oopmaps(_oop_map_set); | |
1193 | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1194 uint nblocks = _cfg->_num_blocks; |
0 | 1195 // Count and start of implicit null check instructions |
1196 uint inct_cnt = 0; | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1197 uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1); |
0 | 1198 |
1199 // Count and start of calls | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1200 uint *call_returns = NEW_RESOURCE_ARRAY(uint, nblocks+1); |
0 | 1201 |
1202 uint return_offset = 0; | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
707
diff
changeset
|
1203 int nop_size = (new (this) MachNopNode())->size(_regalloc); |
0 | 1204 |
1205 int previous_offset = 0; | |
1206 int current_offset = 0; | |
1207 int last_call_offset = -1; | |
3851 | 1208 int last_avoid_back_to_back_offset = -1; |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1209 #ifdef ASSERT |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1210 uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1211 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1212 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1213 uint* jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); |
3851 | 1214 #endif |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1215 |
0 | 1216 // Create an array of unused labels, one for each basic block, if printing is enabled |
1217 #ifndef PRODUCT | |
1218 int *node_offsets = NULL; | |
3851 | 1219 uint node_offset_limit = unique(); |
1220 | |
1221 if (print_assembly()) | |
0 | 1222 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit); |
1223 #endif | |
1224 | |
1225 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily | |
1226 | |
2008 | 1227 // Emit the constant table. |
1228 if (has_mach_constant_base_node()) { | |
1229 constant_table().emit(*cb); | |
1230 } | |
1231 | |
3851 | 1232 // Create an array of labels, one for each basic block |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1233 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, nblocks+1); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1234 for (uint i=0; i <= nblocks; i++) { |
3851 | 1235 blk_labels[i].init(); |
1236 } | |
1237 | |
0 | 1238 // ------------------ |
1239 // Now fill in the code buffer | |
1240 Node *delay_slot = NULL; | |
1241 | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1242 for (uint i=0; i < nblocks; i++) { |
0 | 1243 Block *b = _cfg->_blocks[i]; |
1244 | |
1245 Node *head = b->head(); | |
1246 | |
1247 // If this block needs to start aligned (i.e, can be reached other | |
1248 // than by falling-thru from the previous block), then force the | |
1249 // start of a new bundle. | |
3851 | 1250 if (Pipeline::requires_bundling() && starts_bundle(head)) |
0 | 1251 cb->flush_bundle(true); |
1252 | |
3851 | 1253 #ifdef ASSERT |
1254 if (!b->is_connector()) { | |
1255 stringStream st; | |
1256 b->dump_head(&_cfg->_bbs, &st); | |
1257 MacroAssembler(cb).block_comment(st.as_string()); | |
1258 } | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1259 jmp_target[i] = 0; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1260 jmp_offset[i] = 0; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1261 jmp_size[i] = 0; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1262 jmp_rule[i] = 0; |
3851 | 1263 #endif |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1264 int blk_offset = current_offset; |
3851 | 1265 |
0 | 1266 // Define the label at the beginning of the basic block |
3851 | 1267 MacroAssembler(cb).bind(blk_labels[b->_pre_order]); |
0 | 1268 |
1269 uint last_inst = b->_nodes.size(); | |
1270 | |
1271 // Emit block normally, except for last instruction. | |
1272 // Emit means "dump code bits into code buffer". | |
3851 | 1273 for (uint j = 0; j<last_inst; j++) { |
0 | 1274 |
1275 // Get the node | |
1276 Node* n = b->_nodes[j]; | |
1277 | |
1278 // See if delay slots are supported | |
1279 if (valid_bundle_info(n) && | |
1280 node_bundling(n)->used_in_unconditional_delay()) { | |
1281 assert(delay_slot == NULL, "no use of delay slot node"); | |
1282 assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size"); | |
1283 | |
1284 delay_slot = n; | |
1285 continue; | |
1286 } | |
1287 | |
1288 // If this starts a new instruction group, then flush the current one | |
1289 // (but allow split bundles) | |
3851 | 1290 if (Pipeline::requires_bundling() && starts_bundle(n)) |
0 | 1291 cb->flush_bundle(false); |
1292 | |
1293 // The following logic is duplicated in the code ifdeffed for | |
605 | 1294 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It |
0 | 1295 // should be factored out. Or maybe dispersed to the nodes? |
1296 | |
1297 // Special handling for SafePoint/Call Nodes | |
1298 bool is_mcall = false; | |
3851 | 1299 if (n->is_Mach()) { |
0 | 1300 MachNode *mach = n->as_Mach(); |
1301 is_mcall = n->is_MachCall(); | |
1302 bool is_sfn = n->is_MachSafePoint(); | |
1303 | |
1304 // If this requires all previous instructions be flushed, then do so | |
3851 | 1305 if (is_sfn || is_mcall || mach->alignment_required() != 1) { |
0 | 1306 cb->flush_bundle(true); |
1748 | 1307 current_offset = cb->insts_size(); |
0 | 1308 } |
1309 | |
3851 | 1310 // A padding may be needed again since a previous instruction |
1311 // could be moved to delay slot. | |
1312 | |
0 | 1313 // align the instruction if necessary |
1314 int padding = mach->compute_padding(current_offset); | |
1315 // Make sure safepoint node for polling is distinct from a call's | |
1316 // return by adding a nop if needed. | |
3851 | 1317 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) { |
0 | 1318 padding = nop_size; |
1319 } | |
3851 | 1320 if (padding == 0 && mach->avoid_back_to_back() && |
1321 current_offset == last_avoid_back_to_back_offset) { | |
1322 // Avoid back to back some instructions. | |
1323 padding = nop_size; | |
0 | 1324 } |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1325 |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1326 if(padding > 0) { |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1327 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1328 int nops_cnt = padding / nop_size; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1329 MachNode *nop = new (this) MachNopNode(nops_cnt); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1330 b->_nodes.insert(j++, nop); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1331 last_inst++; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1332 _cfg->_bbs.map( nop->_idx, b ); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1333 nop->emit(*cb, _regalloc); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1334 cb->flush_bundle(true); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1335 current_offset = cb->insts_size(); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1336 } |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1337 |
0 | 1338 // Remember the start of the last call in a basic block |
1339 if (is_mcall) { | |
1340 MachCallNode *mcall = mach->as_MachCall(); | |
1341 | |
1342 // This destination address is NOT PC-relative | |
1343 mcall->method_set((intptr_t)mcall->entry_point()); | |
1344 | |
1345 // Save the return address | |
1346 call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset(); | |
1347 | |
3842 | 1348 if (mcall->is_MachCallLeaf()) { |
0 | 1349 is_mcall = false; |
1350 is_sfn = false; | |
1351 } | |
1352 } | |
1353 | |
1354 // sfn will be valid whenever mcall is valid now because of inheritance | |
3851 | 1355 if (is_sfn || is_mcall) { |
0 | 1356 |
1357 // Handle special safepoint nodes for synchronization | |
3851 | 1358 if (!is_mcall) { |
0 | 1359 MachSafePointNode *sfn = mach->as_MachSafePoint(); |
1360 // !!!!! Stubs only need an oopmap right now, so bail out | |
3851 | 1361 if (sfn->jvms()->method() == NULL) { |
0 | 1362 // Write the oopmap directly to the code blob??!! |
1363 # ifdef ENABLE_ZAP_DEAD_LOCALS | |
1364 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive"); | |
1365 # endif | |
1366 continue; | |
1367 } | |
1368 } // End synchronization | |
1369 | |
1370 non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(), | |
1371 current_offset); | |
1372 Process_OopMap_Node(mach, current_offset); | |
1373 } // End if safepoint | |
1374 | |
1375 // If this is a null check, then add the start of the previous instruction to the list | |
1376 else if( mach->is_MachNullCheck() ) { | |
1377 inct_starts[inct_cnt++] = previous_offset; | |
1378 } | |
1379 | |
1380 // If this is a branch, then fill in the label with the target BB's label | |
3853
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1381 else if (mach->is_MachBranch()) { |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1382 // This requires the TRUE branch target be in succs[0] |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1383 uint block_num = b->non_connector_successor(0)->_pre_order; |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1384 |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1385 // Try to replace long branch if delay slot is not used, |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1386 // it is mostly for back branches since forward branch's |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1387 // distance is not updated yet. |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1388 bool delay_slot_is_used = valid_bundle_info(n) && |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1389 node_bundling(n)->use_unconditional_delay(); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1390 if (!delay_slot_is_used && mach->may_be_short_branch()) { |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1391 assert(delay_slot == NULL, "not expecting delay slot node"); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1392 int br_size = n->size(_regalloc); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1393 int offset = blk_starts[block_num] - current_offset; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1394 if (block_num >= i) { |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1395 // Current and following block's offset are not |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1396 // finilized yet, adjust distance by the difference |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1397 // between calculated and final offsets of current block. |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1398 offset -= (blk_starts[i] - blk_offset); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1399 } |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1400 // In the following code a nop could be inserted before |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1401 // the branch which will increase the backward distance. |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1402 bool needs_padding = (current_offset == last_avoid_back_to_back_offset); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1403 if (needs_padding && offset <= 0) |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1404 offset -= nop_size; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1405 |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1406 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) { |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1407 // We've got a winner. Replace this branch. |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1408 MachNode* replacement = mach->as_MachBranch()->short_branch_version(this); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1409 |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1410 // Update the jmp_size. |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1411 int new_size = replacement->size(_regalloc); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1412 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller"); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1413 // Insert padding between avoid_back_to_back branches. |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1414 if (needs_padding && replacement->avoid_back_to_back()) { |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1415 MachNode *nop = new (this) MachNopNode(); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1416 b->_nodes.insert(j++, nop); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1417 _cfg->_bbs.map(nop->_idx, b); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1418 last_inst++; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1419 nop->emit(*cb, _regalloc); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1420 cb->flush_bundle(true); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1421 current_offset = cb->insts_size(); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1422 } |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1423 #ifdef ASSERT |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1424 jmp_target[i] = block_num; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1425 jmp_offset[i] = current_offset - blk_offset; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1426 jmp_size[i] = new_size; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1427 jmp_rule[i] = mach->rule(); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1428 #endif |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1429 b->_nodes.map(j, replacement); |
7196
2aff40cb4703
7092905: C2: Keep track of the number of dead nodes
bharadwaj
parents:
6804
diff
changeset
|
1430 mach->subsume_by(replacement, C); |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1431 n = replacement; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1432 mach = replacement; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1433 } |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1434 } |
3853
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1435 mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num ); |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1436 } else if (mach->ideal_Opcode() == Op_Jump) { |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1437 for (uint h = 0; h < b->_num_succs; h++) { |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1438 Block* succs_block = b->_succs[h]; |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1439 for (uint j = 1; j < succs_block->num_preds(); j++) { |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1440 Node* jpn = succs_block->pred(j); |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1441 if (jpn->is_JumpProj() && jpn->in(0) == mach) { |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1442 uint block_num = succs_block->non_connector()->_pre_order; |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1443 Label *blkLabel = &blk_labels[block_num]; |
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
1444 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); |
0 | 1445 } |
1446 } | |
1447 } | |
1448 } | |
1449 | |
1450 #ifdef ASSERT | |
605 | 1451 // Check that oop-store precedes the card-mark |
3851 | 1452 else if (mach->ideal_Opcode() == Op_StoreCM) { |
0 | 1453 uint storeCM_idx = j; |
3248
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1454 int count = 0; |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1455 for (uint prec = mach->req(); prec < mach->len(); prec++) { |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1456 Node *oop_store = mach->in(prec); // Precedence edge |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1457 if (oop_store == NULL) continue; |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1458 count++; |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1459 uint i4; |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1460 for( i4 = 0; i4 < last_inst; ++i4 ) { |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1461 if( b->_nodes[i4] == oop_store ) break; |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1462 } |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1463 // Note: This test can provide a false failure if other precedence |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1464 // edges have been added to the storeCMNode. |
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1465 assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store"); |
0 | 1466 } |
3248
e6beb62de02d
7032963: StoreCM shouldn't participate in store elimination
never
parents:
2353
diff
changeset
|
1467 assert(count > 0, "storeCM expects at least one precedence edge"); |
0 | 1468 } |
1469 #endif | |
1470 | |
3851 | 1471 else if (!n->is_Proj()) { |
605 | 1472 // Remember the beginning of the previous instruction, in case |
0 | 1473 // it's followed by a flag-kill and a null-check. Happens on |
1474 // Intel all the time, with add-to-memory kind of opcodes. | |
1475 previous_offset = current_offset; | |
1476 } | |
1477 } | |
1478 | |
1479 // Verify that there is sufficient space remaining | |
1480 cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size); | |
1202 | 1481 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
0 | 1482 turn_off_compiler(this); |
1483 return; | |
1484 } | |
1485 | |
1486 // Save the offset for the listing | |
1487 #ifndef PRODUCT | |
3851 | 1488 if (node_offsets && n->_idx < node_offset_limit) |
1748 | 1489 node_offsets[n->_idx] = cb->insts_size(); |
0 | 1490 #endif |
1491 | |
1492 // "Normal" instruction case | |
3851 | 1493 DEBUG_ONLY( uint instr_offset = cb->insts_size(); ) |
0 | 1494 n->emit(*cb, _regalloc); |
1748 | 1495 current_offset = cb->insts_size(); |
3851 | 1496 |
1497 #ifdef ASSERT | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1498 if (n->size(_regalloc) < (current_offset-instr_offset)) { |
3851 | 1499 n->dump(); |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1500 assert(false, "wrong size of mach node"); |
3851 | 1501 } |
1502 #endif | |
0 | 1503 non_safepoints.observe_instruction(n, current_offset); |
1504 | |
1505 // mcall is last "call" that can be a safepoint | |
1506 // record it so we can see if a poll will directly follow it | |
1507 // in which case we'll need a pad to make the PcDesc sites unique | |
1508 // see 5010568. This can be slightly inaccurate but conservative | |
1509 // in the case that return address is not actually at current_offset. | |
1510 // This is a small price to pay. | |
1511 | |
1512 if (is_mcall) { | |
1513 last_call_offset = current_offset; | |
1514 } | |
1515 | |
3851 | 1516 if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) { |
1517 // Avoid back to back some instructions. | |
1518 last_avoid_back_to_back_offset = current_offset; | |
1519 } | |
1520 | |
0 | 1521 // See if this instruction has a delay slot |
3851 | 1522 if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { |
0 | 1523 assert(delay_slot != NULL, "expecting delay slot node"); |
1524 | |
1525 // Back up 1 instruction | |
1748 | 1526 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size()); |
0 | 1527 |
1528 // Save the offset for the listing | |
1529 #ifndef PRODUCT | |
3851 | 1530 if (node_offsets && delay_slot->_idx < node_offset_limit) |
1748 | 1531 node_offsets[delay_slot->_idx] = cb->insts_size(); |
0 | 1532 #endif |
1533 | |
1534 // Support a SafePoint in the delay slot | |
3851 | 1535 if (delay_slot->is_MachSafePoint()) { |
0 | 1536 MachNode *mach = delay_slot->as_Mach(); |
1537 // !!!!! Stubs only need an oopmap right now, so bail out | |
3851 | 1538 if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) { |
0 | 1539 // Write the oopmap directly to the code blob??!! |
1540 # ifdef ENABLE_ZAP_DEAD_LOCALS | |
1541 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive"); | |
1542 # endif | |
1543 delay_slot = NULL; | |
1544 continue; | |
1545 } | |
1546 | |
1547 int adjusted_offset = current_offset - Pipeline::instr_unit_size(); | |
1548 non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(), | |
1549 adjusted_offset); | |
1550 // Generate an OopMap entry | |
1551 Process_OopMap_Node(mach, adjusted_offset); | |
1552 } | |
1553 | |
1554 // Insert the delay slot instruction | |
1555 delay_slot->emit(*cb, _regalloc); | |
1556 | |
1557 // Don't reuse it | |
1558 delay_slot = NULL; | |
1559 } | |
1560 | |
1561 } // End for all instructions in block | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1562 |
418 | 1563 // If the next block is the top of a loop, pad this block out to align |
1564 // the loop top a little. Helps prevent pipe stalls at loop back branches. | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1565 if (i < nblocks-1) { |
0 | 1566 Block *nb = _cfg->_blocks[i+1]; |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1567 int padding = nb->alignment_padding(current_offset); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1568 if( padding > 0 ) { |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1569 MachNode *nop = new (this) MachNopNode(padding / nop_size); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1570 b->_nodes.insert( b->_nodes.size(), nop ); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1571 _cfg->_bbs.map( nop->_idx, b ); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1572 nop->emit(*cb, _regalloc); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1573 current_offset = cb->insts_size(); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1574 } |
0 | 1575 } |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1576 // Verify that the distance for generated before forward |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1577 // short branches is still valid. |
8874
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
1578 guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size"); |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
1579 |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
1580 // Save new block start offset |
0a8c2ea3902d
8010437: guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
rasbold
parents:
8770
diff
changeset
|
1581 blk_starts[i] = blk_offset; |
0 | 1582 } // End of for all blocks |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1583 blk_starts[nblocks] = current_offset; |
0 | 1584 |
1585 non_safepoints.flush_at_end(); | |
1586 | |
1587 // Offset too large? | |
1588 if (failing()) return; | |
1589 | |
1590 // Define a pseudo-label at the end of the code | |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1591 MacroAssembler(cb).bind( blk_labels[nblocks] ); |
0 | 1592 |
1593 // Compute the size of the first block | |
1594 _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos(); | |
1595 | |
1748 | 1596 assert(cb->insts_size() < 500000, "method is unreasonably large"); |
0 | 1597 |
3857
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1598 #ifdef ASSERT |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1599 for (uint i = 0; i < nblocks; i++) { // For all blocks |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1600 if (jmp_target[i] != 0) { |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1601 int br_size = jmp_size[i]; |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1602 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1603 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) { |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1604 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1605 assert(false, "Displacement too large for short jmp"); |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1606 } |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1607 } |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1608 } |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1609 #endif |
739a9abbbd4b
7080431: VM asserts if specified size(x) in .ad is larger than emitted size
kvn
parents:
3853
diff
changeset
|
1610 |
0 | 1611 // ------------------ |
1612 | |
1613 #ifndef PRODUCT | |
1614 // Information on the size of the method, without the extraneous code | |
1748 | 1615 Scheduling::increment_method_size(cb->insts_size()); |
0 | 1616 #endif |
1617 | |
1618 // ------------------ | |
1619 // Fill in exception table entries. | |
1620 FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels); | |
1621 | |
1622 // Only java methods have exception handlers and deopt handlers | |
1623 if (_method) { | |
1624 // Emit the exception handler code. | |
1625 _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb)); | |
1626 // Emit the deopt handler code. | |
1627 _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb)); | |
1265 | 1628 |
1629 // Emit the MethodHandle deopt handler code (if required). | |
1630 if (has_method_handle_invokes()) { | |
1631 // We can use the same code as for the normal deopt handler, we | |
1632 // just need a different entry point address. | |
1633 _code_offsets.set_value(CodeOffsets::DeoptMH, emit_deopt_handler(*cb)); | |
1634 } | |
0 | 1635 } |
1636 | |
1637 // One last check for failed CodeBuffer::expand: | |
1202 | 1638 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
0 | 1639 turn_off_compiler(this); |
1640 return; | |
1641 } | |
1642 | |
1643 #ifndef PRODUCT | |
1644 // Dump the assembly code, including basic-block numbers | |
1645 if (print_assembly()) { | |
1646 ttyLocker ttyl; // keep the following output all in one block | |
1647 if (!VMThread::should_terminate()) { // test this under the tty lock | |
1648 // This output goes directly to the tty, not the compiler log. | |
1649 // To enable tools to match it up with the compilation activity, | |
1650 // be sure to tag this tty output with the compile ID. | |
1651 if (xtty != NULL) { | |
1652 xtty->head("opto_assembly compile_id='%d'%s", compile_id(), | |
1653 is_osr_compilation() ? " compile_kind='osr'" : | |
1654 ""); | |
1655 } | |
1656 if (method() != NULL) { | |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6620
diff
changeset
|
1657 method()->print_metadata(); |
0 | 1658 } |
1659 dump_asm(node_offsets, node_offset_limit); | |
1660 if (xtty != NULL) { | |
1661 xtty->tail("opto_assembly"); | |
1662 } | |
1663 } | |
1664 } | |
1665 #endif | |
1666 | |
1667 } | |
1668 | |
1669 void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) { | |
1670 _inc_table.set_size(cnt); | |
1671 | |
1672 uint inct_cnt = 0; | |
1673 for( uint i=0; i<_cfg->_num_blocks; i++ ) { | |
1674 Block *b = _cfg->_blocks[i]; | |
1675 Node *n = NULL; | |
1676 int j; | |
1677 | |
1678 // Find the branch; ignore trailing NOPs. | |
1679 for( j = b->_nodes.size()-1; j>=0; j-- ) { | |
1680 n = b->_nodes[j]; | |
1681 if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con ) | |
1682 break; | |
1683 } | |
1684 | |
1685 // If we didn't find anything, continue | |
1686 if( j < 0 ) continue; | |
1687 | |
1688 // Compute ExceptionHandlerTable subtable entry and add it | |
1689 // (skip empty blocks) | |
1690 if( n->is_Catch() ) { | |
1691 | |
1692 // Get the offset of the return from the call | |
1693 uint call_return = call_returns[b->_pre_order]; | |
1694 #ifdef ASSERT | |
1695 assert( call_return > 0, "no call seen for this basic block" ); | |
3842 | 1696 while( b->_nodes[--j]->is_MachProj() ) ; |
1697 assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" ); | |
0 | 1698 #endif |
1699 // last instruction is a CatchNode, find it's CatchProjNodes | |
1700 int nof_succs = b->_num_succs; | |
1701 // allocate space | |
1702 GrowableArray<intptr_t> handler_bcis(nof_succs); | |
1703 GrowableArray<intptr_t> handler_pcos(nof_succs); | |
1704 // iterate through all successors | |
1705 for (int j = 0; j < nof_succs; j++) { | |
1706 Block* s = b->_succs[j]; | |
1707 bool found_p = false; | |
1708 for( uint k = 1; k < s->num_preds(); k++ ) { | |
1709 Node *pk = s->pred(k); | |
1710 if( pk->is_CatchProj() && pk->in(0) == n ) { | |
1711 const CatchProjNode* p = pk->as_CatchProj(); | |
1712 found_p = true; | |
1713 // add the corresponding handler bci & pco information | |
1714 if( p->_con != CatchProjNode::fall_through_index ) { | |
1715 // p leads to an exception handler (and is not fall through) | |
1716 assert(s == _cfg->_blocks[s->_pre_order],"bad numbering"); | |
1717 // no duplicates, please | |
1718 if( !handler_bcis.contains(p->handler_bci()) ) { | |
1719 uint block_num = s->non_connector()->_pre_order; | |
1720 handler_bcis.append(p->handler_bci()); | |
1721 handler_pcos.append(blk_labels[block_num].loc_pos()); | |
1722 } | |
1723 } | |
1724 } | |
1725 } | |
1726 assert(found_p, "no matching predecessor found"); | |
1727 // Note: Due to empty block removal, one block may have | |
1728 // several CatchProj inputs, from the same Catch. | |
1729 } | |
1730 | |
1731 // Set the offset of the return from the call | |
1732 _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos); | |
1733 continue; | |
1734 } | |
1735 | |
1736 // Handle implicit null exception table updates | |
1737 if( n->is_MachNullCheck() ) { | |
1738 uint block_num = b->non_connector_successor(0)->_pre_order; | |
1739 _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() ); | |
1740 continue; | |
1741 } | |
1742 } // End of for all blocks fill in exception table entries | |
1743 } | |
1744 | |
1745 // Static Variables | |
1746 #ifndef PRODUCT | |
1747 uint Scheduling::_total_nop_size = 0; | |
1748 uint Scheduling::_total_method_size = 0; | |
1749 uint Scheduling::_total_branches = 0; | |
1750 uint Scheduling::_total_unconditional_delays = 0; | |
1751 uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1]; | |
1752 #endif | |
1753 | |
1754 // Initializer for class Scheduling | |
1755 | |
1756 Scheduling::Scheduling(Arena *arena, Compile &compile) | |
1757 : _arena(arena), | |
1758 _cfg(compile.cfg()), | |
1759 _bbs(compile.cfg()->_bbs), | |
1760 _regalloc(compile.regalloc()), | |
1761 _reg_node(arena), | |
1762 _bundle_instr_count(0), | |
1763 _bundle_cycle_number(0), | |
1764 _scheduled(arena), | |
1765 _available(arena), | |
1766 _next_node(NULL), | |
1767 _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]), | |
1768 _pinch_free_list(arena) | |
1769 #ifndef PRODUCT | |
1770 , _branches(0) | |
1771 , _unconditional_delays(0) | |
1772 #endif | |
1773 { | |
1774 // Create a MachNopNode | |
1775 _nop = new (&compile) MachNopNode(); | |
1776 | |
1777 // Now that the nops are in the array, save the count | |
1778 // (but allow entries for the nops) | |
1779 _node_bundling_limit = compile.unique(); | |
1780 uint node_max = _regalloc->node_regs_max_index(); | |
1781 | |
1782 compile.set_node_bundling_limit(_node_bundling_limit); | |
1783 | |
605 | 1784 // This one is persistent within the Compile class |
0 | 1785 _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max); |
1786 | |
1787 // Allocate space for fixed-size arrays | |
1788 _node_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max); | |
1789 _uses = NEW_ARENA_ARRAY(arena, short, node_max); | |
1790 _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max); | |
1791 | |
1792 // Clear the arrays | |
1793 memset(_node_bundling_base, 0, node_max * sizeof(Bundle)); | |
1794 memset(_node_latency, 0, node_max * sizeof(unsigned short)); | |
1795 memset(_uses, 0, node_max * sizeof(short)); | |
1796 memset(_current_latency, 0, node_max * sizeof(unsigned short)); | |
1797 | |
1798 // Clear the bundling information | |
1799 memcpy(_bundle_use_elements, | |
1800 Pipeline_Use::elaborated_elements, | |
1801 sizeof(Pipeline_Use::elaborated_elements)); | |
1802 | |
1803 // Get the last node | |
1804 Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1]; | |
1805 | |
1806 _next_node = bb->_nodes[bb->_nodes.size()-1]; | |
1807 } | |
1808 | |
1809 #ifndef PRODUCT | |
1810 // Scheduling destructor | |
1811 Scheduling::~Scheduling() { | |
1812 _total_branches += _branches; | |
1813 _total_unconditional_delays += _unconditional_delays; | |
1814 } | |
1815 #endif | |
1816 | |
1817 // Step ahead "i" cycles | |
1818 void Scheduling::step(uint i) { | |
1819 | |
1820 Bundle *bundle = node_bundling(_next_node); | |
1821 bundle->set_starts_bundle(); | |
1822 | |
1823 // Update the bundle record, but leave the flags information alone | |
1824 if (_bundle_instr_count > 0) { | |
1825 bundle->set_instr_count(_bundle_instr_count); | |
1826 bundle->set_resources_used(_bundle_use.resourcesUsed()); | |
1827 } | |
1828 | |
1829 // Update the state information | |
1830 _bundle_instr_count = 0; | |
1831 _bundle_cycle_number += i; | |
1832 _bundle_use.step(i); | |
1833 } | |
1834 | |
1835 void Scheduling::step_and_clear() { | |
1836 Bundle *bundle = node_bundling(_next_node); | |
1837 bundle->set_starts_bundle(); | |
1838 | |
1839 // Update the bundle record | |
1840 if (_bundle_instr_count > 0) { | |
1841 bundle->set_instr_count(_bundle_instr_count); | |
1842 bundle->set_resources_used(_bundle_use.resourcesUsed()); | |
1843 | |
1844 _bundle_cycle_number += 1; | |
1845 } | |
1846 | |
1847 // Clear the bundling information | |
1848 _bundle_instr_count = 0; | |
1849 _bundle_use.reset(); | |
1850 | |
1851 memcpy(_bundle_use_elements, | |
1852 Pipeline_Use::elaborated_elements, | |
1853 sizeof(Pipeline_Use::elaborated_elements)); | |
1854 } | |
1855 | |
1856 //------------------------------ScheduleAndBundle------------------------------ | |
1857 // Perform instruction scheduling and bundling over the sequence of | |
1858 // instructions in backwards order. | |
1859 void Compile::ScheduleAndBundle() { | |
1860 | |
1861 // Don't optimize this if it isn't a method | |
1862 if (!_method) | |
1863 return; | |
1864 | |
1865 // Don't optimize this if scheduling is disabled | |
1866 if (!do_scheduling()) | |
1867 return; | |
1868 | |
6792
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6725
diff
changeset
|
1869 // Scheduling code works only with pairs (8 bytes) maximum. |
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6725
diff
changeset
|
1870 if (max_vector_size() > 8) |
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6725
diff
changeset
|
1871 return; |
6620
f7cd53cedd78
7192965: assert(is_aligned_sets(size)) failed: mask is not aligned, adjacent sets
kvn
parents:
4947
diff
changeset
|
1872 |
0 | 1873 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) |
1874 | |
1875 // Create a data structure for all the scheduling information | |
1876 Scheduling scheduling(Thread::current()->resource_area(), *this); | |
1877 | |
1878 // Walk backwards over each basic block, computing the needed alignment | |
1879 // Walk over all the basic blocks | |
1880 scheduling.DoScheduling(); | |
1881 } | |
1882 | |
1883 //------------------------------ComputeLocalLatenciesForward------------------- | |
1884 // Compute the latency of all the instructions. This is fairly simple, | |
1885 // because we already have a legal ordering. Walk over the instructions | |
1886 // from first to last, and compute the latency of the instruction based | |
605 | 1887 // on the latency of the preceding instruction(s). |
0 | 1888 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) { |
1889 #ifndef PRODUCT | |
1890 if (_cfg->C->trace_opto_output()) | |
1891 tty->print("# -> ComputeLocalLatenciesForward\n"); | |
1892 #endif | |
1893 | |
1894 // Walk over all the schedulable instructions | |
1895 for( uint j=_bb_start; j < _bb_end; j++ ) { | |
1896 | |
1897 // This is a kludge, forcing all latency calculations to start at 1. | |
1898 // Used to allow latency 0 to force an instruction to the beginning | |
1899 // of the bb | |
1900 uint latency = 1; | |
1901 Node *use = bb->_nodes[j]; | |
1902 uint nlen = use->len(); | |
1903 | |
1904 // Walk over all the inputs | |
1905 for ( uint k=0; k < nlen; k++ ) { | |
1906 Node *def = use->in(k); | |
1907 if (!def) | |
1908 continue; | |
1909 | |
1910 uint l = _node_latency[def->_idx] + use->latency(k); | |
1911 if (latency < l) | |
1912 latency = l; | |
1913 } | |
1914 | |
1915 _node_latency[use->_idx] = latency; | |
1916 | |
1917 #ifndef PRODUCT | |
1918 if (_cfg->C->trace_opto_output()) { | |
1919 tty->print("# latency %4d: ", latency); | |
1920 use->dump(); | |
1921 } | |
1922 #endif | |
1923 } | |
1924 | |
1925 #ifndef PRODUCT | |
1926 if (_cfg->C->trace_opto_output()) | |
1927 tty->print("# <- ComputeLocalLatenciesForward\n"); | |
1928 #endif | |
1929 | |
1930 } // end ComputeLocalLatenciesForward | |
1931 | |
1932 // See if this node fits into the present instruction bundle | |
1933 bool Scheduling::NodeFitsInBundle(Node *n) { | |
1934 uint n_idx = n->_idx; | |
1935 | |
1936 // If this is the unconditional delay instruction, then it fits | |
1937 if (n == _unconditional_delay_slot) { | |
1938 #ifndef PRODUCT | |
1939 if (_cfg->C->trace_opto_output()) | |
1940 tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx); | |
1941 #endif | |
1942 return (true); | |
1943 } | |
1944 | |
1945 // If the node cannot be scheduled this cycle, skip it | |
1946 if (_current_latency[n_idx] > _bundle_cycle_number) { | |
1947 #ifndef PRODUCT | |
1948 if (_cfg->C->trace_opto_output()) | |
1949 tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n", | |
1950 n->_idx, _current_latency[n_idx], _bundle_cycle_number); | |
1951 #endif | |
1952 return (false); | |
1953 } | |
1954 | |
1955 const Pipeline *node_pipeline = n->pipeline(); | |
1956 | |
1957 uint instruction_count = node_pipeline->instructionCount(); | |
1958 if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0) | |
1959 instruction_count = 0; | |
1960 else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot) | |
1961 instruction_count++; | |
1962 | |
1963 if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) { | |
1964 #ifndef PRODUCT | |
1965 if (_cfg->C->trace_opto_output()) | |
1966 tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n", | |
1967 n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle); | |
1968 #endif | |
1969 return (false); | |
1970 } | |
1971 | |
1972 // Don't allow non-machine nodes to be handled this way | |
1973 if (!n->is_Mach() && instruction_count == 0) | |
1974 return (false); | |
1975 | |
1976 // See if there is any overlap | |
1977 uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse()); | |
1978 | |
1979 if (delay > 0) { | |
1980 #ifndef PRODUCT | |
1981 if (_cfg->C->trace_opto_output()) | |
1982 tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx); | |
1983 #endif | |
1984 return false; | |
1985 } | |
1986 | |
1987 #ifndef PRODUCT | |
1988 if (_cfg->C->trace_opto_output()) | |
1989 tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx); | |
1990 #endif | |
1991 | |
1992 return true; | |
1993 } | |
1994 | |
1995 Node * Scheduling::ChooseNodeToBundle() { | |
1996 uint siz = _available.size(); | |
1997 | |
1998 if (siz == 0) { | |
1999 | |
2000 #ifndef PRODUCT | |
2001 if (_cfg->C->trace_opto_output()) | |
2002 tty->print("# ChooseNodeToBundle: NULL\n"); | |
2003 #endif | |
2004 return (NULL); | |
2005 } | |
2006 | |
2007 // Fast path, if only 1 instruction in the bundle | |
2008 if (siz == 1) { | |
2009 #ifndef PRODUCT | |
2010 if (_cfg->C->trace_opto_output()) { | |
2011 tty->print("# ChooseNodeToBundle (only 1): "); | |
2012 _available[0]->dump(); | |
2013 } | |
2014 #endif | |
2015 return (_available[0]); | |
2016 } | |
2017 | |
2018 // Don't bother, if the bundle is already full | |
2019 if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) { | |
2020 for ( uint i = 0; i < siz; i++ ) { | |
2021 Node *n = _available[i]; | |
2022 | |
2023 // Skip projections, we'll handle them another way | |
2024 if (n->is_Proj()) | |
2025 continue; | |
2026 | |
2027 // This presupposed that instructions are inserted into the | |
2028 // available list in a legality order; i.e. instructions that | |
2029 // must be inserted first are at the head of the list | |
2030 if (NodeFitsInBundle(n)) { | |
2031 #ifndef PRODUCT | |
2032 if (_cfg->C->trace_opto_output()) { | |
2033 tty->print("# ChooseNodeToBundle: "); | |
2034 n->dump(); | |
2035 } | |
2036 #endif | |
2037 return (n); | |
2038 } | |
2039 } | |
2040 } | |
2041 | |
2042 // Nothing fits in this bundle, choose the highest priority | |
2043 #ifndef PRODUCT | |
2044 if (_cfg->C->trace_opto_output()) { | |
2045 tty->print("# ChooseNodeToBundle: "); | |
2046 _available[0]->dump(); | |
2047 } | |
2048 #endif | |
2049 | |
2050 return _available[0]; | |
2051 } | |
2052 | |
2053 //------------------------------AddNodeToAvailableList------------------------- | |
2054 void Scheduling::AddNodeToAvailableList(Node *n) { | |
2055 assert( !n->is_Proj(), "projections never directly made available" ); | |
2056 #ifndef PRODUCT | |
2057 if (_cfg->C->trace_opto_output()) { | |
2058 tty->print("# AddNodeToAvailableList: "); | |
2059 n->dump(); | |
2060 } | |
2061 #endif | |
2062 | |
2063 int latency = _current_latency[n->_idx]; | |
2064 | |
2065 // Insert in latency order (insertion sort) | |
2066 uint i; | |
2067 for ( i=0; i < _available.size(); i++ ) | |
2068 if (_current_latency[_available[i]->_idx] > latency) | |
2069 break; | |
2070 | |
2071 // Special Check for compares following branches | |
2072 if( n->is_Mach() && _scheduled.size() > 0 ) { | |
2073 int op = n->as_Mach()->ideal_Opcode(); | |
2074 Node *last = _scheduled[0]; | |
2075 if( last->is_MachIf() && last->in(1) == n && | |
2076 ( op == Op_CmpI || | |
2077 op == Op_CmpU || | |
2078 op == Op_CmpP || | |
2079 op == Op_CmpF || | |
2080 op == Op_CmpD || | |
2081 op == Op_CmpL ) ) { | |
2082 | |
2083 // Recalculate position, moving to front of same latency | |
2084 for ( i=0 ; i < _available.size(); i++ ) | |
2085 if (_current_latency[_available[i]->_idx] >= latency) | |
2086 break; | |
2087 } | |
2088 } | |
2089 | |
2090 // Insert the node in the available list | |
2091 _available.insert(i, n); | |
2092 | |
2093 #ifndef PRODUCT | |
2094 if (_cfg->C->trace_opto_output()) | |
2095 dump_available(); | |
2096 #endif | |
2097 } | |
2098 | |
2099 //------------------------------DecrementUseCounts----------------------------- | |
2100 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) { | |
2101 for ( uint i=0; i < n->len(); i++ ) { | |
2102 Node *def = n->in(i); | |
2103 if (!def) continue; | |
2104 if( def->is_Proj() ) // If this is a machine projection, then | |
2105 def = def->in(0); // propagate usage thru to the base instruction | |
2106 | |
2107 if( _bbs[def->_idx] != bb ) // Ignore if not block-local | |
2108 continue; | |
2109 | |
2110 // Compute the latency | |
2111 uint l = _bundle_cycle_number + n->latency(i); | |
2112 if (_current_latency[def->_idx] < l) | |
2113 _current_latency[def->_idx] = l; | |
2114 | |
2115 // If this does not have uses then schedule it | |
2116 if ((--_uses[def->_idx]) == 0) | |
2117 AddNodeToAvailableList(def); | |
2118 } | |
2119 } | |
2120 | |
//------------------------------AddNodeToBundle--------------------------------
// Commit node 'n' (chosen from _available) into the current bundle:
// try to fill a branch delay slot, step the bundle state as needed to
// satisfy latency/resource constraints, then push 'n' (plus its trailing
// projections) onto _scheduled and release its inputs via DecrementUseCounts.
void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output()) {
    tty->print("# AddNodeToBundle: ");
    n->dump();
  }
#endif

  // Remove this from the available list
  uint i;
  for (i = 0; i < _available.size(); i++)
    if (_available[i] == n)
      break;
  assert(i < _available.size(), "entry in _available list not found");
  _available.remove(i);

  // See if this fits in the current bundle
  const Pipeline *node_pipeline = n->pipeline();
  const Pipeline_Use& node_usage = node_pipeline->resourceUse();

  // Check for instructions to be placed in the delay slot. We
  // do this before we actually schedule the current instruction,
  // because the delay slot follows the current instruction.
  if (Pipeline::_branch_has_delay_slot &&
      node_pipeline->hasBranchDelay() &&
      !_unconditional_delay_slot) {

    uint siz = _available.size();

    // Conditional branches can support an instruction that
    // is unconditionally executed and not dependent by the
    // branch, OR a conditionally executed instruction if
    // the branch is taken.  In practice, this means that
    // the first instruction at the branch target is
    // copied to the delay slot, and the branch goes to
    // the instruction after that at the branch target
    if ( n->is_MachBranch() ) {

      assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
      assert( !n->is_Catch(), "should not look for delay slot for Catch" );

#ifndef PRODUCT
      _branches++;
#endif

      // At least 1 instruction is on the available list
      // that is not dependent on the branch
      for (uint i = 0; i < siz; i++) {
        Node *d = _available[i];
        const Pipeline *avail_pipeline = d->pipeline();

        // Don't allow safepoints in the branch shadow, that will
        // cause a number of difficulties
        if ( avail_pipeline->instructionCount() == 1 &&
            !avail_pipeline->hasMultipleBundles() &&
            !avail_pipeline->hasBranchDelay() &&
            Pipeline::instr_has_unit_size() &&
            d->size(_regalloc) == Pipeline::instr_unit_size() &&
            NodeFitsInBundle(d) &&
            !node_bundling(d)->used_in_delay()) {

          if (d->is_Mach() && !d->is_MachSafePoint()) {
            // A node that fits in the delay slot was found, so we need to
            // set the appropriate bits in the bundle pipeline information so
            // that it correctly indicates resource usage.  Later, when we
            // attempt to add this instruction to the bundle, we will skip
            // setting the resource usage.
            _unconditional_delay_slot = d;
            node_bundling(n)->set_use_unconditional_delay();
            node_bundling(d)->set_used_in_unconditional_delay();
            _bundle_use.add_usage(avail_pipeline->resourceUse());
            _current_latency[d->_idx] = _bundle_cycle_number;
            _next_node = d;
            ++_bundle_instr_count;
#ifndef PRODUCT
            _unconditional_delays++;
#endif
            break;
          }
        }
      }
    }

    // No delay slot, add a nop to the usage
    if (!_unconditional_delay_slot) {
      // See if adding an instruction in the delay slot will overflow
      // the bundle.
      if (!NodeFitsInBundle(_nop)) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("# *** STEP(1 instruction for delay slot) ***\n");
#endif
        step(1);
      }

      _bundle_use.add_usage(_nop->pipeline()->resourceUse());
      _next_node = _nop;
      ++_bundle_instr_count;
    }

    // See if the instruction in the delay slot requires a
    // step of the bundles
    if (!NodeFitsInBundle(n)) {
#ifndef PRODUCT
      if (_cfg->C->trace_opto_output())
        tty->print("# *** STEP(branch won't fit) ***\n");
#endif
      // Update the state information
      _bundle_instr_count = 0;
      _bundle_cycle_number += 1;
      _bundle_use.step(1);
    }
  }

  // Get the number of instructions
  uint instruction_count = node_pipeline->instructionCount();
  if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
    instruction_count = 0;

  // Compute the latency information
  uint delay = 0;

  if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) {
    int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number;
    if (relative_latency < 0)
      relative_latency = 0;

    delay = _bundle_use.full_latency(relative_latency, node_usage);

    // Does not fit in this bundle, start a new one
    if (delay > 0) {
      step(delay);

#ifndef PRODUCT
      if (_cfg->C->trace_opto_output())
        tty->print("# *** STEP(%d) ***\n", delay);
#endif
    }
  }

  // If this was placed in the delay slot, ignore it
  if (n != _unconditional_delay_slot) {

    if (delay == 0) {
      if (node_pipeline->hasMultipleBundles()) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("# *** STEP(multiple instructions) ***\n");
#endif
        step(1);
      }

      else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("# *** STEP(%d >= %d instructions) ***\n",
                     instruction_count + _bundle_instr_count,
                     Pipeline::_max_instrs_per_cycle);
#endif
        step(1);
      }
    }

    if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
      _bundle_instr_count++;

    // Set the node's latency
    _current_latency[n->_idx] = _bundle_cycle_number;

    // Now merge the functional unit information
    if (instruction_count > 0 || !node_pipeline->mayHaveNoCode())
      _bundle_use.add_usage(node_usage);

    // Increment the number of instructions in this bundle
    _bundle_instr_count += instruction_count;

    // Remember this node for later
    if (n->is_Mach())
      _next_node = n;
  }

  // It's possible to have a BoxLock in the graph and in the _bbs mapping but
  // not in the bb->_nodes array.  This happens for debug-info-only BoxLocks.
  // 'Schedule' them (basically ignore in the schedule) but do not insert them
  // into the block.  All other scheduled nodes get put in the schedule here.
  int op = n->Opcode();
  if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR
      (op != Op_Node &&                   // Not an unused antidepedence node and
                                          // not an unallocated boxlock
       (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {

    // Push any trailing projections
    if( bb->_nodes[bb->_nodes.size()-1] != n ) {
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        Node *foi = n->fast_out(i);
        if( foi->is_Proj() )
          _scheduled.push(foi);
      }
    }

    // Put the instruction in the schedule list
    _scheduled.push(n);
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    dump_available();
#endif

  // Walk all the definitions, decrementing use counts, and
  // if a definition has a 0 use count, place it in the available list.
  DecrementUseCounts(n,bb);
}
2335 | |
2336 //------------------------------ComputeUseCount-------------------------------- | |
2337 // This method sets the use count within a basic block. We will ignore all | |
2338 // uses outside the current basic block. As we are doing a backwards walk, | |
2339 // any node we reach that has a use count of 0 may be scheduled. This also | |
2340 // avoids the problem of cyclic references from phi nodes, as long as phi | |
2341 // nodes are at the front of the basic block. This method also initializes | |
2342 // the available list to the set of instructions that have no uses within this | |
2343 // basic block. | |
2344 void Scheduling::ComputeUseCount(const Block *bb) { | |
2345 #ifndef PRODUCT | |
2346 if (_cfg->C->trace_opto_output()) | |
2347 tty->print("# -> ComputeUseCount\n"); | |
2348 #endif | |
2349 | |
2350 // Clear the list of available and scheduled instructions, just in case | |
2351 _available.clear(); | |
2352 _scheduled.clear(); | |
2353 | |
2354 // No delay slot specified | |
2355 _unconditional_delay_slot = NULL; | |
2356 | |
2357 #ifdef ASSERT | |
2358 for( uint i=0; i < bb->_nodes.size(); i++ ) | |
2359 assert( _uses[bb->_nodes[i]->_idx] == 0, "_use array not clean" ); | |
2360 #endif | |
2361 | |
2362 // Force the _uses count to never go to zero for unscheduable pieces | |
2363 // of the block | |
2364 for( uint k = 0; k < _bb_start; k++ ) | |
2365 _uses[bb->_nodes[k]->_idx] = 1; | |
2366 for( uint l = _bb_end; l < bb->_nodes.size(); l++ ) | |
2367 _uses[bb->_nodes[l]->_idx] = 1; | |
2368 | |
2369 // Iterate backwards over the instructions in the block. Don't count the | |
2370 // branch projections at end or the block header instructions. | |
2371 for( uint j = _bb_end-1; j >= _bb_start; j-- ) { | |
2372 Node *n = bb->_nodes[j]; | |
2373 if( n->is_Proj() ) continue; // Projections handled another way | |
2374 | |
2375 // Account for all uses | |
2376 for ( uint k = 0; k < n->len(); k++ ) { | |
2377 Node *inp = n->in(k); | |
2378 if (!inp) continue; | |
2379 assert(inp != n, "no cycles allowed" ); | |
2380 if( _bbs[inp->_idx] == bb ) { // Block-local use? | |
2381 if( inp->is_Proj() ) // Skip through Proj's | |
2382 inp = inp->in(0); | |
2383 ++_uses[inp->_idx]; // Count 1 block-local use | |
2384 } | |
2385 } | |
2386 | |
2387 // If this instruction has a 0 use count, then it is available | |
2388 if (!_uses[n->_idx]) { | |
2389 _current_latency[n->_idx] = _bundle_cycle_number; | |
2390 AddNodeToAvailableList(n); | |
2391 } | |
2392 | |
2393 #ifndef PRODUCT | |
2394 if (_cfg->C->trace_opto_output()) { | |
2395 tty->print("# uses: %3d: ", _uses[n->_idx]); | |
2396 n->dump(); | |
2397 } | |
2398 #endif | |
2399 } | |
2400 | |
2401 #ifndef PRODUCT | |
2402 if (_cfg->C->trace_opto_output()) | |
2403 tty->print("# <- ComputeUseCount\n"); | |
2404 #endif | |
2405 } | |
2406 | |
// This routine performs scheduling on each basic block in reverse order,
// using instruction latencies and taking into account function unit
// availability.
void Scheduling::DoScheduling() {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# -> DoScheduling\n");
#endif

  Block *succ_bb = NULL;
  Block *bb;

  // Walk over all the basic blocks in reverse order
  for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) {
    bb = _cfg->_blocks[i];

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("# Schedule BB#%03d (initial)\n", i);
      for (uint j = 0; j < bb->_nodes.size(); j++)
        bb->_nodes[j]->dump();
    }
#endif

    // On the head node, skip processing
    if( bb == _cfg->_broot )
      continue;

    // Skip empty, connector blocks
    if (bb->is_connector())
      continue;

    // If the following block is not the sole successor of
    // this one, then reset the pipeline information
    if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) {
#ifndef PRODUCT
      if (_cfg->C->trace_opto_output()) {
        tty->print("*** bundle start of next BB, node %d, for %d instructions\n",
                   _next_node->_idx, _bundle_instr_count);
      }
#endif
      step_and_clear();
    }

    // Leave untouched the starting instruction, any Phis, a CreateEx node
    // or Top.  bb->_nodes[_bb_start] is the first schedulable instruction.
    _bb_end = bb->_nodes.size()-1;
    for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) {
      Node *n = bb->_nodes[_bb_start];
      // Things not matched, like Phinodes and ProjNodes don't get scheduled.
      // Also, MachIdealNodes do not get scheduled
      if( !n->is_Mach() ) continue;     // Skip non-machine nodes
      MachNode *mach = n->as_Mach();
      int iop = mach->ideal_Opcode();
      if( iop == Op_CreateEx ) continue; // CreateEx is pinned
      if( iop == Op_Con ) continue;      // Do not schedule Top
      if( iop == Op_Node &&     // Do not schedule PhiNodes, ProjNodes
          mach->pipeline() == MachNode::pipeline_class() &&
          !n->is_SpillCopy() )  // Breakpoints, Prolog, etc
        continue;
      break;                    // Funny loop structure to be sure...
    }
    // Compute last "interesting" instruction in block - last instruction we
    // might schedule.  _bb_end points just after last schedulable inst.  We
    // normally schedule conditional branches (despite them being forced last
    // in the block), because they have delay slots we can fill.  Calls all
    // have their delay slots filled in the template expansions, so we don't
    // bother scheduling them.
    Node *last = bb->_nodes[_bb_end];
    // Ignore trailing NOPs.
    while (_bb_end > 0 && last->is_Mach() &&
           last->as_Mach()->ideal_Opcode() == Op_Con) {
      last = bb->_nodes[--_bb_end];
    }
    assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
    if( last->is_Catch() ||
       // Exclude unreachable path case when Halt node is in a separate block.
       (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
      // There must be a prior call.  Skip it.
      while( !bb->_nodes[--_bb_end]->is_MachCall() ) {
        assert( bb->_nodes[_bb_end]->is_MachProj(), "skipping projections after expected call" );
      }
    } else if( last->is_MachNullCheck() ) {
      // Backup so the last null-checked memory instruction is
      // outside the schedulable range.  Skip over the nullcheck,
      // projection, and the memory nodes.
      Node *mem = last->in(1);
      do {
        _bb_end--;
      } while (mem != bb->_nodes[_bb_end]);
    } else {
      // Set _bb_end to point after last schedulable inst.
      _bb_end++;
    }

    assert( _bb_start <= _bb_end, "inverted block ends" );

    // Compute the register antidependencies for the basic block
    ComputeRegisterAntidependencies(bb);
    if (_cfg->C->failing())  return;  // too many D-U pinch points

    // Compute intra-bb latencies for the nodes
    ComputeLocalLatenciesForward(bb);

    // Compute the usage within the block, and set the list of all nodes
    // in the block that have no uses within the block.
    ComputeUseCount(bb);

    // Schedule the remaining instructions in the block
    while ( _available.size() > 0 ) {
      Node *n = ChooseNodeToBundle();
      guarantee(n != NULL, "no nodes available");
      AddNodeToBundle(n,bb);
    }

    assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" );
#ifdef ASSERT
    // Every schedulable node of the block must appear in _scheduled.
    for( uint l = _bb_start; l < _bb_end; l++ ) {
      Node *n = bb->_nodes[l];
      uint m;
      for( m = 0; m < _bb_end-_bb_start; m++ )
        if( _scheduled[m] == n )
          break;
      assert( m < _bb_end-_bb_start, "instruction missing in schedule" );
    }
#endif

    // Now copy the instructions (in reverse order) back to the block
    for ( uint k = _bb_start; k < _bb_end; k++ )
      bb->_nodes.map(k, _scheduled[_bb_end-k-1]);

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("# Schedule BB#%03d (final)\n", i);
      uint current = 0;
      for (uint j = 0; j < bb->_nodes.size(); j++) {
        Node *n = bb->_nodes[j];
        if( valid_bundle_info(n) ) {
          Bundle *bundle = node_bundling(n);
          if (bundle->instr_count() > 0 || bundle->flags() > 0) {
            tty->print("*** Bundle: ");
            bundle->dump();
          }
          n->dump();
        }
      }
    }
#endif
#ifdef ASSERT
  verify_good_schedule(bb,"after block local scheduling");
#endif
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# <- DoScheduling\n");
#endif

  // Record final node-bundling array location
  _regalloc->C->set_node_bundling_base(_node_bundling_base);

} // end DoScheduling
2569 | |
2570 //------------------------------verify_good_schedule--------------------------- | |
2571 // Verify that no live-range used in the block is killed in the block by a | |
2572 // wrong DEF. This doesn't verify live-ranges that span blocks. | |
2573 | |
2574 // Check for edge existence. Used to avoid adding redundant precedence edges. | |
2575 static bool edge_from_to( Node *from, Node *to ) { | |
2576 for( uint i=0; i<from->len(); i++ ) | |
2577 if( from->in(i) == to ) | |
2578 return true; | |
2579 return false; | |
2580 } | |
2581 | |
2582 #ifdef ASSERT | |
2583 //------------------------------verify_do_def---------------------------------- | |
2584 void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) { | |
2585 // Check for bad kills | |
2586 if( OptoReg::is_valid(def) ) { // Ignore stores & control flow | |
2587 Node *prior_use = _reg_node[def]; | |
2588 if( prior_use && !edge_from_to(prior_use,n) ) { | |
2589 tty->print("%s = ",OptoReg::as_VMReg(def)->name()); | |
2590 n->dump(); | |
2591 tty->print_cr("..."); | |
2592 prior_use->dump(); | |
1490
f03d0a26bf83
6888954: argument formatting for assert() and friends
jcoomes
parents:
1489
diff
changeset
|
2593 assert(edge_from_to(prior_use,n),msg); |
0 | 2594 } |
2595 _reg_node.map(def,NULL); // Kill live USEs | |
2596 } | |
2597 } | |
2598 | |
2599 //------------------------------verify_good_schedule--------------------------- | |
2600 void Scheduling::verify_good_schedule( Block *b, const char *msg ) { | |
2601 | |
2602 // Zap to something reasonable for the verify code | |
2603 _reg_node.clear(); | |
2604 | |
2605 // Walk over the block backwards. Check to make sure each DEF doesn't | |
2606 // kill a live value (other than the one it's supposed to). Add each | |
2607 // USE to the live set. | |
2608 for( uint i = b->_nodes.size()-1; i >= _bb_start; i-- ) { | |
2609 Node *n = b->_nodes[i]; | |
2610 int n_op = n->Opcode(); | |
2611 if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) { | |
2612 // Fat-proj kills a slew of registers | |
2613 RegMask rm = n->out_RegMask();// Make local copy | |
2614 while( rm.is_NotEmpty() ) { | |
2615 OptoReg::Name kill = rm.find_first_elem(); | |
2616 rm.Remove(kill); | |
2617 verify_do_def( n, kill, msg ); | |
2618 } | |
2619 } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes | |
2620 // Get DEF'd registers the normal way | |
2621 verify_do_def( n, _regalloc->get_reg_first(n), msg ); | |
2622 verify_do_def( n, _regalloc->get_reg_second(n), msg ); | |
2623 } | |
2624 | |
2625 // Now make all USEs live | |
2626 for( uint i=1; i<n->req(); i++ ) { | |
2627 Node *def = n->in(i); | |
2628 assert(def != 0, "input edge required"); | |
2629 OptoReg::Name reg_lo = _regalloc->get_reg_first(def); | |
2630 OptoReg::Name reg_hi = _regalloc->get_reg_second(def); | |
2631 if( OptoReg::is_valid(reg_lo) ) { | |
1490
f03d0a26bf83
6888954: argument formatting for assert() and friends
jcoomes
parents:
1489
diff
changeset
|
2632 assert(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg); |
0 | 2633 _reg_node.map(reg_lo,n); |
2634 } | |
2635 if( OptoReg::is_valid(reg_hi) ) { | |
1490
f03d0a26bf83
6888954: argument formatting for assert() and friends
jcoomes
parents:
1489
diff
changeset
|
2636 assert(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg); |
0 | 2637 _reg_node.map(reg_hi,n); |
2638 } | |
2639 } | |
2640 | |
2641 } | |
2642 | |
2643 // Zap to something reasonable for the Antidependence code | |
2644 _reg_node.clear(); | |
2645 } | |
2646 #endif | |
2647 | |
2648 // Conditionally add precedence edges. Avoid putting edges on Projs. | |
2649 static void add_prec_edge_from_to( Node *from, Node *to ) { | |
2650 if( from->is_Proj() ) { // Put precedence edge on Proj's input | |
2651 assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" ); | |
2652 from = from->in(0); | |
2653 } | |
2654 if( from != to && // No cycles (for things like LD L0,[L0+4] ) | |
2655 !edge_from_to( from, to ) ) // Avoid duplicate edge | |
2656 from->add_prec(to); | |
2657 } | |
2658 | |
//------------------------------anti_do_def------------------------------------
// Record a DEF/KILL of register 'def_reg' by node 'def'.  On the first
// def/kill of a register we just remember it optimistically; on a later
// kill we materialize (or reuse) a pinch-point node so that all uses are
// serialized before all kills without a quadratic number of edges.
// 'is_def' distinguishes a true def (has uses) from a pure kill.
void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
  if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
    return;

  Node *pinch = _reg_node[def_reg]; // Get pinch point
  if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet?
      is_def ) { // Check for a true def (not a kill)
    _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
    return;
  }

  Node *kill = def; // Rename 'def' to more descriptive 'kill'
  // Poison 'def' so any further use of it below is caught in debug builds.
  debug_only( def = (Node*)0xdeadbeef; )

  // After some number of kills there _may_ be a later def
  Node *later_def = NULL;

  // Finding a kill requires a real pinch-point.
  // Check for not already having a pinch-point.
  // Pinch points are Op_Node's.
  if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
    later_def = pinch; // Must be def/kill as optimistic pinch-point
    // Recycle a previously garbage-collected pinch node if one is free.
    if ( _pinch_free_list.size() > 0) {
      pinch = _pinch_free_list.pop();
    } else {
      pinch = new (_cfg->C) Node(1); // Pinch point to-be
    }
    if (pinch->_idx >= _regalloc->node_regs_max_index()) {
      // Pinch node index is outside the register-allocation tables; give up.
      _cfg->C->record_method_not_compilable("too many D-U pinch points");
      return;
    }
    _bbs.map(pinch->_idx,b);      // Pretend it's valid in this block (lazy init)
    _reg_node.map(def_reg,pinch); // Record pinch-point
    //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
    if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
      pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call
      add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
      later_def = NULL; // and no later def
    }
    pinch->set_req(0,later_def); // Hook later def so we can find it
  } else { // Else have valid pinch point
    if( pinch->in(0) ) // If there is a later-def
      later_def = pinch->in(0); // Get it
  }

  // Add output-dependence edge from later def to kill
  if( later_def ) // If there is some original def
    add_prec_edge_from_to(later_def,kill); // Add edge from def to kill

  // See if current kill is also a use, and so is forced to be the pinch-point.
  if( pinch->Opcode() == Op_Node ) {
    Node *uses = kill->is_Proj() ? kill->in(0) : kill;
    for( uint i=1; i<uses->req(); i++ ) {
      if( _regalloc->get_reg_first(uses->in(i)) == def_reg ||
          _regalloc->get_reg_second(uses->in(i)) == def_reg ) {
        // Yes, found a use/kill pinch-point
        pinch->set_req(0,NULL); //
        pinch->replace_by(kill); // Move anti-dep edges up
        pinch = kill;
        _reg_node.map(def_reg,pinch);
        return;
      }
    }
  }

  // Add edge from kill to pinch-point
  add_prec_edge_from_to(kill,pinch);
}
2728 | |
2729 //------------------------------anti_do_use------------------------------------ | |
2730 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) { | |
2731 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow | |
2732 return; | |
2733 Node *pinch = _reg_node[use_reg]; // Get pinch point | |
2734 // Check for no later def_reg/kill in block | |
2735 if( pinch && _bbs[pinch->_idx] == b && | |
2736 // Use has to be block-local as well | |
2737 _bbs[use->_idx] == b ) { | |
2738 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?) | |
2739 pinch->req() == 1 ) { // pinch not yet in block? | |
2740 pinch->del_req(0); // yank pointer to later-def, also set flag | |
2741 // Insert the pinch-point in the block just after the last use | |
2742 b->_nodes.insert(b->find_node(use)+1,pinch); | |
2743 _bb_end++; // Increase size scheduled region in block | |
2744 } | |
2745 | |
2746 add_prec_edge_from_to(pinch,use); | |
2747 } | |
2748 } | |
2749 | |
2750 //------------------------------ComputeRegisterAntidependences----------------- | |
2751 // We insert antidependences between the reads and following write of | |
2752 // allocated registers to prevent illegal code motion. Hopefully, the | |
2753 // number of added references should be fairly small, especially as we | |
2754 // are only adding references within the current basic block. | |
2755 void Scheduling::ComputeRegisterAntidependencies(Block *b) { | |
2756 | |
2757 #ifdef ASSERT | |
2758 verify_good_schedule(b,"before block local scheduling"); | |
2759 #endif | |
2760 | |
2761 // A valid schedule, for each register independently, is an endless cycle | |
2762 // of: a def, then some uses (connected to the def by true dependencies), | |
2763 // then some kills (defs with no uses), finally the cycle repeats with a new | |
2764 // def. The uses are allowed to float relative to each other, as are the | |
2765 // kills. No use is allowed to slide past a kill (or def). This requires | |
2766 // antidependencies between all uses of a single def and all kills that | |
2767 // follow, up to the next def. More edges are redundant, because later defs | |
2768 // & kills are already serialized with true or antidependencies. To keep | |
2769 // the edge count down, we add a 'pinch point' node if there's more than | |
2770 // one use or more than one kill/def. | |
2771 | |
2772 // We add dependencies in one bottom-up pass. | |
2773 | |
2774 // For each instruction we handle it's DEFs/KILLs, then it's USEs. | |
2775 | |
2776 // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this | |
2777 // register. If not, we record the DEF/KILL in _reg_node, the | |
2778 // register-to-def mapping. If there is a prior DEF/KILL, we insert a | |
2779 // "pinch point", a new Node that's in the graph but not in the block. | |
2780 // We put edges from the prior and current DEF/KILLs to the pinch point. | |
2781 // We put the pinch point in _reg_node. If there's already a pinch point | |
2782 // we merely add an edge from the current DEF/KILL to the pinch point. | |
2783 | |
2784 // After doing the DEF/KILLs, we handle USEs. For each used register, we | |
2785 // put an edge from the pinch point to the USE. | |
2786 | |
2787 // To be expedient, the _reg_node array is pre-allocated for the whole | |
2788 // compilation. _reg_node is lazily initialized; it either contains a NULL, | |
2789 // or a valid def/kill/pinch-point, or a leftover node from some prior | |
2790 // block. Leftover node from some prior block is treated like a NULL (no | |
2791 // prior def, so no anti-dependence needed). Valid def is distinguished by | |
2792 // it being in the current block. | |
2793 bool fat_proj_seen = false; | |
2794 uint last_safept = _bb_end-1; | |
2795 Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL; | |
2796 Node* last_safept_node = end_node; | |
2797 for( uint i = _bb_end-1; i >= _bb_start; i-- ) { | |
2798 Node *n = b->_nodes[i]; | |
2799 int is_def = n->outcnt(); // def if some uses prior to adding precedence edges | |
3842 | 2800 if( n->is_MachProj() && n->ideal_reg() == MachProjNode::fat_proj ) { |
0 | 2801 // Fat-proj kills a slew of registers |
2802 // This can add edges to 'n' and obscure whether or not it was a def, | |
2803 // hence the is_def flag. | |
2804 fat_proj_seen = true; | |
2805 RegMask rm = n->out_RegMask();// Make local copy | |
2806 while( rm.is_NotEmpty() ) { | |
2807 OptoReg::Name kill = rm.find_first_elem(); | |
2808 rm.Remove(kill); | |
2809 anti_do_def( b, n, kill, is_def ); | |
2810 } | |
2811 } else { | |
2812 // Get DEF'd registers the normal way | |
2813 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def ); | |
2814 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); | |
2815 } | |
2816 | |
3851 | 2817 // Kill projections on a branch should appear to occur on the |
2818 // branch, not afterwards, so grab the masks from the projections | |
2819 // and process them. | |
3853
11211f7cb5a0
7079317: Incorrect branch's destination block in PrintoOptoAssembly output
kvn
parents:
3851
diff
changeset
|
2820 if (n->is_MachBranch() || n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_Jump) { |
3851 | 2821 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
2822 Node* use = n->fast_out(i); | |
2823 if (use->is_Proj()) { | |
2824 RegMask rm = use->out_RegMask();// Make local copy | |
2825 while( rm.is_NotEmpty() ) { | |
2826 OptoReg::Name kill = rm.find_first_elem(); | |
2827 rm.Remove(kill); | |
2828 anti_do_def( b, n, kill, false ); | |
2829 } | |
2830 } | |
2831 } | |
2832 } | |
2833 | |
0 | 2834 // Check each register used by this instruction for a following DEF/KILL |
2835 // that must occur afterward and requires an anti-dependence edge. | |
2836 for( uint j=0; j<n->req(); j++ ) { | |
2837 Node *def = n->in(j); | |
2838 if( def ) { | |
3842 | 2839 assert( !def->is_MachProj() || def->ideal_reg() != MachProjNode::fat_proj, "" ); |
0 | 2840 anti_do_use( b, n, _regalloc->get_reg_first(def) ); |
2841 anti_do_use( b, n, _regalloc->get_reg_second(def) ); | |
2842 } | |
2843 } | |
2844 // Do not allow defs of new derived values to float above GC | |
2845 // points unless the base is definitely available at the GC point. | |
2846 | |
2847 Node *m = b->_nodes[i]; | |
2848 | |
2849 // Add precedence edge from following safepoint to use of derived pointer | |
2850 if( last_safept_node != end_node && | |
2851 m != last_safept_node) { | |
2852 for (uint k = 1; k < m->req(); k++) { | |
2853 const Type *t = m->in(k)->bottom_type(); | |
2854 if( t->isa_oop_ptr() && | |
2855 t->is_ptr()->offset() != 0 ) { | |
2856 last_safept_node->add_prec( m ); | |
2857 break; | |
2858 } | |
2859 } | |
2860 } | |
2861 | |
2862 if( n->jvms() ) { // Precedence edge from derived to safept | |
2863 // Check if last_safept_node was moved by pinch-point insertion in anti_do_use() | |
2864 if( b->_nodes[last_safept] != last_safept_node ) { | |
2865 last_safept = b->find_node(last_safept_node); | |
2866 } | |
2867 for( uint j=last_safept; j > i; j-- ) { | |
2868 Node *mach = b->_nodes[j]; | |
2869 if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP ) | |
2870 mach->add_prec( n ); | |
2871 } | |
2872 last_safept = i; | |
2873 last_safept_node = m; | |
2874 } | |
2875 } | |
2876 | |
2877 if (fat_proj_seen) { | |
2878 // Garbage collect pinch nodes that were not consumed. | |
2879 // They are usually created by a fat kill MachProj for a call. | |
2880 garbage_collect_pinch_nodes(); | |
2881 } | |
2882 } | |
2883 | |
2884 //------------------------------garbage_collect_pinch_nodes------------------------------- | |
2885 | |
2886 // Garbage collect pinch nodes for reuse by other blocks. | |
2887 // | |
2888 // The block scheduler's insertion of anti-dependence | |
2889 // edges creates many pinch nodes when the block contains | |
2890 // 2 or more Calls. A pinch node is used to prevent a | |
2891 // combinatorial explosion of edges. If a set of kills for a | |
2892 // register is anti-dependent on a set of uses (or defs), rather | |
2893 // than adding an edge in the graph between each pair of kill | |
2894 // and use (or def), a pinch is inserted between them: | |
2895 // | |
2896 // use1 use2 use3 | |
2897 // \ | / | |
2898 // \ | / | |
2899 // pinch | |
2900 // / | \ | |
2901 // / | \ | |
2902 // kill1 kill2 kill3 | |
2903 // | |
2904 // One pinch node is created per register killed when | |
2905 // the second call is encountered during a backwards pass | |
2906 // over the block. Most of these pinch nodes are never | |
2907 // wired into the graph because the register is never | |
2908 // used or def'ed in the block. | |
2909 // | |
2910 void Scheduling::garbage_collect_pinch_nodes() { | |
2911 #ifndef PRODUCT | |
2912 if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:"); | |
2913 #endif | |
2914 int trace_cnt = 0; | |
2915 for (uint k = 0; k < _reg_node.Size(); k++) { | |
2916 Node* pinch = _reg_node[k]; | |
2917 if (pinch != NULL && pinch->Opcode() == Op_Node && | |
2918 // no predecence input edges | |
2919 (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { | |
2920 cleanup_pinch(pinch); | |
2921 _pinch_free_list.push(pinch); | |
2922 _reg_node.map(k, NULL); | |
2923 #ifndef PRODUCT | |
2924 if (_cfg->C->trace_opto_output()) { | |
2925 trace_cnt++; | |
2926 if (trace_cnt > 40) { | |
2927 tty->print("\n"); | |
2928 trace_cnt = 0; | |
2929 } | |
2930 tty->print(" %d", pinch->_idx); | |
2931 } | |
2932 #endif | |
2933 } | |
2934 } | |
2935 #ifndef PRODUCT | |
2936 if (_cfg->C->trace_opto_output()) tty->print("\n"); | |
2937 #endif | |
2938 } | |
2939 | |
2940 // Clean up a pinch node for reuse. | |
2941 void Scheduling::cleanup_pinch( Node *pinch ) { | |
2942 assert (pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking"); | |
2943 | |
2944 for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) { | |
2945 Node* use = pinch->last_out(i); | |
2946 uint uses_found = 0; | |
2947 for (uint j = use->req(); j < use->len(); j++) { | |
2948 if (use->in(j) == pinch) { | |
2949 use->rm_prec(j); | |
2950 uses_found++; | |
2951 } | |
2952 } | |
2953 assert(uses_found > 0, "must be a precedence edge"); | |
2954 i -= uses_found; // we deleted 1 or more copies of this edge | |
2955 } | |
2956 // May have a later_def entry | |
2957 pinch->set_req(0, NULL); | |
2958 } | |
2959 | |
2960 //------------------------------print_statistics------------------------------- | |
2961 #ifndef PRODUCT | |
2962 | |
2963 void Scheduling::dump_available() const { | |
2964 tty->print("#Availist "); | |
2965 for (uint i = 0; i < _available.size(); i++) | |
2966 tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]); | |
2967 tty->cr(); | |
2968 } | |
2969 | |
2970 // Print Scheduling Statistics | |
2971 void Scheduling::print_statistics() { | |
2972 // Print the size added by nops for bundling | |
2973 tty->print("Nops added %d bytes to total of %d bytes", | |
2974 _total_nop_size, _total_method_size); | |
2975 if (_total_method_size > 0) | |
2976 tty->print(", for %.2f%%", | |
2977 ((double)_total_nop_size) / ((double) _total_method_size) * 100.0); | |
2978 tty->print("\n"); | |
2979 | |
2980 // Print the number of branch shadows filled | |
2981 if (Pipeline::_branch_has_delay_slot) { | |
2982 tty->print("Of %d branches, %d had unconditional delay slots filled", | |
2983 _total_branches, _total_unconditional_delays); | |
2984 if (_total_branches > 0) | |
2985 tty->print(", for %.2f%%", | |
2986 ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0); | |
2987 tty->print("\n"); | |
2988 } | |
2989 | |
2990 uint total_instructions = 0, total_bundles = 0; | |
2991 | |
2992 for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) { | |
2993 uint bundle_count = _total_instructions_per_bundle[i]; | |
2994 total_instructions += bundle_count * i; | |
2995 total_bundles += bundle_count; | |
2996 } | |
2997 | |
2998 if (total_bundles > 0) | |
2999 tty->print("Average ILP (excluding nops) is %.2f\n", | |
3000 ((double)total_instructions) / ((double)total_bundles)); | |
3001 } | |
3002 #endif |