Mercurial > hg > graal-compiler
annotate src/share/vm/opto/chaitin.cpp @ 3992:d1bdeef3e3e2
7098282: G1: assert(interval >= 0) failed: Sanity check, referencePolicy.cpp: 76
Summary: There is a race between one thread successfully forwarding and copying the klass mirror for the SoftReference class (including the static master clock) and another thread attempting to use the master clock while attempting to discover a soft reference object. Maintain a shadow copy of the soft reference master clock and use the shadow during reference discovery and reference processing.
Reviewed-by: tonyp, brutisso, ysr
author | johnc |
---|---|
date | Wed, 12 Oct 2011 10:25:51 -0700 |
parents | 1d1603768966 |
children | 9b8ce46870df |
rev | line source |
---|---|
0 | 1 /* |
2426
1d1603768966
7010070: Update all 2010 Oracle-changed OpenJDK files to have the proper copyright dates - second pass
trims
parents:
2401
diff
changeset
|
2 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1012
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1012
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1012
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "compiler/compileLog.hpp" | |
27 #include "compiler/oopMap.hpp" | |
28 #include "memory/allocation.inline.hpp" | |
29 #include "opto/addnode.hpp" | |
30 #include "opto/block.hpp" | |
31 #include "opto/callnode.hpp" | |
32 #include "opto/cfgnode.hpp" | |
33 #include "opto/chaitin.hpp" | |
34 #include "opto/coalesce.hpp" | |
35 #include "opto/connode.hpp" | |
36 #include "opto/idealGraphPrinter.hpp" | |
37 #include "opto/indexSet.hpp" | |
38 #include "opto/machnode.hpp" | |
39 #include "opto/memnode.hpp" | |
40 #include "opto/opcodes.hpp" | |
41 #include "opto/rootnode.hpp" | |
0 | 42 |
43 //============================================================================= | |
44 | |
45 #ifndef PRODUCT | |
46 void LRG::dump( ) const { | |
47 ttyLocker ttyl; | |
48 tty->print("%d ",num_regs()); | |
49 _mask.dump(); | |
50 if( _msize_valid ) { | |
51 if( mask_size() == compute_mask_size() ) tty->print(", #%d ",_mask_size); | |
52 else tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size()); | |
53 } else { | |
54 tty->print(", #?(%d) ",_mask.Size()); | |
55 } | |
56 | |
57 tty->print("EffDeg: "); | |
58 if( _degree_valid ) tty->print( "%d ", _eff_degree ); | |
59 else tty->print("? "); | |
60 | |
295
ea18057223c4
6732194: Data corruption dependent on -server/-client/-Xbatch
never
parents:
196
diff
changeset
|
61 if( is_multidef() ) { |
0 | 62 tty->print("MultiDef "); |
63 if (_defs != NULL) { | |
64 tty->print("("); | |
65 for (int i = 0; i < _defs->length(); i++) { | |
66 tty->print("N%d ", _defs->at(i)->_idx); | |
67 } | |
68 tty->print(") "); | |
69 } | |
70 } | |
71 else if( _def == 0 ) tty->print("Dead "); | |
72 else tty->print("Def: N%d ",_def->_idx); | |
73 | |
74 tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score()); | |
75 // Flags | |
76 if( _is_oop ) tty->print("Oop "); | |
77 if( _is_float ) tty->print("Float "); | |
78 if( _was_spilled1 ) tty->print("Spilled "); | |
79 if( _was_spilled2 ) tty->print("Spilled2 "); | |
80 if( _direct_conflict ) tty->print("Direct_conflict "); | |
81 if( _fat_proj ) tty->print("Fat "); | |
82 if( _was_lo ) tty->print("Lo "); | |
83 if( _has_copy ) tty->print("Copy "); | |
84 if( _at_risk ) tty->print("Risk "); | |
85 | |
86 if( _must_spill ) tty->print("Must_spill "); | |
87 if( _is_bound ) tty->print("Bound "); | |
88 if( _msize_valid ) { | |
89 if( _degree_valid && lo_degree() ) tty->print("Trivial "); | |
90 } | |
91 | |
92 tty->cr(); | |
93 } | |
94 #endif | |
95 | |
96 //------------------------------score------------------------------------------ | |
97 // Compute score from cost and area. Low score is best to spill. | |
98 static double raw_score( double cost, double area ) { | |
99 return cost - (area*RegisterCostAreaRatio) * 1.52588e-5; | |
100 } | |
101 | |
102 double LRG::score() const { | |
103 // Scale _area by RegisterCostAreaRatio/64K then subtract from cost. | |
104 // Bigger area lowers score, encourages spilling this live range. | |
105 // Bigger cost raise score, prevents spilling this live range. | |
106 // (Note: 1/65536 is the magic constant below; I dont trust the C optimizer | |
107 // to turn a divide by a constant into a multiply by the reciprical). | |
108 double score = raw_score( _cost, _area); | |
109 | |
110 // Account for area. Basically, LRGs covering large areas are better | |
111 // to spill because more other LRGs get freed up. | |
112 if( _area == 0.0 ) // No area? Then no progress to spill | |
113 return 1e35; | |
114 | |
115 if( _was_spilled2 ) // If spilled once before, we are unlikely | |
116 return score + 1e30; // to make progress again. | |
117 | |
118 if( _cost >= _area*3.0 ) // Tiny area relative to cost | |
119 return score + 1e17; // Probably no progress to spill | |
120 | |
121 if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost | |
122 return score + 1e10; // Likely no progress to spill | |
123 | |
124 return score; | |
125 } | |
126 | |
127 //------------------------------LRG_List--------------------------------------- | |
128 LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) { | |
129 memset( _lidxs, 0, sizeof(uint)*max ); | |
130 } | |
131 | |
132 void LRG_List::extend( uint nidx, uint lidx ) { | |
133 _nesting.check(); | |
134 if( nidx >= _max ) { | |
135 uint size = 16; | |
136 while( size <= nidx ) size <<=1; | |
137 _lidxs = REALLOC_RESOURCE_ARRAY( uint, _lidxs, _max, size ); | |
138 _max = size; | |
139 } | |
140 while( _cnt <= nidx ) | |
141 _lidxs[_cnt++] = 0; | |
142 _lidxs[nidx] = lidx; | |
143 } | |
144 | |
145 #define NUMBUCKS 3 | |
146 | |
147 //------------------------------Chaitin---------------------------------------- | |
148 PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher) | |
149 : PhaseRegAlloc(unique, cfg, matcher, | |
150 #ifndef PRODUCT | |
151 print_chaitin_statistics | |
152 #else | |
153 NULL | |
154 #endif | |
155 ), | |
156 _names(unique), _uf_map(unique), | |
157 _maxlrg(0), _live(0), | |
158 _spilled_once(Thread::current()->resource_area()), | |
159 _spilled_twice(Thread::current()->resource_area()), | |
160 _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0), | |
161 _oldphi(unique) | |
162 #ifndef PRODUCT | |
163 , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling")) | |
164 #endif | |
165 { | |
166 NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); ) | |
673 | 167 |
168 _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq); | |
169 | |
0 | 170 uint i,j; |
171 // Build a list of basic blocks, sorted by frequency | |
172 _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); | |
173 // Experiment with sorting strategies to speed compilation | |
174 double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket | |
175 Block **buckets[NUMBUCKS]; // Array of buckets | |
176 uint buckcnt[NUMBUCKS]; // Array of bucket counters | |
177 double buckval[NUMBUCKS]; // Array of bucket value cutoffs | |
178 for( i = 0; i < NUMBUCKS; i++ ) { | |
179 buckets[i] = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); | |
180 buckcnt[i] = 0; | |
181 // Bump by three orders of magnitude each time | |
182 cutoff *= 0.001; | |
183 buckval[i] = cutoff; | |
184 for( j = 0; j < _cfg._num_blocks; j++ ) { | |
185 buckets[i][j] = NULL; | |
186 } | |
187 } | |
188 // Sort blocks into buckets | |
189 for( i = 0; i < _cfg._num_blocks; i++ ) { | |
190 for( j = 0; j < NUMBUCKS; j++ ) { | |
191 if( (j == NUMBUCKS-1) || (_cfg._blocks[i]->_freq > buckval[j]) ) { | |
192 // Assign block to end of list for appropriate bucket | |
193 buckets[j][buckcnt[j]++] = _cfg._blocks[i]; | |
194 break; // kick out of inner loop | |
195 } | |
196 } | |
197 } | |
198 // Dump buckets into final block array | |
199 uint blkcnt = 0; | |
200 for( i = 0; i < NUMBUCKS; i++ ) { | |
201 for( j = 0; j < buckcnt[i]; j++ ) { | |
202 _blks[blkcnt++] = buckets[i][j]; | |
203 } | |
204 } | |
205 | |
206 assert(blkcnt == _cfg._num_blocks, "Block array not totally filled"); | |
207 } | |
208 | |
//------------------------------Register_Allocate------------------------------
// Top-level driver for the Briggs/Chaitin-style graph-coloring allocator:
// leave SSA, build liveness and the interference graph, coalesce copies,
// color, and split/spill in a loop until a coloring succeeds (or the
// compile bails out).  On success, copies each live range's assigned
// register(s) out of the temporary arena into _node_regs.
void PhaseChaitin::Register_Allocate() {

  // Above the OLD FP (and in registers) are the incoming arguments.  Stack
  // slots in this area are called "arg_slots".  Above the NEW FP (and in
  // registers) is the outgoing argument area; above that is the spill/temp
  // area.  These are all "frame_slots".  Arg_slots start at the zero
  // stack_slots and count up to the known arg_size.  Frame_slots start at
  // the stack_slot #arg_size and go up.  After allocation I map stack
  // slots to actual offsets.  Stack-slots in the arg_slot area are biased
  // by the frame_size; stack-slots in the frame_slot area are biased by 0.

  _trip_cnt = 0;
  _alternate = 0;
  _matcher._allocation_started = true;

  ResourceArea live_arena;      // Arena for liveness & IFG info
  ResourceMark rm(&live_arena);

  // Need live-ness for the IFG; need the IFG for coalescing.  If the
  // liveness is JUST for coalescing, then I can get some mileage by renaming
  // all copy-related live ranges low and then using the max copy-related
  // live range as a cut-off for LIVE and the IFG.  In other words, I can
  // build a subset of LIVE and IFG just for copies.
  PhaseLive live(_cfg,_names,&live_arena);

  // Need IFG for coalescing and coloring
  PhaseIFG ifg( &live_arena );
  _ifg = &ifg;

  if (C->unique() > _names.Size()) _names.extend(C->unique()-1, 0);

  // Come out of SSA world to the Named world.  Assign (virtual) registers to
  // Nodes.  Use the same register for all inputs and the output of PhiNodes
  // - effectively ending SSA form.  This requires either coalescing live
  // ranges or inserting copies.  For the moment, we insert "virtual copies"
  // - we pretend there is a copy prior to each Phi in predecessor blocks.
  // We will attempt to coalesce such "virtual copies" before we manifest
  // them for real.
  de_ssa();

#ifdef ASSERT
  // Verify the graph before RA.
  verify(&live_arena);
#endif

  // First liveness/IFG build: masks gathered before aggressive coalesce.
  {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
    _live = NULL;                 // Mark live as being not available
    rm.reset_to_mark();           // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);            // Empty IFG
    gather_lrg_masks( false );    // Collect LRG masks
    live.compute( _maxlrg );      // Compute liveness
    _live = &live;                // Mark LIVE as being available
  }

  // Base pointers are currently "used" by instructions which define new
  // derived pointers.  This makes base pointers live up to the where the
  // derived pointer is made, but not beyond.  Really, they need to be live
  // across any GC point where the derived value is live.  So this code looks
  // at all the GC points, and "stretches" the live range of any base pointer
  // to the GC point.
  if( stretch_base_pointer_live_ranges(&live_arena) ) {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler); )
    // Since some live range stretched, I need to recompute live
    _live = NULL;
    rm.reset_to_mark();           // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);
    gather_lrg_masks( false );
    live.compute( _maxlrg );
    _live = &live;
  }
  // Create the interference graph using virtual copies
  build_ifg_virtual( );  // Include stack slots this time

  // Aggressive (but pessimistic) copy coalescing.
  // This pass works on virtual copies.  Any virtual copies which are not
  // coalesced get manifested as actual copies
  {
    // The IFG is/was triangular.  I am 'squaring it up' so Union can run
    // faster.  Union requires a 'for all' operation which is slow on the
    // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
    // meaning I can visit all the Nodes neighbors less than a Node in time
    // O(# of neighbors), but I have to visit all the Nodes greater than a
    // given Node and search them for an instance, i.e., time O(#MaxLRG)).
    _ifg->SquareUp();

    PhaseAggressiveCoalesce coalesce( *this );
    coalesce.coalesce_driver( );
    // Insert un-coalesced copies.  Visit all Phis.  Where inputs to a Phi do
    // not match the Phi itself, insert a copy.
    coalesce.insert_copies(_matcher);
  }

  // After aggressive coalesce, attempt a first cut at coloring.
  // To color, we need the IFG and for that we need LIVE.
  {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
    _live = NULL;
    rm.reset_to_mark();           // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);
    gather_lrg_masks( true );     // after_aggressive: trim masks for real now
    live.compute( _maxlrg );
    _live = &live;
  }

  // Build physical interference graph
  uint must_spill = 0;
  must_spill = build_ifg_physical( &live_arena );
  // If we have a guaranteed spill, might as well spill now
  if( must_spill ) {
    if( !_maxlrg ) return;
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
    C->check_node_count(10*must_spill, "out of nodes before split");
    if (C->failing()) return;
    _maxlrg = Split( _maxlrg );   // Split spilling LRG everywhere
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
    // or we failed to split
    C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
    if (C->failing()) return;

    NOT_PRODUCT( C->verify_graph_edges(); )

    compact();                    // Compact LRGs; return new lower max lrg

    // Splitting invalidated liveness and the IFG — rebuild both.
    {
      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
      _live = NULL;
      rm.reset_to_mark();         // Reclaim working storage
      IndexSet::reset_memory(C, &live_arena);
      ifg.init(_maxlrg);          // Build a new interference graph
      gather_lrg_masks( true );   // Collect intersect mask
      live.compute( _maxlrg );    // Compute LIVE
      _live = &live;
    }
    build_ifg_physical( &live_arena );
    _ifg->SquareUp();
    _ifg->Compute_Effective_Degree();
    // Only do conservative coalescing if requested
    if( OptoCoalesce ) {
      // Conservative (and pessimistic) copy coalescing of those spills
      PhaseConservativeCoalesce coalesce( *this );
      // If max live ranges greater than cutoff, don't color the stack.
      // This cutoff can be larger than below since it is only done once.
      coalesce.coalesce_driver( );
    }
    compress_uf_map_for_nodes();

#ifdef ASSERT
    verify(&live_arena, true);
#endif
  } else {
    ifg.SquareUp();
    ifg.Compute_Effective_Degree();
#ifdef ASSERT
    set_was_low();
#endif
  }

  // Prepare for Simplify & Select
  cache_lrg_info();               // Count degree of LRGs

  // Simplify the InterFerence Graph by removing LRGs of low degree.
  // LRGs of low degree are trivially colorable.
  Simplify();

  // Select colors by re-inserting LRGs back into the IFG in reverse order.
  // Return whether or not something spills.
  uint spills = Select( );

  // If we spill, split and recycle the entire thing
  while( spills ) {
    if( _trip_cnt++ > 24 ) {
      DEBUG_ONLY( dump_for_spill_split_recycle(); )
      if( _trip_cnt > 27 ) {
        // Hard cap on retries — give up on this method rather than loop.
        C->record_method_not_compilable("failed spill-split-recycle sanity check");
        return;
      }
    }

    if( !_maxlrg ) return;
    _maxlrg = Split( _maxlrg );   // Split spilling LRG everywhere
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
    C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split");
    if (C->failing()) return;

    compact();                    // Compact LRGs; return new lower max lrg

    // Nuke the live-ness and interference graph and LiveRanGe info
    {
      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
      _live = NULL;
      rm.reset_to_mark();         // Reclaim working storage
      IndexSet::reset_memory(C, &live_arena);
      ifg.init(_maxlrg);

      // Create LiveRanGe array.
      // Intersect register masks for all USEs and DEFs
      gather_lrg_masks( true );
      live.compute( _maxlrg );
      _live = &live;
    }
    must_spill = build_ifg_physical( &live_arena );
    _ifg->SquareUp();
    _ifg->Compute_Effective_Degree();

    // Only do conservative coalescing if requested
    if( OptoCoalesce ) {
      // Conservative (and pessimistic) copy coalescing
      PhaseConservativeCoalesce coalesce( *this );
      // Check for few live ranges determines how aggressive coalesce is.
      coalesce.coalesce_driver( );
    }
    compress_uf_map_for_nodes();
#ifdef ASSERT
    verify(&live_arena, true);
#endif
    cache_lrg_info();             // Count degree of LRGs

    // Simplify the InterFerence Graph by removing LRGs of low degree.
    // LRGs of low degree are trivially colorable.
    Simplify();

    // Select colors by re-inserting LRGs back into the IFG in reverse order.
    // Return whether or not something spills.
    spills = Select( );
  }

  // Count number of Simplify-Select trips per coloring success.
  _allocator_attempts += _trip_cnt + 1;
  _allocator_successes += 1;

  // Peephole remove copies
  post_allocate_copy_removal();

#ifdef ASSERT
  // Verify the graph after RA.
  verify(&live_arena);
#endif

  // max_reg is past the largest *register* used.
  // Convert that to a frame_slot number.
  if( _max_reg <= _matcher._new_SP )
    _framesize = C->out_preserve_stack_slots();
  else _framesize = _max_reg -_matcher._new_SP;
  assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");

  // This frame must preserve the required fp alignment
  _framesize = round_to(_framesize, Matcher::stack_alignment_in_slots());
  assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );
#ifndef PRODUCT
  _total_framesize += _framesize;
  if( (int)_framesize > _max_framesize )
    _max_framesize = _framesize;
#endif

  // Convert CISC spills
  fixup_spills();

  // Log regalloc results
  CompileLog* log = Compile::current()->log();
  if (log != NULL) {
    log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());
  }

  if (C->failing()) return;

  NOT_PRODUCT( C->verify_graph_edges(); )

  // Move important info out of the live_arena to longer lasting storage.
  alloc_node_regs(_names.Size());
  for( uint i=0; i < _names.Size(); i++ ) {
    if( _names[i] ) {             // Live range associated with Node?
      LRG &lrg = lrgs( _names[i] );
      if( lrg.num_regs() == 1 ) {
        _node_regs[i].set1( lrg.reg() );
      } else {                    // Must be a register-pair
        if( !lrg._fat_proj ) {    // Must be aligned adjacent register pair
          // Live ranges record the highest register in their mask.
          // We want the low register for the AD file writer's convenience.
          _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) );
        } else {                  // Misaligned; extract 2 bits
          OptoReg::Name hi = lrg.reg(); // Get hi register
          lrg.Remove(hi);               // Yank from mask
          int lo = lrg.mask().find_first_elem(); // Find lo
          _node_regs[i].set_pair( hi, lo );
        }
      }
      if( lrg._is_oop ) _node_oops.set(i);
    } else {
      _node_regs[i].set_bad();
    }
  }

  // Done!
  _live = NULL;
  _ifg = NULL;
  C->set_indexSet_arena(NULL);  // ResourceArea is at end of scope
}
510 | |
511 //------------------------------de_ssa----------------------------------------- | |
512 void PhaseChaitin::de_ssa() { | |
513 // Set initial Names for all Nodes. Most Nodes get the virtual register | |
514 // number. A few get the ZERO live range number. These do not | |
515 // get allocated, but instead rely on correct scheduling to ensure that | |
516 // only one instance is simultaneously live at a time. | |
517 uint lr_counter = 1; | |
518 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
519 Block *b = _cfg._blocks[i]; | |
520 uint cnt = b->_nodes.size(); | |
521 | |
522 // Handle all the normal Nodes in the block | |
523 for( uint j = 0; j < cnt; j++ ) { | |
524 Node *n = b->_nodes[j]; | |
525 // Pre-color to the zero live range, or pick virtual register | |
526 const RegMask &rm = n->out_RegMask(); | |
527 _names.map( n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0 ); | |
528 } | |
529 } | |
530 // Reset the Union-Find mapping to be identity | |
531 reset_uf_map(lr_counter); | |
532 } | |
533 | |
534 | |
535 //------------------------------gather_lrg_masks------------------------------- | |
536 // Gather LiveRanGe information, including register masks. Modification of | |
537 // cisc spillable in_RegMasks should not be done before AggressiveCoalesce. | |
538 void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { | |
539 | |
540 // Nail down the frame pointer live range | |
541 uint fp_lrg = n2lidx(_cfg._root->in(1)->in(TypeFunc::FramePtr)); | |
542 lrgs(fp_lrg)._cost += 1e12; // Cost is infinite | |
543 | |
544 // For all blocks | |
545 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
546 Block *b = _cfg._blocks[i]; | |
547 | |
548 // For all instructions | |
549 for( uint j = 1; j < b->_nodes.size(); j++ ) { | |
550 Node *n = b->_nodes[j]; | |
551 uint input_edge_start =1; // Skip control most nodes | |
552 if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base(); | |
553 uint idx = n->is_Copy(); | |
554 | |
555 // Get virtual register number, same as LiveRanGe index | |
556 uint vreg = n2lidx(n); | |
557 LRG &lrg = lrgs(vreg); | |
558 if( vreg ) { // No vreg means un-allocable (e.g. memory) | |
559 | |
560 // Collect has-copy bit | |
561 if( idx ) { | |
562 lrg._has_copy = 1; | |
563 uint clidx = n2lidx(n->in(idx)); | |
564 LRG ©_src = lrgs(clidx); | |
565 copy_src._has_copy = 1; | |
566 } | |
567 | |
568 // Check for float-vs-int live range (used in register-pressure | |
569 // calculations) | |
570 const Type *n_type = n->bottom_type(); | |
571 if( n_type->is_floatingpoint() ) | |
572 lrg._is_float = 1; | |
573 | |
574 // Check for twice prior spilling. Once prior spilling might have | |
575 // spilled 'soft', 2nd prior spill should have spilled 'hard' and | |
576 // further spilling is unlikely to make progress. | |
577 if( _spilled_once.test(n->_idx) ) { | |
578 lrg._was_spilled1 = 1; | |
579 if( _spilled_twice.test(n->_idx) ) | |
580 lrg._was_spilled2 = 1; | |
581 } | |
582 | |
583 #ifndef PRODUCT | |
584 if (trace_spilling() && lrg._def != NULL) { | |
585 // collect defs for MultiDef printing | |
586 if (lrg._defs == NULL) { | |
1685 | 587 lrg._defs = new (_ifg->_arena) GrowableArray<Node*>(_ifg->_arena, 2, 0, NULL); |
0 | 588 lrg._defs->append(lrg._def); |
589 } | |
590 lrg._defs->append(n); | |
591 } | |
592 #endif | |
593 | |
594 // Check for a single def LRG; these can spill nicely | |
595 // via rematerialization. Flag as NULL for no def found | |
596 // yet, or 'n' for single def or -1 for many defs. | |
597 lrg._def = lrg._def ? NodeSentinel : n; | |
598 | |
599 // Limit result register mask to acceptable registers | |
600 const RegMask &rm = n->out_RegMask(); | |
601 lrg.AND( rm ); | |
602 // Check for bound register masks | |
603 const RegMask &lrgmask = lrg.mask(); | |
604 if( lrgmask.is_bound1() || lrgmask.is_bound2() ) | |
605 lrg._is_bound = 1; | |
606 | |
607 // Check for maximum frequency value | |
608 if( lrg._maxfreq < b->_freq ) | |
609 lrg._maxfreq = b->_freq; | |
610 | |
611 int ireg = n->ideal_reg(); | |
612 assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP, | |
613 "oops must be in Op_RegP's" ); | |
614 // Check for oop-iness, or long/double | |
615 // Check for multi-kill projection | |
616 switch( ireg ) { | |
617 case MachProjNode::fat_proj: | |
618 // Fat projections have size equal to number of registers killed | |
619 lrg.set_num_regs(rm.Size()); | |
620 lrg.set_reg_pressure(lrg.num_regs()); | |
621 lrg._fat_proj = 1; | |
622 lrg._is_bound = 1; | |
623 break; | |
624 case Op_RegP: | |
625 #ifdef _LP64 | |
626 lrg.set_num_regs(2); // Size is 2 stack words | |
627 #else | |
628 lrg.set_num_regs(1); // Size is 1 stack word | |
629 #endif | |
630 // Register pressure is tracked relative to the maximum values | |
631 // suggested for that platform, INTPRESSURE and FLOATPRESSURE, | |
632 // and relative to other types which compete for the same regs. | |
633 // | |
634 // The following table contains suggested values based on the | |
635 // architectures as defined in each .ad file. | |
636 // INTPRESSURE and FLOATPRESSURE may be tuned differently for | |
637 // compile-speed or performance. | |
638 // Note1: | |
639 // SPARC and SPARCV9 reg_pressures are at 2 instead of 1 | |
640 // since .ad registers are defined as high and low halves. | |
641 // These reg_pressure values remain compatible with the code | |
642 // in is_high_pressure() which relates get_invalid_mask_size(), | |
643 // Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE. | |
644 // Note2: | |
645 // SPARC -d32 has 24 registers available for integral values, | |
646 // but only 10 of these are safe for 64-bit longs. | |
647 // Using set_reg_pressure(2) for both int and long means | |
648 // the allocator will believe it can fit 26 longs into | |
649 // registers. Using 2 for longs and 1 for ints means the | |
650 // allocator will attempt to put 52 integers into registers. | |
651 // The settings below limit this problem to methods with | |
652 // many long values which are being run on 32-bit SPARC. | |
653 // | |
654 // ------------------- reg_pressure -------------------- | |
655 // Each entry is reg_pressure_per_value,number_of_regs | |
656 // RegL RegI RegFlags RegF RegD INTPRESSURE FLOATPRESSURE | |
657 // IA32 2 1 1 1 1 6 6 | |
658 // IA64 1 1 1 1 1 50 41 | |
659 // SPARC 2 2 2 2 2 48 (24) 52 (26) | |
660 // SPARCV9 2 2 2 2 2 48 (24) 52 (26) | |
661 // AMD64 1 1 1 1 1 14 15 | |
662 // ----------------------------------------------------- | |
663 #if defined(SPARC) | |
664 lrg.set_reg_pressure(2); // use for v9 as well | |
665 #else | |
666 lrg.set_reg_pressure(1); // normally one value per register | |
667 #endif | |
668 if( n_type->isa_oop_ptr() ) { | |
669 lrg._is_oop = 1; | |
670 } | |
671 break; | |
672 case Op_RegL: // Check for long or double | |
673 case Op_RegD: | |
674 lrg.set_num_regs(2); | |
675 // Define platform specific register pressure | |
2401
7e88bdae86ec
7029017: Additional architecture support for c2 compiler
roland
parents:
2016
diff
changeset
|
676 #if defined(SPARC) || defined(ARM) |
0 | 677 lrg.set_reg_pressure(2); |
678 #elif defined(IA32) | |
679 if( ireg == Op_RegL ) { | |
680 lrg.set_reg_pressure(2); | |
681 } else { | |
682 lrg.set_reg_pressure(1); | |
683 } | |
684 #else | |
685 lrg.set_reg_pressure(1); // normally one value per register | |
686 #endif | |
687 // If this def of a double forces a mis-aligned double, | |
688 // flag as '_fat_proj' - really flag as allowing misalignment | |
689 // AND changes how we count interferences. A mis-aligned | |
690 // double can interfere with TWO aligned pairs, or effectively | |
691 // FOUR registers! | |
692 if( rm.is_misaligned_Pair() ) { | |
693 lrg._fat_proj = 1; | |
694 lrg._is_bound = 1; | |
695 } | |
696 break; | |
697 case Op_RegF: | |
698 case Op_RegI: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
699 case Op_RegN: |
0 | 700 case Op_RegFlags: |
701 case 0: // not an ideal register | |
702 lrg.set_num_regs(1); | |
703 #ifdef SPARC | |
704 lrg.set_reg_pressure(2); | |
705 #else | |
706 lrg.set_reg_pressure(1); | |
707 #endif | |
708 break; | |
709 default: | |
710 ShouldNotReachHere(); | |
711 } | |
712 } | |
713 | |
714 // Now do the same for inputs | |
715 uint cnt = n->req(); | |
716 // Setup for CISC SPILLING | |
717 uint inp = (uint)AdlcVMDeps::Not_cisc_spillable; | |
718 if( UseCISCSpill && after_aggressive ) { | |
719 inp = n->cisc_operand(); | |
720 if( inp != (uint)AdlcVMDeps::Not_cisc_spillable ) | |
721 // Convert operand number to edge index number | |
722 inp = n->as_Mach()->operand_index(inp); | |
723 } | |
724 // Prepare register mask for each input | |
725 for( uint k = input_edge_start; k < cnt; k++ ) { | |
726 uint vreg = n2lidx(n->in(k)); | |
727 if( !vreg ) continue; | |
728 | |
729 // If this instruction is CISC Spillable, add the flags | |
730 // bit to its appropriate input | |
731 if( UseCISCSpill && after_aggressive && inp == k ) { | |
732 #ifndef PRODUCT | |
733 if( TraceCISCSpill ) { | |
734 tty->print(" use_cisc_RegMask: "); | |
735 n->dump(); | |
736 } | |
737 #endif | |
738 n->as_Mach()->use_cisc_RegMask(); | |
739 } | |
740 | |
741 LRG &lrg = lrgs(vreg); | |
742 // // Testing for floating point code shape | |
743 // Node *test = n->in(k); | |
744 // if( test->is_Mach() ) { | |
745 // MachNode *m = test->as_Mach(); | |
746 // int op = m->ideal_Opcode(); | |
747 // if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) { | |
748 // int zzz = 1; | |
749 // } | |
750 // } | |
751 | |
752 // Limit result register mask to acceptable registers. | |
753 // Do not limit registers from uncommon uses before | |
754 // AggressiveCoalesce. This effectively pre-virtual-splits | |
755 // around uncommon uses of common defs. | |
756 const RegMask &rm = n->in_RegMask(k); | |
757 if( !after_aggressive && | |
758 _cfg._bbs[n->in(k)->_idx]->_freq > 1000*b->_freq ) { | |
759 // Since we are BEFORE aggressive coalesce, leave the register | |
760 // mask untrimmed by the call. This encourages more coalescing. | |
761 // Later, AFTER aggressive, this live range will have to spill | |
762 // but the spiller handles slow-path calls very nicely. | |
763 } else { | |
764 lrg.AND( rm ); | |
765 } | |
766 // Check for bound register masks | |
767 const RegMask &lrgmask = lrg.mask(); | |
768 if( lrgmask.is_bound1() || lrgmask.is_bound2() ) | |
769 lrg._is_bound = 1; | |
770 // If this use of a double forces a mis-aligned double, | |
771 // flag as '_fat_proj' - really flag as allowing misalignment | |
772 // AND changes how we count interferences. A mis-aligned | |
773 // double can interfere with TWO aligned pairs, or effectively | |
774 // FOUR registers! | |
775 if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) { | |
776 lrg._fat_proj = 1; | |
777 lrg._is_bound = 1; | |
778 } | |
779 // if the LRG is an unaligned pair, we will have to spill | |
780 // so clear the LRG's register mask if it is not already spilled | |
781 if ( !n->is_SpillCopy() && | |
295
ea18057223c4
6732194: Data corruption dependent on -server/-client/-Xbatch
never
parents:
196
diff
changeset
|
782 (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) && |
0 | 783 lrgmask.is_misaligned_Pair()) { |
784 lrg.Clear(); | |
785 } | |
786 | |
787 // Check for maximum frequency value | |
788 if( lrg._maxfreq < b->_freq ) | |
789 lrg._maxfreq = b->_freq; | |
790 | |
791 } // End for all allocated inputs | |
792 } // end for all instructions | |
793 } // end for all blocks | |
794 | |
795 // Final per-liverange setup | |
796 for( uint i2=0; i2<_maxlrg; i2++ ) { | |
797 LRG &lrg = lrgs(i2); | |
798 if( lrg.num_regs() == 2 && !lrg._fat_proj ) | |
799 lrg.ClearToPairs(); | |
800 lrg.compute_set_mask_size(); | |
801 if( lrg.not_free() ) { // Handle case where we lose from the start | |
802 lrg.set_reg(OptoReg::Name(LRG::SPILL_REG)); | |
803 lrg._direct_conflict = 1; | |
804 } | |
805 lrg.set_degree(0); // no neighbors in IFG yet | |
806 } | |
807 } | |
808 | |
809 //------------------------------set_was_low------------------------------------ | |
810 // Set the was-lo-degree bit. Conservative coalescing should not change the | |
811 // colorability of the graph. If any live range was of low-degree before | |
812 // coalescing, it should Simplify. This call sets the was-lo-degree bit. | |
813 // The bit is checked in Simplify. | |
814 void PhaseChaitin::set_was_low() { | |
815 #ifdef ASSERT | |
816 for( uint i = 1; i < _maxlrg; i++ ) { | |
817 int size = lrgs(i).num_regs(); | |
818 uint old_was_lo = lrgs(i)._was_lo; | |
819 lrgs(i)._was_lo = 0; | |
820 if( lrgs(i).lo_degree() ) { | |
821 lrgs(i)._was_lo = 1; // Trivially of low degree | |
822 } else { // Else check the Brigg's assertion | |
823 // Brigg's observation is that the lo-degree neighbors of a | |
824 // hi-degree live range will not interfere with the color choices | |
825 // of said hi-degree live range. The Simplify reverse-stack-coloring | |
826 // order takes care of the details. Hence you do not have to count | |
827 // low-degree neighbors when determining if this guy colors. | |
828 int briggs_degree = 0; | |
829 IndexSet *s = _ifg->neighbors(i); | |
830 IndexSetIterator elements(s); | |
831 uint lidx; | |
832 while((lidx = elements.next()) != 0) { | |
833 if( !lrgs(lidx).lo_degree() ) | |
834 briggs_degree += MAX2(size,lrgs(lidx).num_regs()); | |
835 } | |
836 if( briggs_degree < lrgs(i).degrees_of_freedom() ) | |
837 lrgs(i)._was_lo = 1; // Low degree via the briggs assertion | |
838 } | |
839 assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease"); | |
840 } | |
841 #endif | |
842 } | |
843 | |
844 #define REGISTER_CONSTRAINED 16 | |
845 | |
846 //------------------------------cache_lrg_info--------------------------------- | |
847 // Compute cost/area ratio, in case we spill. Build the lo-degree list. | |
848 void PhaseChaitin::cache_lrg_info( ) { | |
849 | |
850 for( uint i = 1; i < _maxlrg; i++ ) { | |
851 LRG &lrg = lrgs(i); | |
852 | |
853 // Check for being of low degree: means we can be trivially colored. | |
854 // Low degree, dead or must-spill guys just get to simplify right away | |
855 if( lrg.lo_degree() || | |
856 !lrg.alive() || | |
857 lrg._must_spill ) { | |
858 // Split low degree list into those guys that must get a | |
859 // register and those that can go to register or stack. | |
860 // The idea is LRGs that can go register or stack color first when | |
861 // they have a good chance of getting a register. The register-only | |
862 // lo-degree live ranges always get a register. | |
863 OptoReg::Name hi_reg = lrg.mask().find_last_elem(); | |
864 if( OptoReg::is_stack(hi_reg)) { // Can go to stack? | |
865 lrg._next = _lo_stk_degree; | |
866 _lo_stk_degree = i; | |
867 } else { | |
868 lrg._next = _lo_degree; | |
869 _lo_degree = i; | |
870 } | |
871 } else { // Else high degree | |
872 lrgs(_hi_degree)._prev = i; | |
873 lrg._next = _hi_degree; | |
874 lrg._prev = 0; | |
875 _hi_degree = i; | |
876 } | |
877 } | |
878 } | |
879 | |
880 //------------------------------Pre-Simplify----------------------------------- | |
881 // Simplify the IFG by removing LRGs of low degree that have NO copies | |
882 void PhaseChaitin::Pre_Simplify( ) { | |
883 | |
884 // Warm up the lo-degree no-copy list | |
885 int lo_no_copy = 0; | |
886 for( uint i = 1; i < _maxlrg; i++ ) { | |
887 if( (lrgs(i).lo_degree() && !lrgs(i)._has_copy) || | |
888 !lrgs(i).alive() || | |
889 lrgs(i)._must_spill ) { | |
890 lrgs(i)._next = lo_no_copy; | |
891 lo_no_copy = i; | |
892 } | |
893 } | |
894 | |
895 while( lo_no_copy ) { | |
896 uint lo = lo_no_copy; | |
897 lo_no_copy = lrgs(lo)._next; | |
898 int size = lrgs(lo).num_regs(); | |
899 | |
900 // Put the simplified guy on the simplified list. | |
901 lrgs(lo)._next = _simplified; | |
902 _simplified = lo; | |
903 | |
904 // Yank this guy from the IFG. | |
905 IndexSet *adj = _ifg->remove_node( lo ); | |
906 | |
907 // If any neighbors' degrees fall below their number of | |
908 // allowed registers, then put that neighbor on the low degree | |
909 // list. Note that 'degree' can only fall and 'numregs' is | |
910 // unchanged by this action. Thus the two are equal at most once, | |
911 // so LRGs hit the lo-degree worklists at most once. | |
912 IndexSetIterator elements(adj); | |
913 uint neighbor; | |
914 while ((neighbor = elements.next()) != 0) { | |
915 LRG *n = &lrgs(neighbor); | |
916 assert( _ifg->effective_degree(neighbor) == n->degree(), "" ); | |
917 | |
918 // Check for just becoming of-low-degree | |
919 if( n->just_lo_degree() && !n->_has_copy ) { | |
920 assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); | |
921 // Put on lo-degree list | |
922 n->_next = lo_no_copy; | |
923 lo_no_copy = neighbor; | |
924 } | |
925 } | |
926 } // End of while lo-degree no_copy worklist not empty | |
927 | |
928 // No more lo-degree no-copy live ranges to simplify | |
929 } | |
930 | |
931 //------------------------------Simplify--------------------------------------- | |
932 // Simplify the IFG by removing LRGs of low degree. | |
933 void PhaseChaitin::Simplify( ) { | |
934 | |
935 while( 1 ) { // Repeat till simplified it all | |
936 // May want to explore simplifying lo_degree before _lo_stk_degree. | |
937 // This might result in more spills coloring into registers during | |
938 // Select(). | |
939 while( _lo_degree || _lo_stk_degree ) { | |
940 // If possible, pull from lo_stk first | |
941 uint lo; | |
942 if( _lo_degree ) { | |
943 lo = _lo_degree; | |
944 _lo_degree = lrgs(lo)._next; | |
945 } else { | |
946 lo = _lo_stk_degree; | |
947 _lo_stk_degree = lrgs(lo)._next; | |
948 } | |
949 | |
950 // Put the simplified guy on the simplified list. | |
951 lrgs(lo)._next = _simplified; | |
952 _simplified = lo; | |
953 // If this guy is "at risk" then mark his current neighbors | |
954 if( lrgs(lo)._at_risk ) { | |
955 IndexSetIterator elements(_ifg->neighbors(lo)); | |
956 uint datum; | |
957 while ((datum = elements.next()) != 0) { | |
958 lrgs(datum)._risk_bias = lo; | |
959 } | |
960 } | |
961 | |
962 // Yank this guy from the IFG. | |
963 IndexSet *adj = _ifg->remove_node( lo ); | |
964 | |
965 // If any neighbors' degrees fall below their number of | |
966 // allowed registers, then put that neighbor on the low degree | |
967 // list. Note that 'degree' can only fall and 'numregs' is | |
968 // unchanged by this action. Thus the two are equal at most once, | |
969 // so LRGs hit the lo-degree worklist at most once. | |
970 IndexSetIterator elements(adj); | |
971 uint neighbor; | |
972 while ((neighbor = elements.next()) != 0) { | |
973 LRG *n = &lrgs(neighbor); | |
974 #ifdef ASSERT | |
550
96964ebdb154
6782232: assert("CreateEx must be first instruction in block" )
kvn
parents:
419
diff
changeset
|
975 if( VerifyOpto || VerifyRegisterAllocator ) { |
0 | 976 assert( _ifg->effective_degree(neighbor) == n->degree(), "" ); |
977 } | |
978 #endif | |
979 | |
980 // Check for just becoming of-low-degree just counting registers. | |
981 // _must_spill live ranges are already on the low degree list. | |
982 if( n->just_lo_degree() && !n->_must_spill ) { | |
983 assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); | |
984 // Pull from hi-degree list | |
985 uint prev = n->_prev; | |
986 uint next = n->_next; | |
987 if( prev ) lrgs(prev)._next = next; | |
988 else _hi_degree = next; | |
989 lrgs(next)._prev = prev; | |
990 n->_next = _lo_degree; | |
991 _lo_degree = neighbor; | |
992 } | |
993 } | |
994 } // End of while lo-degree/lo_stk_degree worklist not empty | |
995 | |
996 // Check for got everything: is hi-degree list empty? | |
997 if( !_hi_degree ) break; | |
998 | |
999 // Time to pick a potential spill guy | |
1000 uint lo_score = _hi_degree; | |
1001 double score = lrgs(lo_score).score(); | |
1002 double area = lrgs(lo_score)._area; | |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1003 double cost = lrgs(lo_score)._cost; |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1004 bool bound = lrgs(lo_score)._is_bound; |
0 | 1005 |
1006 // Find cheapest guy | |
1007 debug_only( int lo_no_simplify=0; ); | |
1012
5f29a958a545
6889656: assert(lo_lrg->lo_degree() || !lo_no_simplify,"Live range was lo-degree before coalesce
kvn
parents:
1008
diff
changeset
|
1008 for( uint i = _hi_degree; i; i = lrgs(i)._next ) { |
0 | 1009 assert( !(*_ifg->_yanked)[i], "" ); |
1010 // It's just vaguely possible to move hi-degree to lo-degree without | |
1011 // going through a just-lo-degree stage: If you remove a double from | |
1012 // a float live range it's degree will drop by 2 and you can skip the | |
1013 // just-lo-degree stage. It's very rare (shows up after 5000+ methods | |
1014 // in -Xcomp of Java2Demo). So just choose this guy to simplify next. | |
1015 if( lrgs(i).lo_degree() ) { | |
1016 lo_score = i; | |
1017 break; | |
1018 } | |
1019 debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; ); | |
1020 double iscore = lrgs(i).score(); | |
1021 double iarea = lrgs(i)._area; | |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1022 double icost = lrgs(i)._cost; |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1023 bool ibound = lrgs(i)._is_bound; |
0 | 1024 |
1025 // Compare cost/area of i vs cost/area of lo_score. Smaller cost/area | |
1026 // wins. Ties happen because all live ranges in question have spilled | |
1027 // a few times before and the spill-score adds a huge number which | |
1028 // washes out the low order bits. We are choosing the lesser of 2 | |
1029 // evils; in this case pick largest area to spill. | |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1030 // Ties also happen when live ranges are defined and used only inside |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1031 // one block. In which case their area is 0 and score set to max. |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1032 // In such case choose bound live range over unbound to free registers |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1033 // or with smaller cost to spill. |
0 | 1034 if( iscore < score || |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1035 (iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) || |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1036 (iscore == score && iarea == area && |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1037 ( (ibound && !bound) || ibound == bound && (icost < cost) )) ) { |
0 | 1038 lo_score = i; |
1039 score = iscore; | |
1040 area = iarea; | |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1041 cost = icost; |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1042 bound = ibound; |
0 | 1043 } |
1044 } | |
1045 LRG *lo_lrg = &lrgs(lo_score); | |
1046 // The live range we choose for spilling is either hi-degree, or very | |
1047 // rarely it can be low-degree. If we choose a hi-degree live range | |
1048 // there better not be any lo-degree choices. | |
1049 assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" ); | |
1050 | |
1051 // Pull from hi-degree list | |
1052 uint prev = lo_lrg->_prev; | |
1053 uint next = lo_lrg->_next; | |
1054 if( prev ) lrgs(prev)._next = next; | |
1055 else _hi_degree = next; | |
1056 lrgs(next)._prev = prev; | |
1057 // Jam him on the lo-degree list, despite his high degree. | |
1058 // Maybe he'll get a color, and maybe he'll spill. | |
1059 // Only Select() will know. | |
1060 lrgs(lo_score)._at_risk = true; | |
1061 _lo_degree = lo_score; | |
1062 lo_lrg->_next = 0; | |
1063 | |
1064 } // End of while not simplified everything | |
1065 | |
1066 } | |
1067 | |
1068 //------------------------------bias_color------------------------------------- | |
1069 // Choose a color using the biasing heuristic | |
1070 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { | |
1071 | |
1072 // Check for "at_risk" LRG's | |
1073 uint risk_lrg = Find(lrg._risk_bias); | |
1074 if( risk_lrg != 0 ) { | |
1075 // Walk the colored neighbors of the "at_risk" candidate | |
1076 // Choose a color which is both legal and already taken by a neighbor | |
1077 // of the "at_risk" candidate in order to improve the chances of the | |
1078 // "at_risk" candidate of coloring | |
1079 IndexSetIterator elements(_ifg->neighbors(risk_lrg)); | |
1080 uint datum; | |
1081 while ((datum = elements.next()) != 0) { | |
1082 OptoReg::Name reg = lrgs(datum).reg(); | |
1083 // If this LRG's register is legal for us, choose it | |
1084 if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE && | |
1085 lrg.mask().Member(OptoReg::add(reg,-chunk)) && | |
1086 (lrg.num_regs()==1 || // either size 1 | |
1087 (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs) | |
1088 return reg; | |
1089 } | |
1090 } | |
1091 | |
1092 uint copy_lrg = Find(lrg._copy_bias); | |
1093 if( copy_lrg != 0 ) { | |
1094 // If he has a color, | |
1095 if( !(*(_ifg->_yanked))[copy_lrg] ) { | |
1096 OptoReg::Name reg = lrgs(copy_lrg).reg(); | |
1097 // And it is legal for you, | |
1098 if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE && | |
1099 lrg.mask().Member(OptoReg::add(reg,-chunk)) && | |
1100 (lrg.num_regs()==1 || // either size 1 | |
1101 (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs) | |
1102 return reg; | |
1103 } else if( chunk == 0 ) { | |
1104 // Choose a color which is legal for him | |
1105 RegMask tempmask = lrg.mask(); | |
1106 tempmask.AND(lrgs(copy_lrg).mask()); | |
1107 OptoReg::Name reg; | |
1108 if( lrg.num_regs() == 1 ) { | |
1109 reg = tempmask.find_first_elem(); | |
1110 } else { | |
1111 tempmask.ClearToPairs(); | |
1112 reg = tempmask.find_first_pair(); | |
1113 } | |
1114 if( OptoReg::is_valid(reg) ) | |
1115 return reg; | |
1116 } | |
1117 } | |
1118 | |
1119 // If no bias info exists, just go with the register selection ordering | |
1120 if( lrg.num_regs() == 2 ) { | |
1121 // Find an aligned pair | |
1122 return OptoReg::add(lrg.mask().find_first_pair(),chunk); | |
1123 } | |
1124 | |
1125 // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate | |
1126 // copy removal to remove many more copies, by preventing a just-assigned | |
1127 // register from being repeatedly assigned. | |
1128 OptoReg::Name reg = lrg.mask().find_first_elem(); | |
1129 if( (++_alternate & 1) && OptoReg::is_valid(reg) ) { | |
1130 // This 'Remove; find; Insert' idiom is an expensive way to find the | |
1131 // SECOND element in the mask. | |
1132 lrg.Remove(reg); | |
1133 OptoReg::Name reg2 = lrg.mask().find_first_elem(); | |
1134 lrg.Insert(reg); | |
1135 if( OptoReg::is_reg(reg2)) | |
1136 reg = reg2; | |
1137 } | |
1138 return OptoReg::add( reg, chunk ); | |
1139 } | |
1140 | |
1141 //------------------------------choose_color----------------------------------- | |
1142 // Choose a color in the current chunk | |
1143 OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) { | |
1144 assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)"); | |
1145 assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)"); | |
1146 | |
1147 if( lrg.num_regs() == 1 || // Common Case | |
1148 !lrg._fat_proj ) // Aligned+adjacent pairs ok | |
1149 // Use a heuristic to "bias" the color choice | |
1150 return bias_color(lrg, chunk); | |
1151 | |
1152 assert( lrg.num_regs() >= 2, "dead live ranges do not color" ); | |
1153 | |
1154 // Fat-proj case or misaligned double argument. | |
1155 assert(lrg.compute_mask_size() == lrg.num_regs() || | |
1156 lrg.num_regs() == 2,"fat projs exactly color" ); | |
1157 assert( !chunk, "always color in 1st chunk" ); | |
1158 // Return the highest element in the set. | |
1159 return lrg.mask().find_last_elem(); | |
1160 } | |
1161 | |
1162 //------------------------------Select----------------------------------------- | |
1163 // Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted | |
1164 // in reverse order of removal. As long as nothing of hi-degree was yanked, | |
1165 // everything going back is guaranteed a color. Select that color. If some | |
1166 // hi-degree LRG cannot get a color then we record that we must spill. | |
1167 uint PhaseChaitin::Select( ) { | |
1168 uint spill_reg = LRG::SPILL_REG; | |
1169 _max_reg = OptoReg::Name(0); // Past max register used | |
1170 while( _simplified ) { | |
1171 // Pull next LRG from the simplified list - in reverse order of removal | |
1172 uint lidx = _simplified; | |
1173 LRG *lrg = &lrgs(lidx); | |
1174 _simplified = lrg->_next; | |
1175 | |
1176 | |
1177 #ifndef PRODUCT | |
1178 if (trace_spilling()) { | |
1179 ttyLocker ttyl; | |
1180 tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(), | |
1181 lrg->degrees_of_freedom()); | |
1182 lrg->dump(); | |
1183 } | |
1184 #endif | |
1185 | |
1186 // Re-insert into the IFG | |
1187 _ifg->re_insert(lidx); | |
1188 if( !lrg->alive() ) continue; | |
1189 // capture allstackedness flag before mask is hacked | |
1190 const int is_allstack = lrg->mask().is_AllStack(); | |
1191 | |
1192 // Yeah, yeah, yeah, I know, I know. I can refactor this | |
1193 // to avoid the GOTO, although the refactored code will not | |
1194 // be much clearer. We arrive here IFF we have a stack-based | |
1195 // live range that cannot color in the current chunk, and it | |
1196 // has to move into the next free stack chunk. | |
1197 int chunk = 0; // Current chunk is first chunk | |
1198 retry_next_chunk: | |
1199 | |
1200 // Remove neighbor colors | |
1201 IndexSet *s = _ifg->neighbors(lidx); | |
1202 | |
1203 debug_only(RegMask orig_mask = lrg->mask();) | |
1204 IndexSetIterator elements(s); | |
1205 uint neighbor; | |
1206 while ((neighbor = elements.next()) != 0) { | |
1207 // Note that neighbor might be a spill_reg. In this case, exclusion | |
1208 // of its color will be a no-op, since the spill_reg chunk is in outer | |
1209 // space. Also, if neighbor is in a different chunk, this exclusion | |
1210 // will be a no-op. (Later on, if lrg runs out of possible colors in | |
1211 // its chunk, a new chunk of color may be tried, in which case | |
1212 // examination of neighbors is started again, at retry_next_chunk.) | |
1213 LRG &nlrg = lrgs(neighbor); | |
1214 OptoReg::Name nreg = nlrg.reg(); | |
1215 // Only subtract masks in the same chunk | |
1216 if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) { | |
1217 #ifndef PRODUCT | |
1218 uint size = lrg->mask().Size(); | |
1219 RegMask rm = lrg->mask(); | |
1220 #endif | |
1221 lrg->SUBTRACT(nlrg.mask()); | |
1222 #ifndef PRODUCT | |
1223 if (trace_spilling() && lrg->mask().Size() != size) { | |
1224 ttyLocker ttyl; | |
1225 tty->print("L%d ", lidx); | |
1226 rm.dump(); | |
1227 tty->print(" intersected L%d ", neighbor); | |
1228 nlrg.mask().dump(); | |
1229 tty->print(" removed "); | |
1230 rm.SUBTRACT(lrg->mask()); | |
1231 rm.dump(); | |
1232 tty->print(" leaving "); | |
1233 lrg->mask().dump(); | |
1234 tty->cr(); | |
1235 } | |
1236 #endif | |
1237 } | |
1238 } | |
1239 //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness"); | |
1240 // Aligned pairs need aligned masks | |
1241 if( lrg->num_regs() == 2 && !lrg->_fat_proj ) | |
1242 lrg->ClearToPairs(); | |
1243 | |
1244 // Check if a color is available and if so pick the color | |
1245 OptoReg::Name reg = choose_color( *lrg, chunk ); | |
1246 #ifdef SPARC | |
1247 debug_only(lrg->compute_set_mask_size()); | |
1248 assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned"); | |
1249 #endif | |
1250 | |
1251 //--------------- | |
1252 // If we fail to color and the AllStack flag is set, trigger | |
1253 // a chunk-rollover event | |
1254 if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) { | |
1255 // Bump register mask up to next stack chunk | |
1256 chunk += RegMask::CHUNK_SIZE; | |
1257 lrg->Set_All(); | |
1258 | |
1259 goto retry_next_chunk; | |
1260 } | |
1261 | |
1262 //--------------- | |
1263 // Did we get a color? | |
1264 else if( OptoReg::is_valid(reg)) { | |
1265 #ifndef PRODUCT | |
1266 RegMask avail_rm = lrg->mask(); | |
1267 #endif | |
1268 | |
1269 // Record selected register | |
1270 lrg->set_reg(reg); | |
1271 | |
1272 if( reg >= _max_reg ) // Compute max register limit | |
1273 _max_reg = OptoReg::add(reg,1); | |
1274 // Fold reg back into normal space | |
1275 reg = OptoReg::add(reg,-chunk); | |
1276 | |
1277 // If the live range is not bound, then we actually had some choices | |
1278 // to make. In this case, the mask has more bits in it than the colors | |
605 | 1279 // chosen. Restrict the mask to just what was picked. |
0 | 1280 if( lrg->num_regs() == 1 ) { // Size 1 live range |
1281 lrg->Clear(); // Clear the mask | |
1282 lrg->Insert(reg); // Set regmask to match selected reg | |
1283 lrg->set_mask_size(1); | |
1284 } else if( !lrg->_fat_proj ) { | |
1285 // For pairs, also insert the low bit of the pair | |
1286 assert( lrg->num_regs() == 2, "unbound fatproj???" ); | |
1287 lrg->Clear(); // Clear the mask | |
1288 lrg->Insert(reg); // Set regmask to match selected reg | |
1289 lrg->Insert(OptoReg::add(reg,-1)); | |
1290 lrg->set_mask_size(2); | |
1291 } else { // Else fatproj | |
1292 // mask must be equal to fatproj bits, by definition | |
1293 } | |
1294 #ifndef PRODUCT | |
1295 if (trace_spilling()) { | |
1296 ttyLocker ttyl; | |
1297 tty->print("L%d selected ", lidx); | |
1298 lrg->mask().dump(); | |
1299 tty->print(" from "); | |
1300 avail_rm.dump(); | |
1301 tty->cr(); | |
1302 } | |
1303 #endif | |
1304 // Note that reg is the highest-numbered register in the newly-bound mask. | |
1305 } // end color available case | |
1306 | |
1307 //--------------- | |
1308 // Live range is live and no colors available | |
1309 else { | |
1310 assert( lrg->alive(), "" ); | |
295
ea18057223c4
6732194: Data corruption dependent on -server/-client/-Xbatch
never
parents:
196
diff
changeset
|
1311 assert( !lrg->_fat_proj || lrg->is_multidef() || |
0 | 1312 lrg->_def->outcnt() > 0, "fat_proj cannot spill"); |
1313 assert( !orig_mask.is_AllStack(), "All Stack does not spill" ); | |
1314 | |
1315 // Assign the special spillreg register | |
1316 lrg->set_reg(OptoReg::Name(spill_reg++)); | |
1317 // Do not empty the regmask; leave mask_size lying around | |
1318 // for use during Spilling | |
1319 #ifndef PRODUCT | |
1320 if( trace_spilling() ) { | |
1321 ttyLocker ttyl; | |
1322 tty->print("L%d spilling with neighbors: ", lidx); | |
1323 s->dump(); | |
1324 debug_only(tty->print(" original mask: ")); | |
1325 debug_only(orig_mask.dump()); | |
1326 dump_lrg(lidx); | |
1327 } | |
1328 #endif | |
1329 } // end spill case | |
1330 | |
1331 } | |
1332 | |
1333 return spill_reg-LRG::SPILL_REG; // Return number of spills | |
1334 } | |
1335 | |
1336 | |
1337 //------------------------------copy_was_spilled------------------------------- | |
1338 // Copy 'was_spilled'-edness from the source Node to the dst Node. | |
1339 void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) { | |
1340 if( _spilled_once.test(src->_idx) ) { | |
1341 _spilled_once.set(dst->_idx); | |
1342 lrgs(Find(dst))._was_spilled1 = 1; | |
1343 if( _spilled_twice.test(src->_idx) ) { | |
1344 _spilled_twice.set(dst->_idx); | |
1345 lrgs(Find(dst))._was_spilled2 = 1; | |
1346 } | |
1347 } | |
1348 } | |
1349 | |
1350 //------------------------------set_was_spilled-------------------------------- | |
1351 // Set the 'spilled_once' or 'spilled_twice' flag on a node. | |
1352 void PhaseChaitin::set_was_spilled( Node *n ) { | |
1353 if( _spilled_once.test_set(n->_idx) ) | |
1354 _spilled_twice.set(n->_idx); | |
1355 } | |
1356 | |
1357 //------------------------------fixup_spills----------------------------------- | |
1358 // Convert Ideal spill instructions into proper FramePtr + offset Loads and | |
1359 // Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are. | |
1360 void PhaseChaitin::fixup_spills() { | |
1361 // This function does only cisc spill work. | |
1362 if( !UseCISCSpill ) return; | |
1363 | |
1364 NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); ) | |
1365 | |
1366 // Grab the Frame Pointer | |
1367 Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr); | |
1368 | |
1369 // For all blocks | |
1370 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
1371 Block *b = _cfg._blocks[i]; | |
1372 | |
1373 // For all instructions in block | |
1374 uint last_inst = b->end_idx(); | |
1375 for( uint j = 1; j <= last_inst; j++ ) { | |
1376 Node *n = b->_nodes[j]; | |
1377 | |
1378 // Dead instruction??? | |
1379 assert( n->outcnt() != 0 ||// Nothing dead after post alloc | |
1380 C->top() == n || // Or the random TOP node | |
1381 n->is_Proj(), // Or a fat-proj kill node | |
1382 "No dead instructions after post-alloc" ); | |
1383 | |
1384 int inp = n->cisc_operand(); | |
1385 if( inp != AdlcVMDeps::Not_cisc_spillable ) { | |
1386 // Convert operand number to edge index number | |
1387 MachNode *mach = n->as_Mach(); | |
1388 inp = mach->operand_index(inp); | |
1389 Node *src = n->in(inp); // Value to load or store | |
1390 LRG &lrg_cisc = lrgs( Find_const(src) ); | |
1391 OptoReg::Name src_reg = lrg_cisc.reg(); | |
1392 // Doubles record the HIGH register of an adjacent pair. | |
1393 src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs()); | |
1394 if( OptoReg::is_stack(src_reg) ) { // If input is on stack | |
1395 // This is a CISC Spill, get stack offset and construct new node | |
1396 #ifndef PRODUCT | |
1397 if( TraceCISCSpill ) { | |
1398 tty->print(" reg-instr: "); | |
1399 n->dump(); | |
1400 } | |
1401 #endif | |
1402 int stk_offset = reg2offset(src_reg); | |
1403 // Bailout if we might exceed node limit when spilling this instruction | |
1404 C->check_node_count(0, "out of nodes fixing spills"); | |
1405 if (C->failing()) return; | |
1406 // Transform node | |
1407 MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach(); | |
1408 cisc->set_req(inp,fp); // Base register is frame pointer | |
1409 if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) { | |
1410 assert( cisc->oper_input_base() == 2, "Only adding one edge"); | |
1411 cisc->ins_req(1,src); // Requires a memory edge | |
1412 } | |
1413 b->_nodes.map(j,cisc); // Insert into basic block | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
113
diff
changeset
|
1414 n->subsume_by(cisc); // Correct graph |
0 | 1415 // |
1416 ++_used_cisc_instructions; | |
1417 #ifndef PRODUCT | |
1418 if( TraceCISCSpill ) { | |
1419 tty->print(" cisc-instr: "); | |
1420 cisc->dump(); | |
1421 } | |
1422 #endif | |
1423 } else { | |
1424 #ifndef PRODUCT | |
1425 if( TraceCISCSpill ) { | |
1426 tty->print(" using reg-instr: "); | |
1427 n->dump(); | |
1428 } | |
1429 #endif | |
1430 ++_unused_cisc_instructions; // input can be on stack | |
1431 } | |
1432 } | |
1433 | |
1434 } // End of for all instructions | |
1435 | |
1436 } // End of for all blocks | |
1437 } | |
1438 | |
1439 //------------------------------find_base_for_derived-------------------------- | |
1440 // Helper to stretch above; recursively discover the base Node for a | |
1441 // given derived Node. Easy for AddP-related machine nodes, but needs | |
1442 // to be recursive for derived Phis. | |
// Recursively discover the base Node for a given derived Node.  Results are
// memoized in derived_base_map (indexed by node _idx; 0 == not computed).
// May create new nodes (shared machine-NULL, base Phis) and new live ranges,
// bumping maxlrg.
Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) {
  // See if already computed; if so return it
  if( derived_base_map[derived->_idx] )
    return derived_base_map[derived->_idx];

  // See if this happens to be a base.
  // NOTE: we use TypePtr instead of TypeOopPtr because we can have
  // pointers derived from NULL!  These are always along paths that
  // can't happen at run-time but the optimizer cannot deduce it so
  // we have to handle it gracefully.
  assert(!derived->bottom_type()->isa_narrowoop() ||
         derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
  const TypePtr *tj = derived->bottom_type()->isa_ptr();
  // If its an OOP with a non-zero offset, then it is derived.
  if( tj == NULL || tj->_offset == 0 ) {
    // Not derived: the node is its own base.
    derived_base_map[derived->_idx] = derived;
    return derived;
  }
  // Derived is NULL+offset?  Base is NULL!
  if( derived->is_Con() ) {
    // Use the single shared machine-NULL node as the base.
    Node *base = _matcher.mach_null();
    assert(base != NULL, "sanity");
    if (base->in(0) == NULL) {
      // Initialize it once and make it shared:
      // set control to _root and place it into Start block
      // (where top() node is placed).
      base->init_req(0, _cfg._root);
      Block *startb = _cfg._bbs[C->top()->_idx];
      startb->_nodes.insert(startb->find_node(C->top()), base );
      _cfg._bbs.map( base->_idx, startb );
      assert (n2lidx(base) == 0, "should not have LRG yet");
    }
    if (n2lidx(base) == 0) {
      // Give the shared NULL node a live range the first time it is needed.
      new_lrg(base, maxlrg++);
    }
    assert(base->in(0) == _cfg._root &&
           _cfg._bbs[base->_idx] == _cfg._bbs[C->top()->_idx], "base NULL should be shared");
    derived_base_map[derived->_idx] = base;
    return base;
  }

  // Check for AddP-related opcodes
  if( !derived->is_Phi() ) {
    assert( derived->as_Mach()->ideal_Opcode() == Op_AddP, "" );
    Node *base = derived->in(AddPNode::Base);
    derived_base_map[derived->_idx] = base;
    return base;
  }

  // Recursively find bases for Phis.
  // First check to see if we can avoid a base Phi here.
  Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg);
  uint i;
  for( i = 2; i < derived->req(); i++ )
    if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg))
      break;
  // Went to the end without finding any different bases?
  if( i == derived->req() ) {   // No need for a base Phi here
    derived_base_map[derived->_idx] = base;
    return base;
  }

  // Now we see we need a base-Phi here to merge the bases
  const Type *t = base->bottom_type();
  base = new (C, derived->req()) PhiNode( derived->in(0), t );
  for( i = 1; i < derived->req(); i++ ) {
    // Base of each derived input; widen the Phi's type to cover them all.
    base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg));
    t = t->meet(base->in(i)->bottom_type());
  }
  base->as_Phi()->set_type(t);

  // Search the current block for an existing base-Phi
  Block *b = _cfg._bbs[derived->_idx];
  for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi
    Node *phi = b->_nodes[i];
    if( !phi->is_Phi() ) {      // Found end of Phis with no match?
      b->_nodes.insert( i, base ); // Must insert created Phi here as base
      _cfg._bbs.map( base->_idx, b );
      new_lrg(base,maxlrg++);
      break;
    }
    // See if Phi matches.
    uint j;
    for( j = 1; j < base->req(); j++ )
      if( phi->in(j) != base->in(j) &&
          !(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs
        break;
    if( j == base->req() ) {    // All inputs match?
      base = phi;               // Then use existing 'phi' and drop 'base'
      break;
    }
  }

  // Cache info for later passes
  derived_base_map[derived->_idx] = base;
  return base;
}
1541 | |
1542 | |
1543 //------------------------------stretch_base_pointer_live_ranges--------------- | |
1544 // At each Safepoint, insert extra debug edges for each pair of derived value/ | |
1545 // base pointer that is live across the Safepoint for oopmap building. The | |
1546 // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the | |
1547 // required edge set. | |
// At each Safepoint, insert extra debug edges for each pair of derived value/
// base pointer that is live across the Safepoint, for oopmap building.
// Returns true when liveness must be recomputed (new live ranges created or
// a base pointer was stretched across block boundaries).
bool PhaseChaitin::stretch_base_pointer_live_ranges( ResourceArea *a ) {
  int must_recompute_live = false;
  uint maxlrg = _maxlrg;
  // Per-node cache of reaching base pointers; 0 == not yet computed.
  Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique());
  memset( derived_base_map, 0, sizeof(Node*)*C->unique() );

  // For all blocks in RPO do...
  for( uint i=0; i<_cfg._num_blocks; i++ ) {
    Block *b = _cfg._blocks[i];
    // Note use of deep-copy constructor.  I cannot hammer the original
    // liveout bits, because they are needed by the following coalesce pass.
    IndexSet liveout(_live->live(b));

    // Walk the block's instructions bottom-up, maintaining 'liveout' so it
    // describes liveness just below the current instruction.
    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
      Node *n = b->_nodes[j-1];

      // Pre-split compares of loop-phis.  Loop-phis form a cycle we would
      // like to see in the same register.  Compare uses the loop-phi and so
      // extends its live range BUT cannot be part of the cycle.  If this
      // extended live range overlaps with the update of the loop-phi value
      // we need both alive at the same time -- which requires at least 1
      // copy.  But because Intel has only 2-address registers we end up with
      // at least 2 copies, one before the loop-phi update instruction and
      // one after.  Instead we split the input to the compare just after the
      // phi.
      if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {
        Node *phi = n->in(1);
        if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
          Block *phi_block = _cfg._bbs[phi->_idx];
          if( _cfg._bbs[phi_block->pred(2)->_idx] == b ) {
            const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
            Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
            insert_proj( phi_block, 1, spill, maxlrg++ );
            n->set_req(1,spill);
            must_recompute_live = true;
          }
        }
      }

      // Get value being defined
      uint lidx = n2lidx(n);
      if( lidx && lidx < _maxlrg /* Ignore the occasional brand-new live range */) {
        // Remove from live-out set
        liveout.remove(lidx);

        // Copies do not define a new value and so do not interfere.
        // Remove the copies source from the liveout set before interfering.
        uint idx = n->is_Copy();
        if( idx ) liveout.remove( n2lidx(n->in(idx)) );
      }

      // Found a safepoint?
      JVMState *jvms = n->jvms();
      if( jvms ) {
        // Now scan for a live derived pointer
        IndexSetIterator elements(&liveout);
        uint neighbor;
        while ((neighbor = elements.next()) != 0) {
          // Find reaching DEF for base and derived values
          // This works because we are still in SSA during this call.
          Node *derived = lrgs(neighbor)._def;
          const TypePtr *tj = derived->bottom_type()->isa_ptr();
          assert(!derived->bottom_type()->isa_narrowoop() ||
                 derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
          // If its an OOP with a non-zero offset, then it is derived.
          if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {
            Node *base = find_base_for_derived( derived_base_map, derived, maxlrg );
            assert( base->_idx < _names.Size(), "" );
            // Add reaching DEFs of derived pointer and base pointer as a
            // pair of inputs
            n->add_req( derived );
            n->add_req( base );

            // See if the base pointer is already live to this point.
            // Since I'm working on the SSA form, live-ness amounts to
            // reaching def's.  So if I find the base's live range then
            // I know the base's def reaches here.
            if( (n2lidx(base) >= _maxlrg || // (Brand new base (hence not live) or
                 !liveout.member( n2lidx(base) ) ) && // not live) AND
                 (n2lidx(base) > 0)                && // not a constant
                 _cfg._bbs[base->_idx] != b ) {       //  base not def'd in blk)
              // Base pointer is not currently live.  Since I stretched
              // the base pointer to here and it crosses basic-block
              // boundaries, the global live info is now incorrect.
              // Recompute live.
              must_recompute_live = true;
            } // End of if base pointer is not live to debug info
          }
        } // End of scan all live data for derived ptrs crossing GC point
      } // End of if found a GC point

      // Make all inputs live
      if( !n->is_Phi() ) {      // Phi function uses come from prior block
        for( uint k = 1; k < n->req(); k++ ) {
          uint lidx = n2lidx(n->in(k));
          if( lidx < _maxlrg )
            liveout.insert( lidx );
        }
      }

    } // End of forall instructions in block
    liveout.clear();  // Free the memory used by liveout.

  } // End of forall blocks
  _maxlrg = maxlrg;

  // If I created a new live range I need to recompute live
  if( maxlrg != _ifg->_maxlrg )
    must_recompute_live = true;

  return must_recompute_live != 0;
}
1660 | |
1661 | |
1662 //------------------------------add_reference---------------------------------- | |
1663 // Extend the node to LRG mapping | |
1664 void PhaseChaitin::add_reference( const Node *node, const Node *old_node ) { | |
1665 _names.extend( node->_idx, n2lidx(old_node) ); | |
1666 } | |
1667 | |
1668 //------------------------------dump------------------------------------------- | |
1669 #ifndef PRODUCT | |
// Dump one node: its live range number, its register assignment (post
// allocation) or register mask (pre allocation), then every input in
// "L<lrg>[reg]/N<idx>" form, followed by precedence edges and spill flags.
void PhaseChaitin::dump( const Node *n ) const {
  uint r = (n->_idx < _names.Size() ) ? Find_const(n) : 0;
  tty->print("L%d",r);
  if( r && n->Opcode() != Op_Phi ) {
    if( _node_regs ) {          // Got a post-allocation copy of allocation?
      tty->print("[");
      // Doubles/longs record a second (high) register; print it first.
      OptoReg::Name second = get_reg_second(n);
      if( OptoReg::is_valid(second) ) {
        if( OptoReg::is_reg(second) )
          tty->print("%s:",Matcher::regName[second]);
        else
          tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second));
      }
      OptoReg::Name first = get_reg_first(n);
      if( OptoReg::is_reg(first) )
        tty->print("%s]",Matcher::regName[first]);
      else
        tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first));
    } else
      n->out_RegMask().dump();
  }
  tty->print("/N%d\t",n->_idx);
  tty->print("%s === ", n->Name());
  uint k;
  for( k = 0; k < n->req(); k++) {
    Node *m = n->in(k);
    if( !m ) tty->print("_ ");
    else {
      uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
      tty->print("L%d",r);
      // Data MultiNode's can have projections with no real registers.
      // Don't die while dumping them.
      int op = n->Opcode();
      if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) {
        if( _node_regs ) {
          tty->print("[");
          OptoReg::Name second = get_reg_second(n->in(k));
          if( OptoReg::is_valid(second) ) {
            if( OptoReg::is_reg(second) )
              tty->print("%s:",Matcher::regName[second]);
            else
              tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer),
                         reg2offset_unchecked(second));
          }
          OptoReg::Name first = get_reg_first(n->in(k));
          if( OptoReg::is_reg(first) )
            tty->print("%s]",Matcher::regName[first]);
          else
            tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer),
                       reg2offset_unchecked(first));
        } else
          n->in_RegMask(k).dump();
      }
      tty->print("/N%d ",m->_idx);
    }
  }
  // Precedence (non-required) edges follow the "|" separator.
  if( k < n->len() && n->in(k) ) tty->print("| ");
  for( ; k < n->len(); k++ ) {
    Node *m = n->in(k);
    if( !m ) break;
    uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
    tty->print("L%d",r);
    tty->print("/N%d ",m->_idx);
  }
  if( n->is_Mach() ) n->as_Mach()->dump_spec(tty);
  else n->dump_spec(tty);
  // Flag nodes that have already been spilled once or twice.
  if( _spilled_once.test(n->_idx ) ) {
    tty->print(" Spill_1");
    if( _spilled_twice.test(n->_idx ) )
      tty->print(" Spill_2");
  }
  tty->print("\n");
}
1743 | |
1744 void PhaseChaitin::dump( const Block * b ) const { | |
1745 b->dump_head( &_cfg._bbs ); | |
1746 | |
1747 // For all instructions | |
1748 for( uint j = 0; j < b->_nodes.size(); j++ ) | |
1749 dump(b->_nodes[j]); | |
1750 // Print live-out info at end of block | |
1751 if( _live ) { | |
1752 tty->print("Liveout: "); | |
1753 IndexSet *live = _live->live(b); | |
1754 IndexSetIterator elements(live); | |
1755 tty->print("{"); | |
1756 uint i; | |
1757 while ((i = elements.next()) != 0) { | |
1758 tty->print("L%d ", Find_const(i)); | |
1759 } | |
1760 tty->print_cr("}"); | |
1761 } | |
1762 tty->print("\n"); | |
1763 } | |
1764 | |
1765 void PhaseChaitin::dump() const { | |
1766 tty->print( "--- Chaitin -- argsize: %d framesize: %d ---\n", | |
1767 _matcher._new_SP, _framesize ); | |
1768 | |
1769 // For all blocks | |
1770 for( uint i = 0; i < _cfg._num_blocks; i++ ) | |
1771 dump(_cfg._blocks[i]); | |
1772 // End of per-block dump | |
1773 tty->print("\n"); | |
1774 | |
1775 if (!_ifg) { | |
1776 tty->print("(No IFG.)\n"); | |
1777 return; | |
1778 } | |
1779 | |
1780 // Dump LRG array | |
1781 tty->print("--- Live RanGe Array ---\n"); | |
1782 for(uint i2 = 1; i2 < _maxlrg; i2++ ) { | |
1783 tty->print("L%d: ",i2); | |
1784 if( i2 < _ifg->_maxlrg ) lrgs(i2).dump( ); | |
2016
361783318e7e
7004940: CTW: assert(!def_outside->member(r)) failed: Use of external LRG overlaps the same LRG
never
parents:
1972
diff
changeset
|
1785 else tty->print_cr("new LRG"); |
0 | 1786 } |
1787 tty->print_cr(""); | |
1788 | |
1789 // Dump lo-degree list | |
1790 tty->print("Lo degree: "); | |
1791 for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next ) | |
1792 tty->print("L%d ",i3); | |
1793 tty->print_cr(""); | |
1794 | |
1795 // Dump lo-stk-degree list | |
1796 tty->print("Lo stk degree: "); | |
1797 for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next ) | |
1798 tty->print("L%d ",i4); | |
1799 tty->print_cr(""); | |
1800 | |
1801 // Dump lo-degree list | |
1802 tty->print("Hi degree: "); | |
1803 for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next ) | |
1804 tty->print("L%d ",i5); | |
1805 tty->print_cr(""); | |
1806 } | |
1807 | |
1808 //------------------------------dump_degree_lists------------------------------ | |
1809 void PhaseChaitin::dump_degree_lists() const { | |
1810 // Dump lo-degree list | |
1811 tty->print("Lo degree: "); | |
1812 for( uint i = _lo_degree; i; i = lrgs(i)._next ) | |
1813 tty->print("L%d ",i); | |
1814 tty->print_cr(""); | |
1815 | |
1816 // Dump lo-stk-degree list | |
1817 tty->print("Lo stk degree: "); | |
1818 for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next ) | |
1819 tty->print("L%d ",i2); | |
1820 tty->print_cr(""); | |
1821 | |
1822 // Dump lo-degree list | |
1823 tty->print("Hi degree: "); | |
1824 for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next ) | |
1825 tty->print("L%d ",i3); | |
1826 tty->print_cr(""); | |
1827 } | |
1828 | |
1829 //------------------------------dump_simplified-------------------------------- | |
1830 void PhaseChaitin::dump_simplified() const { | |
1831 tty->print("Simplified: "); | |
1832 for( uint i = _simplified; i; i = lrgs(i)._next ) | |
1833 tty->print("L%d ",i); | |
1834 tty->print_cr(""); | |
1835 } | |
1836 | |
1837 static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) { | |
1838 if ((int)reg < 0) | |
1839 sprintf(buf, "<OptoReg::%d>", (int)reg); | |
1840 else if (OptoReg::is_reg(reg)) | |
1841 strcpy(buf, Matcher::regName[reg]); | |
1842 else | |
1843 sprintf(buf,"%s + #%d",OptoReg::regname(OptoReg::c_frame_pointer), | |
1844 pc->reg2offset(reg)); | |
1845 return buf+strlen(buf); | |
1846 } | |
1847 | |
1848 //------------------------------dump_register---------------------------------- | |
1849 // Dump a register name into a buffer. Be intelligent if we get called | |
1850 // before allocation is complete. | |
// Dump a register name for node 'n' into 'buf'.  Degrades gracefully when
// called before allocation is complete (no _node_regs, no _ifg, or no bound
// register yet).  Returns a pointer just past the written text.
char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
  if( !this ) {                 // Not got anything?
    // NOTE(review): testing 'this' against NULL inside a member function is
    // technically undefined behavior in standard C++; callers appear to rely
    // on invoking this with a NULL PhaseChaitin* -- confirm before changing.
    sprintf(buf,"N%d",n->_idx); // Then use Node index
  } else if( _node_regs ) {
    // Post allocation, use direct mappings, no LRG info available
    print_reg( get_reg_first(n), this, buf );
  } else {
    uint lidx = Find_const(n); // Grab LRG number
    if( !_ifg ) {
      sprintf(buf,"L%d",lidx);  // No register binding yet
    } else if( !lidx ) {        // Special, not allocated value
      strcpy(buf,"Special");
    } else if( (lrgs(lidx).num_regs() == 1)
                 ? !lrgs(lidx).mask().is_bound1()
                 : !lrgs(lidx).mask().is_bound2() ) {
      sprintf(buf,"L%d",lidx);  // No register binding yet
    } else {                    // Hah!  We have a bound machine register
      print_reg( lrgs(lidx).reg(), this, buf );
    }
  }
  return buf+strlen(buf);
}
1873 | |
1874 //----------------------dump_for_spill_split_recycle-------------------------- | |
1875 void PhaseChaitin::dump_for_spill_split_recycle() const { | |
1876 if( WizardMode && (PrintCompilation || PrintOpto) ) { | |
1877 // Display which live ranges need to be split and the allocator's state | |
1878 tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt); | |
1879 for( uint bidx = 1; bidx < _maxlrg; bidx++ ) { | |
1880 if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) { | |
1881 tty->print("L%d: ", bidx); | |
1882 lrgs(bidx).dump(); | |
1883 } | |
1884 } | |
1885 tty->cr(); | |
1886 dump(); | |
1887 } | |
1888 } | |
1889 | |
1890 //------------------------------dump_frame------------------------------------ | |
// Print a slot-by-slot map of the compiled frame, walking from the top of
// the incoming-argument area down to the new SP: register parms, padding,
// incoming args, preserve areas, spill area, and outgoing args.
void PhaseChaitin::dump_frame() const {
  const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
  const TypeTuple *domain = C->tf()->domain();
  const int argcnt = domain->cnt() - TypeFunc::Parms;

  // Incoming arguments in registers dump
  for( int k = 0; k < argcnt; k++ ) {
    OptoReg::Name parmreg = _matcher._parm_regs[k].first();
    if( OptoReg::is_reg(parmreg)) {
      const char *reg_name = OptoReg::regname(parmreg);
      tty->print("#r%3.3d %s", parmreg, reg_name);
      // Second register of a register pair, if any.
      parmreg = _matcher._parm_regs[k].second();
      if( OptoReg::is_reg(parmreg)) {
        tty->print(":%s", OptoReg::regname(parmreg));
      }
      tty->print(" : parm %d: ", k);
      domain->field_at(k + TypeFunc::Parms)->dump();
      tty->print_cr("");
    }
  }

  // Check for un-owned padding above incoming args
  OptoReg::Name reg = _matcher._new_SP;
  if( reg > _matcher._in_arg_limit ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg));
  }

  // Incoming argument area dump
  OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots());
  while( reg > begin_in_arg ) {
    reg = OptoReg::add(reg, -1);
    tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
    // Identify which parameter occupies this slot, if any.
    int j;
    for( j = 0; j < argcnt; j++) {
      if( _matcher._parm_regs[j].first() == reg ||
          _matcher._parm_regs[j].second() == reg ) {
        tty->print("parm %d: ",j);
        domain->field_at(j + TypeFunc::Parms)->dump();
        tty->print_cr("");
        break;
      }
    }
    if( j >= argcnt )
      tty->print_cr("HOLE, owned by SELF");
  }

  // Old outgoing preserve area
  while( reg > _matcher._old_SP ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg));
  }

  // Old SP
  tty->print_cr("# -- Old %s -- Framesize: %d --",fp,
    reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize);

  // Preserve area dump
  reg = OptoReg::add(reg, -1);
  while( OptoReg::is_stack(reg)) {
    tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
    if( _matcher.return_addr() == reg )
      tty->print_cr("return address");
    else if( _matcher.return_addr() == OptoReg::add(reg,1) &&
             VerifyStackAtCalls )
      tty->print_cr("0xBADB100D +VerifyStackAtCalls");
    else if ((int)OptoReg::reg2stack(reg) < C->fixed_slots())
      tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg));
    else
      tty->print_cr("pad2, in_preserve");
    reg = OptoReg::add(reg, -1);
  }

  // Spill area dump
  reg = OptoReg::add(_matcher._new_SP, _framesize );
  while( reg > _matcher._out_arg_limit ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg));
  }

  // Outgoing argument area dump
  while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg));
  }

  // Outgoing new preserve area
  while( reg > _matcher._new_SP ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg));
  }
  tty->print_cr("#");
}
1984 | |
1985 //------------------------------dump_bb---------------------------------------- | |
1986 void PhaseChaitin::dump_bb( uint pre_order ) const { | |
1987 tty->print_cr("---dump of B%d---",pre_order); | |
1988 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
1989 Block *b = _cfg._blocks[i]; | |
1990 if( b->_pre_order == pre_order ) | |
1991 dump(b); | |
1992 } | |
1993 } | |
1994 | |
1995 //------------------------------dump_lrg--------------------------------------- | |
2016
361783318e7e
7004940: CTW: assert(!def_outside->member(r)) failed: Use of external LRG overlaps the same LRG
never
parents:
1972
diff
changeset
|
// Dump one live range: its LRG info, its interference-graph neighbors, and
// every node in the CFG that defines it (plus, unless defs_only, every node
// that uses it), grouped by basic block.
void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
  tty->print_cr("---dump of L%d---",lidx);

  if( _ifg ) {
    if( lidx >= _maxlrg ) {
      tty->print("Attempt to print live range index beyond max live range.\n");
      return;
    }
    tty->print("L%d: ",lidx);
    if( lidx < _ifg->_maxlrg ) lrgs(lidx).dump( );
    else tty->print_cr("new LRG");  // created after the IFG was built
  }
  if( _ifg && lidx < _ifg->_maxlrg) {
    tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx));
    _ifg->neighbors(lidx)->dump();
    tty->cr();
  }
  // For all blocks
  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
    Block *b = _cfg._blocks[i];
    int dump_once = 0;  // print the block header only once per block

    // For all instructions
    for( uint j = 0; j < b->_nodes.size(); j++ ) {
      Node *n = b->_nodes[j];
      // Node defines this live range?
      if( Find_const(n) == lidx ) {
        if( !dump_once++ ) {
          tty->cr();
          b->dump_head( &_cfg._bbs );
        }
        dump(n);
        continue;
      }
      // Otherwise, dump nodes that merely use this live range.
      if (!defs_only) {
        uint cnt = n->req();
        for( uint k = 1; k < cnt; k++ ) {
          Node *m = n->in(k);
          if (!m)  continue;  // be robust in the dumper
          if( Find_const(m) == lidx ) {
            if( !dump_once++ ) {
              tty->cr();
              b->dump_head( &_cfg._bbs );
            }
            dump(n);
          }
        }
      }
    }
  } // End of per-block dump
  tty->cr();
}
2047 #endif // not PRODUCT | |
2048 | |
2049 //------------------------------print_chaitin_statistics------------------------------- | |
// Cumulative allocator statistics, shared across all compilations in the
// process (reported by print_chaitin_statistics).
// Counts of spill instructions remaining in final code:
int PhaseChaitin::_final_loads = 0;
int PhaseChaitin::_final_stores = 0;
int PhaseChaitin::_final_memoves= 0;
int PhaseChaitin::_final_copies = 0;
// Corresponding accumulated costs (units defined where they are updated --
// not visible in this chunk; presumably frequency-weighted):
double PhaseChaitin::_final_load_cost = 0;
double PhaseChaitin::_final_store_cost = 0;
double PhaseChaitin::_final_memove_cost= 0;
double PhaseChaitin::_final_copy_cost = 0;
// Conservative-coalescing counters:
int PhaseChaitin::_conserv_coalesce = 0;
int PhaseChaitin::_conserv_coalesce_pair = 0;
int PhaseChaitin::_conserv_coalesce_trie = 0;
int PhaseChaitin::_conserv_coalesce_quad = 0;
int PhaseChaitin::_post_alloc = 0;
// Coalescing opportunities lost (private-private / control-flow interference):
int PhaseChaitin::_lost_opp_pp_coalesce = 0;
int PhaseChaitin::_lost_opp_cflow_coalesce = 0;
// CISC-spill instruction usage (see the CISC-spill pass earlier in this file):
int PhaseChaitin::_used_cisc_instructions = 0;
int PhaseChaitin::_unused_cisc_instructions = 0;
// Allocation attempts vs. successes:
int PhaseChaitin::_allocator_attempts = 0;
int PhaseChaitin::_allocator_successes = 0;
2069 | |
2070 #ifndef PRODUCT | |
2071 uint PhaseChaitin::_high_pressure = 0; | |
2072 uint PhaseChaitin::_low_pressure = 0; | |
2073 | |
2074 void PhaseChaitin::print_chaitin_statistics() { | |
2075 tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.", _final_loads, _final_stores, _final_memoves, _final_copies); | |
2076 tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.", _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost); | |
2077 tty->print_cr("Adjusted spill cost = %7.0f.", | |
2078 _final_load_cost*4.0 + _final_store_cost * 2.0 + | |
2079 _final_copy_cost*1.0 + _final_memove_cost*12.0); | |
2080 tty->print("Conservatively coalesced %d copies, %d pairs", | |
2081 _conserv_coalesce, _conserv_coalesce_pair); | |
2082 if( _conserv_coalesce_trie || _conserv_coalesce_quad ) | |
2083 tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad); | |
2084 tty->print_cr(", %d post alloc.", _post_alloc); | |
2085 if( _lost_opp_pp_coalesce || _lost_opp_cflow_coalesce ) | |
2086 tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.", | |
2087 _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce ); | |
2088 if( _used_cisc_instructions || _unused_cisc_instructions ) | |
2089 tty->print_cr("Used cisc instruction %d, remained in register %d", | |
2090 _used_cisc_instructions, _unused_cisc_instructions); | |
2091 if( _allocator_successes != 0 ) | |
2092 tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes); | |
2093 tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure); | |
2094 } | |
2095 #endif // not PRODUCT |