Mercurial > hg > truffle
annotate src/share/vm/opto/chaitin.cpp @ 1721:413ad0331a0c
6977924: Changes for 6975078 produce build error with certain gcc versions
Summary: The changes introduced for 6975078 assign badHeapOopVal to the _allocation field in the ResourceObj class. In 32 bit linux builds with certain versions of gcc this assignment will be flagged as an error while compiling allocation.cpp. In 32 bit builds the constant value badHeapOopVal (which is cast to an intptr_t) is negative. The _allocation field is typed as an unsigned intptr_t and gcc catches this as an error.
Reviewed-by: jcoomes, ysr, phh
author | johnc |
---|---|
date | Wed, 18 Aug 2010 10:59:06 -0700 |
parents | 0e35fa8ebccd |
children | f95d63e2154a |
rev | line source |
---|---|
0 | 1 /* |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1012
diff
changeset
|
2 * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1012
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1012
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1012
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_chaitin.cpp.incl" | |
27 | |
28 //============================================================================= | |
29 | |
30 #ifndef PRODUCT | |
31 void LRG::dump( ) const { | |
32 ttyLocker ttyl; | |
33 tty->print("%d ",num_regs()); | |
34 _mask.dump(); | |
35 if( _msize_valid ) { | |
36 if( mask_size() == compute_mask_size() ) tty->print(", #%d ",_mask_size); | |
37 else tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size()); | |
38 } else { | |
39 tty->print(", #?(%d) ",_mask.Size()); | |
40 } | |
41 | |
42 tty->print("EffDeg: "); | |
43 if( _degree_valid ) tty->print( "%d ", _eff_degree ); | |
44 else tty->print("? "); | |
45 | |
295
ea18057223c4
6732194: Data corruption dependent on -server/-client/-Xbatch
never
parents:
196
diff
changeset
|
46 if( is_multidef() ) { |
0 | 47 tty->print("MultiDef "); |
48 if (_defs != NULL) { | |
49 tty->print("("); | |
50 for (int i = 0; i < _defs->length(); i++) { | |
51 tty->print("N%d ", _defs->at(i)->_idx); | |
52 } | |
53 tty->print(") "); | |
54 } | |
55 } | |
56 else if( _def == 0 ) tty->print("Dead "); | |
57 else tty->print("Def: N%d ",_def->_idx); | |
58 | |
59 tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score()); | |
60 // Flags | |
61 if( _is_oop ) tty->print("Oop "); | |
62 if( _is_float ) tty->print("Float "); | |
63 if( _was_spilled1 ) tty->print("Spilled "); | |
64 if( _was_spilled2 ) tty->print("Spilled2 "); | |
65 if( _direct_conflict ) tty->print("Direct_conflict "); | |
66 if( _fat_proj ) tty->print("Fat "); | |
67 if( _was_lo ) tty->print("Lo "); | |
68 if( _has_copy ) tty->print("Copy "); | |
69 if( _at_risk ) tty->print("Risk "); | |
70 | |
71 if( _must_spill ) tty->print("Must_spill "); | |
72 if( _is_bound ) tty->print("Bound "); | |
73 if( _msize_valid ) { | |
74 if( _degree_valid && lo_degree() ) tty->print("Trivial "); | |
75 } | |
76 | |
77 tty->cr(); | |
78 } | |
79 #endif | |
80 | |
81 //------------------------------score------------------------------------------ | |
82 // Compute score from cost and area. Low score is best to spill. | |
83 static double raw_score( double cost, double area ) { | |
84 return cost - (area*RegisterCostAreaRatio) * 1.52588e-5; | |
85 } | |
86 | |
87 double LRG::score() const { | |
88 // Scale _area by RegisterCostAreaRatio/64K then subtract from cost. | |
89 // Bigger area lowers score, encourages spilling this live range. | |
90 // Bigger cost raise score, prevents spilling this live range. | |
91 // (Note: 1/65536 is the magic constant below; I dont trust the C optimizer | |
92 // to turn a divide by a constant into a multiply by the reciprical). | |
93 double score = raw_score( _cost, _area); | |
94 | |
95 // Account for area. Basically, LRGs covering large areas are better | |
96 // to spill because more other LRGs get freed up. | |
97 if( _area == 0.0 ) // No area? Then no progress to spill | |
98 return 1e35; | |
99 | |
100 if( _was_spilled2 ) // If spilled once before, we are unlikely | |
101 return score + 1e30; // to make progress again. | |
102 | |
103 if( _cost >= _area*3.0 ) // Tiny area relative to cost | |
104 return score + 1e17; // Probably no progress to spill | |
105 | |
106 if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost | |
107 return score + 1e10; // Likely no progress to spill | |
108 | |
109 return score; | |
110 } | |
111 | |
112 //------------------------------LRG_List--------------------------------------- | |
113 LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) { | |
114 memset( _lidxs, 0, sizeof(uint)*max ); | |
115 } | |
116 | |
117 void LRG_List::extend( uint nidx, uint lidx ) { | |
118 _nesting.check(); | |
119 if( nidx >= _max ) { | |
120 uint size = 16; | |
121 while( size <= nidx ) size <<=1; | |
122 _lidxs = REALLOC_RESOURCE_ARRAY( uint, _lidxs, _max, size ); | |
123 _max = size; | |
124 } | |
125 while( _cnt <= nidx ) | |
126 _lidxs[_cnt++] = 0; | |
127 _lidxs[nidx] = lidx; | |
128 } | |
129 | |
130 #define NUMBUCKS 3 | |
131 | |
132 //------------------------------Chaitin---------------------------------------- | |
133 PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher) | |
134 : PhaseRegAlloc(unique, cfg, matcher, | |
135 #ifndef PRODUCT | |
136 print_chaitin_statistics | |
137 #else | |
138 NULL | |
139 #endif | |
140 ), | |
141 _names(unique), _uf_map(unique), | |
142 _maxlrg(0), _live(0), | |
143 _spilled_once(Thread::current()->resource_area()), | |
144 _spilled_twice(Thread::current()->resource_area()), | |
145 _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0), | |
146 _oldphi(unique) | |
147 #ifndef PRODUCT | |
148 , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling")) | |
149 #endif | |
150 { | |
151 NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); ) | |
673 | 152 |
153 _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq); | |
154 | |
0 | 155 uint i,j; |
156 // Build a list of basic blocks, sorted by frequency | |
157 _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); | |
158 // Experiment with sorting strategies to speed compilation | |
159 double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket | |
160 Block **buckets[NUMBUCKS]; // Array of buckets | |
161 uint buckcnt[NUMBUCKS]; // Array of bucket counters | |
162 double buckval[NUMBUCKS]; // Array of bucket value cutoffs | |
163 for( i = 0; i < NUMBUCKS; i++ ) { | |
164 buckets[i] = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); | |
165 buckcnt[i] = 0; | |
166 // Bump by three orders of magnitude each time | |
167 cutoff *= 0.001; | |
168 buckval[i] = cutoff; | |
169 for( j = 0; j < _cfg._num_blocks; j++ ) { | |
170 buckets[i][j] = NULL; | |
171 } | |
172 } | |
173 // Sort blocks into buckets | |
174 for( i = 0; i < _cfg._num_blocks; i++ ) { | |
175 for( j = 0; j < NUMBUCKS; j++ ) { | |
176 if( (j == NUMBUCKS-1) || (_cfg._blocks[i]->_freq > buckval[j]) ) { | |
177 // Assign block to end of list for appropriate bucket | |
178 buckets[j][buckcnt[j]++] = _cfg._blocks[i]; | |
179 break; // kick out of inner loop | |
180 } | |
181 } | |
182 } | |
183 // Dump buckets into final block array | |
184 uint blkcnt = 0; | |
185 for( i = 0; i < NUMBUCKS; i++ ) { | |
186 for( j = 0; j < buckcnt[i]; j++ ) { | |
187 _blks[blkcnt++] = buckets[i][j]; | |
188 } | |
189 } | |
190 | |
191 assert(blkcnt == _cfg._num_blocks, "Block array not totally filled"); | |
192 } | |
193 | |
194 void PhaseChaitin::Register_Allocate() { | |
195 | |
196 // Above the OLD FP (and in registers) are the incoming arguments. Stack | |
197 // slots in this area are called "arg_slots". Above the NEW FP (and in | |
198 // registers) is the outgoing argument area; above that is the spill/temp | |
199 // area. These are all "frame_slots". Arg_slots start at the zero | |
200 // stack_slots and count up to the known arg_size. Frame_slots start at | |
201 // the stack_slot #arg_size and go up. After allocation I map stack | |
202 // slots to actual offsets. Stack-slots in the arg_slot area are biased | |
203 // by the frame_size; stack-slots in the frame_slot area are biased by 0. | |
204 | |
205 _trip_cnt = 0; | |
206 _alternate = 0; | |
207 _matcher._allocation_started = true; | |
208 | |
209 ResourceArea live_arena; // Arena for liveness & IFG info | |
210 ResourceMark rm(&live_arena); | |
211 | |
212 // Need live-ness for the IFG; need the IFG for coalescing. If the | |
213 // liveness is JUST for coalescing, then I can get some mileage by renaming | |
214 // all copy-related live ranges low and then using the max copy-related | |
215 // live range as a cut-off for LIVE and the IFG. In other words, I can | |
216 // build a subset of LIVE and IFG just for copies. | |
217 PhaseLive live(_cfg,_names,&live_arena); | |
218 | |
219 // Need IFG for coalescing and coloring | |
220 PhaseIFG ifg( &live_arena ); | |
221 _ifg = &ifg; | |
222 | |
223 if (C->unique() > _names.Size()) _names.extend(C->unique()-1, 0); | |
224 | |
225 // Come out of SSA world to the Named world. Assign (virtual) registers to | |
226 // Nodes. Use the same register for all inputs and the output of PhiNodes | |
227 // - effectively ending SSA form. This requires either coalescing live | |
228 // ranges or inserting copies. For the moment, we insert "virtual copies" | |
229 // - we pretend there is a copy prior to each Phi in predecessor blocks. | |
230 // We will attempt to coalesce such "virtual copies" before we manifest | |
231 // them for real. | |
232 de_ssa(); | |
233 | |
566
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
234 #ifdef ASSERT |
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
235 // Veify the graph before RA. |
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
236 verify(&live_arena); |
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
237 #endif |
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
238 |
0 | 239 { |
240 NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); ) | |
241 _live = NULL; // Mark live as being not available | |
242 rm.reset_to_mark(); // Reclaim working storage | |
243 IndexSet::reset_memory(C, &live_arena); | |
244 ifg.init(_maxlrg); // Empty IFG | |
245 gather_lrg_masks( false ); // Collect LRG masks | |
246 live.compute( _maxlrg ); // Compute liveness | |
247 _live = &live; // Mark LIVE as being available | |
248 } | |
249 | |
250 // Base pointers are currently "used" by instructions which define new | |
251 // derived pointers. This makes base pointers live up to the where the | |
252 // derived pointer is made, but not beyond. Really, they need to be live | |
253 // across any GC point where the derived value is live. So this code looks | |
254 // at all the GC points, and "stretches" the live range of any base pointer | |
255 // to the GC point. | |
256 if( stretch_base_pointer_live_ranges(&live_arena) ) { | |
257 NOT_PRODUCT( Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler); ) | |
258 // Since some live range stretched, I need to recompute live | |
259 _live = NULL; | |
260 rm.reset_to_mark(); // Reclaim working storage | |
261 IndexSet::reset_memory(C, &live_arena); | |
262 ifg.init(_maxlrg); | |
263 gather_lrg_masks( false ); | |
264 live.compute( _maxlrg ); | |
265 _live = &live; | |
266 } | |
267 // Create the interference graph using virtual copies | |
268 build_ifg_virtual( ); // Include stack slots this time | |
269 | |
270 // Aggressive (but pessimistic) copy coalescing. | |
271 // This pass works on virtual copies. Any virtual copies which are not | |
272 // coalesced get manifested as actual copies | |
273 { | |
274 // The IFG is/was triangular. I am 'squaring it up' so Union can run | |
275 // faster. Union requires a 'for all' operation which is slow on the | |
276 // triangular adjacency matrix (quick reminder: the IFG is 'sparse' - | |
277 // meaning I can visit all the Nodes neighbors less than a Node in time | |
278 // O(# of neighbors), but I have to visit all the Nodes greater than a | |
279 // given Node and search them for an instance, i.e., time O(#MaxLRG)). | |
280 _ifg->SquareUp(); | |
281 | |
282 PhaseAggressiveCoalesce coalesce( *this ); | |
283 coalesce.coalesce_driver( ); | |
284 // Insert un-coalesced copies. Visit all Phis. Where inputs to a Phi do | |
285 // not match the Phi itself, insert a copy. | |
286 coalesce.insert_copies(_matcher); | |
287 } | |
288 | |
289 // After aggressive coalesce, attempt a first cut at coloring. | |
290 // To color, we need the IFG and for that we need LIVE. | |
291 { | |
292 NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); ) | |
293 _live = NULL; | |
294 rm.reset_to_mark(); // Reclaim working storage | |
295 IndexSet::reset_memory(C, &live_arena); | |
296 ifg.init(_maxlrg); | |
297 gather_lrg_masks( true ); | |
298 live.compute( _maxlrg ); | |
299 _live = &live; | |
300 } | |
301 | |
302 // Build physical interference graph | |
303 uint must_spill = 0; | |
304 must_spill = build_ifg_physical( &live_arena ); | |
305 // If we have a guaranteed spill, might as well spill now | |
306 if( must_spill ) { | |
307 if( !_maxlrg ) return; | |
308 // Bail out if unique gets too large (ie - unique > MaxNodeLimit) | |
309 C->check_node_count(10*must_spill, "out of nodes before split"); | |
310 if (C->failing()) return; | |
311 _maxlrg = Split( _maxlrg ); // Split spilling LRG everywhere | |
312 // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor) | |
313 // or we failed to split | |
314 C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split"); | |
315 if (C->failing()) return; | |
316 | |
317 NOT_PRODUCT( C->verify_graph_edges(); ) | |
318 | |
319 compact(); // Compact LRGs; return new lower max lrg | |
320 | |
321 { | |
322 NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); ) | |
323 _live = NULL; | |
324 rm.reset_to_mark(); // Reclaim working storage | |
325 IndexSet::reset_memory(C, &live_arena); | |
326 ifg.init(_maxlrg); // Build a new interference graph | |
327 gather_lrg_masks( true ); // Collect intersect mask | |
328 live.compute( _maxlrg ); // Compute LIVE | |
329 _live = &live; | |
330 } | |
331 build_ifg_physical( &live_arena ); | |
332 _ifg->SquareUp(); | |
333 _ifg->Compute_Effective_Degree(); | |
334 // Only do conservative coalescing if requested | |
335 if( OptoCoalesce ) { | |
336 // Conservative (and pessimistic) copy coalescing of those spills | |
337 PhaseConservativeCoalesce coalesce( *this ); | |
338 // If max live ranges greater than cutoff, don't color the stack. | |
339 // This cutoff can be larger than below since it is only done once. | |
340 coalesce.coalesce_driver( ); | |
341 } | |
342 compress_uf_map_for_nodes(); | |
343 | |
344 #ifdef ASSERT | |
566
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
345 verify(&live_arena, true); |
0 | 346 #endif |
347 } else { | |
348 ifg.SquareUp(); | |
349 ifg.Compute_Effective_Degree(); | |
350 #ifdef ASSERT | |
351 set_was_low(); | |
352 #endif | |
353 } | |
354 | |
355 // Prepare for Simplify & Select | |
356 cache_lrg_info(); // Count degree of LRGs | |
357 | |
358 // Simplify the InterFerence Graph by removing LRGs of low degree. | |
359 // LRGs of low degree are trivially colorable. | |
360 Simplify(); | |
361 | |
362 // Select colors by re-inserting LRGs back into the IFG in reverse order. | |
363 // Return whether or not something spills. | |
364 uint spills = Select( ); | |
365 | |
366 // If we spill, split and recycle the entire thing | |
367 while( spills ) { | |
368 if( _trip_cnt++ > 24 ) { | |
369 DEBUG_ONLY( dump_for_spill_split_recycle(); ) | |
370 if( _trip_cnt > 27 ) { | |
371 C->record_method_not_compilable("failed spill-split-recycle sanity check"); | |
372 return; | |
373 } | |
374 } | |
375 | |
376 if( !_maxlrg ) return; | |
377 _maxlrg = Split( _maxlrg ); // Split spilling LRG everywhere | |
378 // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor) | |
379 C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split"); | |
380 if (C->failing()) return; | |
381 | |
382 compact(); // Compact LRGs; return new lower max lrg | |
383 | |
384 // Nuke the live-ness and interference graph and LiveRanGe info | |
385 { | |
386 NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); ) | |
387 _live = NULL; | |
388 rm.reset_to_mark(); // Reclaim working storage | |
389 IndexSet::reset_memory(C, &live_arena); | |
390 ifg.init(_maxlrg); | |
391 | |
392 // Create LiveRanGe array. | |
393 // Intersect register masks for all USEs and DEFs | |
394 gather_lrg_masks( true ); | |
395 live.compute( _maxlrg ); | |
396 _live = &live; | |
397 } | |
398 must_spill = build_ifg_physical( &live_arena ); | |
399 _ifg->SquareUp(); | |
400 _ifg->Compute_Effective_Degree(); | |
401 | |
402 // Only do conservative coalescing if requested | |
403 if( OptoCoalesce ) { | |
404 // Conservative (and pessimistic) copy coalescing | |
405 PhaseConservativeCoalesce coalesce( *this ); | |
406 // Check for few live ranges determines how aggressive coalesce is. | |
407 coalesce.coalesce_driver( ); | |
408 } | |
409 compress_uf_map_for_nodes(); | |
410 #ifdef ASSERT | |
566
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
411 verify(&live_arena, true); |
0 | 412 #endif |
413 cache_lrg_info(); // Count degree of LRGs | |
414 | |
415 // Simplify the InterFerence Graph by removing LRGs of low degree. | |
416 // LRGs of low degree are trivially colorable. | |
417 Simplify(); | |
418 | |
419 // Select colors by re-inserting LRGs back into the IFG in reverse order. | |
420 // Return whether or not something spills. | |
421 spills = Select( ); | |
422 } | |
423 | |
424 // Count number of Simplify-Select trips per coloring success. | |
425 _allocator_attempts += _trip_cnt + 1; | |
426 _allocator_successes += 1; | |
427 | |
428 // Peephole remove copies | |
429 post_allocate_copy_removal(); | |
430 | |
566
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
431 #ifdef ASSERT |
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
432 // Veify the graph after RA. |
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
433 verify(&live_arena); |
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
434 #endif |
91263420e1c6
6791852: assert(b->_nodes[insidx] == n,"got insidx set incorrectly")
kvn
parents:
550
diff
changeset
|
435 |
0 | 436 // max_reg is past the largest *register* used. |
437 // Convert that to a frame_slot number. | |
438 if( _max_reg <= _matcher._new_SP ) | |
439 _framesize = C->out_preserve_stack_slots(); | |
440 else _framesize = _max_reg -_matcher._new_SP; | |
441 assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough"); | |
442 | |
443 // This frame must preserve the required fp alignment | |
419
0bf25c4807f9
6761594: framesize rounding code rounds using wrong units leading to slightly oversized frames
never
parents:
295
diff
changeset
|
444 _framesize = round_to(_framesize, Matcher::stack_alignment_in_slots()); |
0 | 445 assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" ); |
446 #ifndef PRODUCT | |
447 _total_framesize += _framesize; | |
448 if( (int)_framesize > _max_framesize ) | |
449 _max_framesize = _framesize; | |
450 #endif | |
451 | |
452 // Convert CISC spills | |
453 fixup_spills(); | |
454 | |
455 // Log regalloc results | |
456 CompileLog* log = Compile::current()->log(); | |
457 if (log != NULL) { | |
458 log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing()); | |
459 } | |
460 | |
461 if (C->failing()) return; | |
462 | |
463 NOT_PRODUCT( C->verify_graph_edges(); ) | |
464 | |
465 // Move important info out of the live_arena to longer lasting storage. | |
466 alloc_node_regs(_names.Size()); | |
467 for( uint i=0; i < _names.Size(); i++ ) { | |
468 if( _names[i] ) { // Live range associated with Node? | |
469 LRG &lrg = lrgs( _names[i] ); | |
470 if( lrg.num_regs() == 1 ) { | |
471 _node_regs[i].set1( lrg.reg() ); | |
472 } else { // Must be a register-pair | |
473 if( !lrg._fat_proj ) { // Must be aligned adjacent register pair | |
474 // Live ranges record the highest register in their mask. | |
475 // We want the low register for the AD file writer's convenience. | |
476 _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) ); | |
477 } else { // Misaligned; extract 2 bits | |
478 OptoReg::Name hi = lrg.reg(); // Get hi register | |
479 lrg.Remove(hi); // Yank from mask | |
480 int lo = lrg.mask().find_first_elem(); // Find lo | |
481 _node_regs[i].set_pair( hi, lo ); | |
482 } | |
483 } | |
484 if( lrg._is_oop ) _node_oops.set(i); | |
485 } else { | |
486 _node_regs[i].set_bad(); | |
487 } | |
488 } | |
489 | |
490 // Done! | |
491 _live = NULL; | |
492 _ifg = NULL; | |
493 C->set_indexSet_arena(NULL); // ResourceArea is at end of scope | |
494 } | |
495 | |
496 //------------------------------de_ssa----------------------------------------- | |
497 void PhaseChaitin::de_ssa() { | |
498 // Set initial Names for all Nodes. Most Nodes get the virtual register | |
499 // number. A few get the ZERO live range number. These do not | |
500 // get allocated, but instead rely on correct scheduling to ensure that | |
501 // only one instance is simultaneously live at a time. | |
502 uint lr_counter = 1; | |
503 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
504 Block *b = _cfg._blocks[i]; | |
505 uint cnt = b->_nodes.size(); | |
506 | |
507 // Handle all the normal Nodes in the block | |
508 for( uint j = 0; j < cnt; j++ ) { | |
509 Node *n = b->_nodes[j]; | |
510 // Pre-color to the zero live range, or pick virtual register | |
511 const RegMask &rm = n->out_RegMask(); | |
512 _names.map( n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0 ); | |
513 } | |
514 } | |
515 // Reset the Union-Find mapping to be identity | |
516 reset_uf_map(lr_counter); | |
517 } | |
518 | |
519 | |
520 //------------------------------gather_lrg_masks------------------------------- | |
521 // Gather LiveRanGe information, including register masks. Modification of | |
522 // cisc spillable in_RegMasks should not be done before AggressiveCoalesce. | |
523 void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { | |
524 | |
525 // Nail down the frame pointer live range | |
526 uint fp_lrg = n2lidx(_cfg._root->in(1)->in(TypeFunc::FramePtr)); | |
527 lrgs(fp_lrg)._cost += 1e12; // Cost is infinite | |
528 | |
529 // For all blocks | |
530 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
531 Block *b = _cfg._blocks[i]; | |
532 | |
533 // For all instructions | |
534 for( uint j = 1; j < b->_nodes.size(); j++ ) { | |
535 Node *n = b->_nodes[j]; | |
536 uint input_edge_start =1; // Skip control most nodes | |
537 if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base(); | |
538 uint idx = n->is_Copy(); | |
539 | |
540 // Get virtual register number, same as LiveRanGe index | |
541 uint vreg = n2lidx(n); | |
542 LRG &lrg = lrgs(vreg); | |
543 if( vreg ) { // No vreg means un-allocable (e.g. memory) | |
544 | |
545 // Collect has-copy bit | |
546 if( idx ) { | |
547 lrg._has_copy = 1; | |
548 uint clidx = n2lidx(n->in(idx)); | |
549 LRG ©_src = lrgs(clidx); | |
550 copy_src._has_copy = 1; | |
551 } | |
552 | |
553 // Check for float-vs-int live range (used in register-pressure | |
554 // calculations) | |
555 const Type *n_type = n->bottom_type(); | |
556 if( n_type->is_floatingpoint() ) | |
557 lrg._is_float = 1; | |
558 | |
559 // Check for twice prior spilling. Once prior spilling might have | |
560 // spilled 'soft', 2nd prior spill should have spilled 'hard' and | |
561 // further spilling is unlikely to make progress. | |
562 if( _spilled_once.test(n->_idx) ) { | |
563 lrg._was_spilled1 = 1; | |
564 if( _spilled_twice.test(n->_idx) ) | |
565 lrg._was_spilled2 = 1; | |
566 } | |
567 | |
568 #ifndef PRODUCT | |
569 if (trace_spilling() && lrg._def != NULL) { | |
570 // collect defs for MultiDef printing | |
571 if (lrg._defs == NULL) { | |
1685 | 572 lrg._defs = new (_ifg->_arena) GrowableArray<Node*>(_ifg->_arena, 2, 0, NULL); |
0 | 573 lrg._defs->append(lrg._def); |
574 } | |
575 lrg._defs->append(n); | |
576 } | |
577 #endif | |
578 | |
579 // Check for a single def LRG; these can spill nicely | |
580 // via rematerialization. Flag as NULL for no def found | |
581 // yet, or 'n' for single def or -1 for many defs. | |
582 lrg._def = lrg._def ? NodeSentinel : n; | |
583 | |
584 // Limit result register mask to acceptable registers | |
585 const RegMask &rm = n->out_RegMask(); | |
586 lrg.AND( rm ); | |
587 // Check for bound register masks | |
588 const RegMask &lrgmask = lrg.mask(); | |
589 if( lrgmask.is_bound1() || lrgmask.is_bound2() ) | |
590 lrg._is_bound = 1; | |
591 | |
592 // Check for maximum frequency value | |
593 if( lrg._maxfreq < b->_freq ) | |
594 lrg._maxfreq = b->_freq; | |
595 | |
596 int ireg = n->ideal_reg(); | |
597 assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP, | |
598 "oops must be in Op_RegP's" ); | |
599 // Check for oop-iness, or long/double | |
600 // Check for multi-kill projection | |
601 switch( ireg ) { | |
602 case MachProjNode::fat_proj: | |
603 // Fat projections have size equal to number of registers killed | |
604 lrg.set_num_regs(rm.Size()); | |
605 lrg.set_reg_pressure(lrg.num_regs()); | |
606 lrg._fat_proj = 1; | |
607 lrg._is_bound = 1; | |
608 break; | |
609 case Op_RegP: | |
610 #ifdef _LP64 | |
611 lrg.set_num_regs(2); // Size is 2 stack words | |
612 #else | |
613 lrg.set_num_regs(1); // Size is 1 stack word | |
614 #endif | |
615 // Register pressure is tracked relative to the maximum values | |
616 // suggested for that platform, INTPRESSURE and FLOATPRESSURE, | |
617 // and relative to other types which compete for the same regs. | |
618 // | |
619 // The following table contains suggested values based on the | |
620 // architectures as defined in each .ad file. | |
621 // INTPRESSURE and FLOATPRESSURE may be tuned differently for | |
622 // compile-speed or performance. | |
623 // Note1: | |
624 // SPARC and SPARCV9 reg_pressures are at 2 instead of 1 | |
625 // since .ad registers are defined as high and low halves. | |
626 // These reg_pressure values remain compatible with the code | |
627 // in is_high_pressure() which relates get_invalid_mask_size(), | |
628 // Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE. | |
629 // Note2: | |
630 // SPARC -d32 has 24 registers available for integral values, | |
631 // but only 10 of these are safe for 64-bit longs. | |
632 // Using set_reg_pressure(2) for both int and long means | |
633 // the allocator will believe it can fit 26 longs into | |
634 // registers. Using 2 for longs and 1 for ints means the | |
635 // allocator will attempt to put 52 integers into registers. | |
636 // The settings below limit this problem to methods with | |
637 // many long values which are being run on 32-bit SPARC. | |
638 // | |
639 // ------------------- reg_pressure -------------------- | |
640 // Each entry is reg_pressure_per_value,number_of_regs | |
641 // RegL RegI RegFlags RegF RegD INTPRESSURE FLOATPRESSURE | |
642 // IA32 2 1 1 1 1 6 6 | |
643 // IA64 1 1 1 1 1 50 41 | |
644 // SPARC 2 2 2 2 2 48 (24) 52 (26) | |
645 // SPARCV9 2 2 2 2 2 48 (24) 52 (26) | |
646 // AMD64 1 1 1 1 1 14 15 | |
647 // ----------------------------------------------------- | |
648 #if defined(SPARC) | |
649 lrg.set_reg_pressure(2); // use for v9 as well | |
650 #else | |
651 lrg.set_reg_pressure(1); // normally one value per register | |
652 #endif | |
653 if( n_type->isa_oop_ptr() ) { | |
654 lrg._is_oop = 1; | |
655 } | |
656 break; | |
657 case Op_RegL: // Check for long or double | |
658 case Op_RegD: | |
659 lrg.set_num_regs(2); | |
660 // Define platform specific register pressure | |
661 #ifdef SPARC | |
662 lrg.set_reg_pressure(2); | |
663 #elif defined(IA32) | |
664 if( ireg == Op_RegL ) { | |
665 lrg.set_reg_pressure(2); | |
666 } else { | |
667 lrg.set_reg_pressure(1); | |
668 } | |
669 #else | |
670 lrg.set_reg_pressure(1); // normally one value per register | |
671 #endif | |
672 // If this def of a double forces a mis-aligned double, | |
673 // flag as '_fat_proj' - really flag as allowing misalignment | |
674 // AND changes how we count interferences. A mis-aligned | |
675 // double can interfere with TWO aligned pairs, or effectively | |
676 // FOUR registers! | |
677 if( rm.is_misaligned_Pair() ) { | |
678 lrg._fat_proj = 1; | |
679 lrg._is_bound = 1; | |
680 } | |
681 break; | |
682 case Op_RegF: | |
683 case Op_RegI: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
684 case Op_RegN: |
0 | 685 case Op_RegFlags: |
686 case 0: // not an ideal register | |
687 lrg.set_num_regs(1); | |
688 #ifdef SPARC | |
689 lrg.set_reg_pressure(2); | |
690 #else | |
691 lrg.set_reg_pressure(1); | |
692 #endif | |
693 break; | |
694 default: | |
695 ShouldNotReachHere(); | |
696 } | |
697 } | |
698 | |
699 // Now do the same for inputs | |
700 uint cnt = n->req(); | |
701 // Setup for CISC SPILLING | |
702 uint inp = (uint)AdlcVMDeps::Not_cisc_spillable; | |
703 if( UseCISCSpill && after_aggressive ) { | |
704 inp = n->cisc_operand(); | |
705 if( inp != (uint)AdlcVMDeps::Not_cisc_spillable ) | |
706 // Convert operand number to edge index number | |
707 inp = n->as_Mach()->operand_index(inp); | |
708 } | |
709 // Prepare register mask for each input | |
710 for( uint k = input_edge_start; k < cnt; k++ ) { | |
711 uint vreg = n2lidx(n->in(k)); | |
712 if( !vreg ) continue; | |
713 | |
714 // If this instruction is CISC Spillable, add the flags | |
715 // bit to its appropriate input | |
716 if( UseCISCSpill && after_aggressive && inp == k ) { | |
717 #ifndef PRODUCT | |
718 if( TraceCISCSpill ) { | |
719 tty->print(" use_cisc_RegMask: "); | |
720 n->dump(); | |
721 } | |
722 #endif | |
723 n->as_Mach()->use_cisc_RegMask(); | |
724 } | |
725 | |
726 LRG &lrg = lrgs(vreg); | |
727 // // Testing for floating point code shape | |
728 // Node *test = n->in(k); | |
729 // if( test->is_Mach() ) { | |
730 // MachNode *m = test->as_Mach(); | |
731 // int op = m->ideal_Opcode(); | |
732 // if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) { | |
733 // int zzz = 1; | |
734 // } | |
735 // } | |
736 | |
737 // Limit result register mask to acceptable registers. | |
738 // Do not limit registers from uncommon uses before | |
739 // AggressiveCoalesce. This effectively pre-virtual-splits | |
740 // around uncommon uses of common defs. | |
741 const RegMask &rm = n->in_RegMask(k); | |
742 if( !after_aggressive && | |
743 _cfg._bbs[n->in(k)->_idx]->_freq > 1000*b->_freq ) { | |
744 // Since we are BEFORE aggressive coalesce, leave the register | |
745 // mask untrimmed by the call. This encourages more coalescing. | |
746 // Later, AFTER aggressive, this live range will have to spill | |
747 // but the spiller handles slow-path calls very nicely. | |
748 } else { | |
749 lrg.AND( rm ); | |
750 } | |
751 // Check for bound register masks | |
752 const RegMask &lrgmask = lrg.mask(); | |
753 if( lrgmask.is_bound1() || lrgmask.is_bound2() ) | |
754 lrg._is_bound = 1; | |
755 // If this use of a double forces a mis-aligned double, | |
756 // flag as '_fat_proj' - really flag as allowing misalignment | |
757 // AND changes how we count interferences. A mis-aligned | |
758 // double can interfere with TWO aligned pairs, or effectively | |
759 // FOUR registers! | |
760 if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) { | |
761 lrg._fat_proj = 1; | |
762 lrg._is_bound = 1; | |
763 } | |
764 // if the LRG is an unaligned pair, we will have to spill | |
765 // so clear the LRG's register mask if it is not already spilled | |
766 if ( !n->is_SpillCopy() && | |
295
ea18057223c4
6732194: Data corruption dependent on -server/-client/-Xbatch
never
parents:
196
diff
changeset
|
767 (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) && |
0 | 768 lrgmask.is_misaligned_Pair()) { |
769 lrg.Clear(); | |
770 } | |
771 | |
772 // Check for maximum frequency value | |
773 if( lrg._maxfreq < b->_freq ) | |
774 lrg._maxfreq = b->_freq; | |
775 | |
776 } // End for all allocated inputs | |
777 } // end for all instructions | |
778 } // end for all blocks | |
779 | |
780 // Final per-liverange setup | |
781 for( uint i2=0; i2<_maxlrg; i2++ ) { | |
782 LRG &lrg = lrgs(i2); | |
783 if( lrg.num_regs() == 2 && !lrg._fat_proj ) | |
784 lrg.ClearToPairs(); | |
785 lrg.compute_set_mask_size(); | |
786 if( lrg.not_free() ) { // Handle case where we lose from the start | |
787 lrg.set_reg(OptoReg::Name(LRG::SPILL_REG)); | |
788 lrg._direct_conflict = 1; | |
789 } | |
790 lrg.set_degree(0); // no neighbors in IFG yet | |
791 } | |
792 } | |
793 | |
794 //------------------------------set_was_low------------------------------------ | |
795 // Set the was-lo-degree bit. Conservative coalescing should not change the | |
796 // colorability of the graph. If any live range was of low-degree before | |
797 // coalescing, it should Simplify. This call sets the was-lo-degree bit. | |
798 // The bit is checked in Simplify. | |
799 void PhaseChaitin::set_was_low() { | |
800 #ifdef ASSERT | |
801 for( uint i = 1; i < _maxlrg; i++ ) { | |
802 int size = lrgs(i).num_regs(); | |
803 uint old_was_lo = lrgs(i)._was_lo; | |
804 lrgs(i)._was_lo = 0; | |
805 if( lrgs(i).lo_degree() ) { | |
806 lrgs(i)._was_lo = 1; // Trivially of low degree | |
807 } else { // Else check the Brigg's assertion | |
808 // Brigg's observation is that the lo-degree neighbors of a | |
809 // hi-degree live range will not interfere with the color choices | |
810 // of said hi-degree live range. The Simplify reverse-stack-coloring | |
811 // order takes care of the details. Hence you do not have to count | |
812 // low-degree neighbors when determining if this guy colors. | |
813 int briggs_degree = 0; | |
814 IndexSet *s = _ifg->neighbors(i); | |
815 IndexSetIterator elements(s); | |
816 uint lidx; | |
817 while((lidx = elements.next()) != 0) { | |
818 if( !lrgs(lidx).lo_degree() ) | |
819 briggs_degree += MAX2(size,lrgs(lidx).num_regs()); | |
820 } | |
821 if( briggs_degree < lrgs(i).degrees_of_freedom() ) | |
822 lrgs(i)._was_lo = 1; // Low degree via the briggs assertion | |
823 } | |
824 assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease"); | |
825 } | |
826 #endif | |
827 } | |
828 | |
829 #define REGISTER_CONSTRAINED 16 | |
830 | |
831 //------------------------------cache_lrg_info--------------------------------- | |
832 // Compute cost/area ratio, in case we spill. Build the lo-degree list. | |
833 void PhaseChaitin::cache_lrg_info( ) { | |
834 | |
835 for( uint i = 1; i < _maxlrg; i++ ) { | |
836 LRG &lrg = lrgs(i); | |
837 | |
838 // Check for being of low degree: means we can be trivially colored. | |
839 // Low degree, dead or must-spill guys just get to simplify right away | |
840 if( lrg.lo_degree() || | |
841 !lrg.alive() || | |
842 lrg._must_spill ) { | |
843 // Split low degree list into those guys that must get a | |
844 // register and those that can go to register or stack. | |
845 // The idea is LRGs that can go register or stack color first when | |
846 // they have a good chance of getting a register. The register-only | |
847 // lo-degree live ranges always get a register. | |
848 OptoReg::Name hi_reg = lrg.mask().find_last_elem(); | |
849 if( OptoReg::is_stack(hi_reg)) { // Can go to stack? | |
850 lrg._next = _lo_stk_degree; | |
851 _lo_stk_degree = i; | |
852 } else { | |
853 lrg._next = _lo_degree; | |
854 _lo_degree = i; | |
855 } | |
856 } else { // Else high degree | |
857 lrgs(_hi_degree)._prev = i; | |
858 lrg._next = _hi_degree; | |
859 lrg._prev = 0; | |
860 _hi_degree = i; | |
861 } | |
862 } | |
863 } | |
864 | |
865 //------------------------------Pre-Simplify----------------------------------- | |
866 // Simplify the IFG by removing LRGs of low degree that have NO copies | |
867 void PhaseChaitin::Pre_Simplify( ) { | |
868 | |
869 // Warm up the lo-degree no-copy list | |
870 int lo_no_copy = 0; | |
871 for( uint i = 1; i < _maxlrg; i++ ) { | |
872 if( (lrgs(i).lo_degree() && !lrgs(i)._has_copy) || | |
873 !lrgs(i).alive() || | |
874 lrgs(i)._must_spill ) { | |
875 lrgs(i)._next = lo_no_copy; | |
876 lo_no_copy = i; | |
877 } | |
878 } | |
879 | |
880 while( lo_no_copy ) { | |
881 uint lo = lo_no_copy; | |
882 lo_no_copy = lrgs(lo)._next; | |
883 int size = lrgs(lo).num_regs(); | |
884 | |
885 // Put the simplified guy on the simplified list. | |
886 lrgs(lo)._next = _simplified; | |
887 _simplified = lo; | |
888 | |
889 // Yank this guy from the IFG. | |
890 IndexSet *adj = _ifg->remove_node( lo ); | |
891 | |
892 // If any neighbors' degrees fall below their number of | |
893 // allowed registers, then put that neighbor on the low degree | |
894 // list. Note that 'degree' can only fall and 'numregs' is | |
895 // unchanged by this action. Thus the two are equal at most once, | |
896 // so LRGs hit the lo-degree worklists at most once. | |
897 IndexSetIterator elements(adj); | |
898 uint neighbor; | |
899 while ((neighbor = elements.next()) != 0) { | |
900 LRG *n = &lrgs(neighbor); | |
901 assert( _ifg->effective_degree(neighbor) == n->degree(), "" ); | |
902 | |
903 // Check for just becoming of-low-degree | |
904 if( n->just_lo_degree() && !n->_has_copy ) { | |
905 assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); | |
906 // Put on lo-degree list | |
907 n->_next = lo_no_copy; | |
908 lo_no_copy = neighbor; | |
909 } | |
910 } | |
911 } // End of while lo-degree no_copy worklist not empty | |
912 | |
913 // No more lo-degree no-copy live ranges to simplify | |
914 } | |
915 | |
916 //------------------------------Simplify--------------------------------------- | |
917 // Simplify the IFG by removing LRGs of low degree. | |
918 void PhaseChaitin::Simplify( ) { | |
919 | |
920 while( 1 ) { // Repeat till simplified it all | |
921 // May want to explore simplifying lo_degree before _lo_stk_degree. | |
922 // This might result in more spills coloring into registers during | |
923 // Select(). | |
924 while( _lo_degree || _lo_stk_degree ) { | |
925 // If possible, pull from lo_stk first | |
926 uint lo; | |
927 if( _lo_degree ) { | |
928 lo = _lo_degree; | |
929 _lo_degree = lrgs(lo)._next; | |
930 } else { | |
931 lo = _lo_stk_degree; | |
932 _lo_stk_degree = lrgs(lo)._next; | |
933 } | |
934 | |
935 // Put the simplified guy on the simplified list. | |
936 lrgs(lo)._next = _simplified; | |
937 _simplified = lo; | |
938 // If this guy is "at risk" then mark his current neighbors | |
939 if( lrgs(lo)._at_risk ) { | |
940 IndexSetIterator elements(_ifg->neighbors(lo)); | |
941 uint datum; | |
942 while ((datum = elements.next()) != 0) { | |
943 lrgs(datum)._risk_bias = lo; | |
944 } | |
945 } | |
946 | |
947 // Yank this guy from the IFG. | |
948 IndexSet *adj = _ifg->remove_node( lo ); | |
949 | |
950 // If any neighbors' degrees fall below their number of | |
951 // allowed registers, then put that neighbor on the low degree | |
952 // list. Note that 'degree' can only fall and 'numregs' is | |
953 // unchanged by this action. Thus the two are equal at most once, | |
954 // so LRGs hit the lo-degree worklist at most once. | |
955 IndexSetIterator elements(adj); | |
956 uint neighbor; | |
957 while ((neighbor = elements.next()) != 0) { | |
958 LRG *n = &lrgs(neighbor); | |
959 #ifdef ASSERT | |
550
96964ebdb154
6782232: assert("CreateEx must be first instruction in block" )
kvn
parents:
419
diff
changeset
|
960 if( VerifyOpto || VerifyRegisterAllocator ) { |
0 | 961 assert( _ifg->effective_degree(neighbor) == n->degree(), "" ); |
962 } | |
963 #endif | |
964 | |
965 // Check for just becoming of-low-degree just counting registers. | |
966 // _must_spill live ranges are already on the low degree list. | |
967 if( n->just_lo_degree() && !n->_must_spill ) { | |
968 assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); | |
969 // Pull from hi-degree list | |
970 uint prev = n->_prev; | |
971 uint next = n->_next; | |
972 if( prev ) lrgs(prev)._next = next; | |
973 else _hi_degree = next; | |
974 lrgs(next)._prev = prev; | |
975 n->_next = _lo_degree; | |
976 _lo_degree = neighbor; | |
977 } | |
978 } | |
979 } // End of while lo-degree/lo_stk_degree worklist not empty | |
980 | |
981 // Check for got everything: is hi-degree list empty? | |
982 if( !_hi_degree ) break; | |
983 | |
984 // Time to pick a potential spill guy | |
985 uint lo_score = _hi_degree; | |
986 double score = lrgs(lo_score).score(); | |
987 double area = lrgs(lo_score)._area; | |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
988 double cost = lrgs(lo_score)._cost; |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
989 bool bound = lrgs(lo_score)._is_bound; |
0 | 990 |
991 // Find cheapest guy | |
992 debug_only( int lo_no_simplify=0; ); | |
1012
5f29a958a545
6889656: assert(lo_lrg->lo_degree() || !lo_no_simplify,"Live range was lo-degree before coalesce
kvn
parents:
1008
diff
changeset
|
993 for( uint i = _hi_degree; i; i = lrgs(i)._next ) { |
0 | 994 assert( !(*_ifg->_yanked)[i], "" ); |
995 // It's just vaguely possible to move hi-degree to lo-degree without | |
996 // going through a just-lo-degree stage: If you remove a double from | |
997 // a float live range it's degree will drop by 2 and you can skip the | |
998 // just-lo-degree stage. It's very rare (shows up after 5000+ methods | |
999 // in -Xcomp of Java2Demo). So just choose this guy to simplify next. | |
1000 if( lrgs(i).lo_degree() ) { | |
1001 lo_score = i; | |
1002 break; | |
1003 } | |
1004 debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; ); | |
1005 double iscore = lrgs(i).score(); | |
1006 double iarea = lrgs(i)._area; | |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1007 double icost = lrgs(i)._cost; |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1008 bool ibound = lrgs(i)._is_bound; |
0 | 1009 |
1010 // Compare cost/area of i vs cost/area of lo_score. Smaller cost/area | |
1011 // wins. Ties happen because all live ranges in question have spilled | |
1012 // a few times before and the spill-score adds a huge number which | |
1013 // washes out the low order bits. We are choosing the lesser of 2 | |
1014 // evils; in this case pick largest area to spill. | |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1015 // Ties also happen when live ranges are defined and used only inside |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1016 // one block. In which case their area is 0 and score set to max. |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1017 // In such case choose bound live range over unbound to free registers |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1018 // or with smaller cost to spill. |
0 | 1019 if( iscore < score || |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1020 (iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) || |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1021 (iscore == score && iarea == area && |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1022 ( (ibound && !bound) || ibound == bound && (icost < cost) )) ) { |
0 | 1023 lo_score = i; |
1024 score = iscore; | |
1025 area = iarea; | |
1008
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1026 cost = icost; |
e90521d61f9a
6875959: CTW fails hotspot/src/share/vm/opto/reg_split.cpp:1087
kvn
parents:
729
diff
changeset
|
1027 bound = ibound; |
0 | 1028 } |
1029 } | |
1030 LRG *lo_lrg = &lrgs(lo_score); | |
1031 // The live range we choose for spilling is either hi-degree, or very | |
1032 // rarely it can be low-degree. If we choose a hi-degree live range | |
1033 // there better not be any lo-degree choices. | |
1034 assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" ); | |
1035 | |
1036 // Pull from hi-degree list | |
1037 uint prev = lo_lrg->_prev; | |
1038 uint next = lo_lrg->_next; | |
1039 if( prev ) lrgs(prev)._next = next; | |
1040 else _hi_degree = next; | |
1041 lrgs(next)._prev = prev; | |
1042 // Jam him on the lo-degree list, despite his high degree. | |
1043 // Maybe he'll get a color, and maybe he'll spill. | |
1044 // Only Select() will know. | |
1045 lrgs(lo_score)._at_risk = true; | |
1046 _lo_degree = lo_score; | |
1047 lo_lrg->_next = 0; | |
1048 | |
1049 } // End of while not simplified everything | |
1050 | |
1051 } | |
1052 | |
1053 //------------------------------bias_color------------------------------------- | |
1054 // Choose a color using the biasing heuristic | |
1055 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { | |
1056 | |
1057 // Check for "at_risk" LRG's | |
1058 uint risk_lrg = Find(lrg._risk_bias); | |
1059 if( risk_lrg != 0 ) { | |
1060 // Walk the colored neighbors of the "at_risk" candidate | |
1061 // Choose a color which is both legal and already taken by a neighbor | |
1062 // of the "at_risk" candidate in order to improve the chances of the | |
1063 // "at_risk" candidate of coloring | |
1064 IndexSetIterator elements(_ifg->neighbors(risk_lrg)); | |
1065 uint datum; | |
1066 while ((datum = elements.next()) != 0) { | |
1067 OptoReg::Name reg = lrgs(datum).reg(); | |
1068 // If this LRG's register is legal for us, choose it | |
1069 if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE && | |
1070 lrg.mask().Member(OptoReg::add(reg,-chunk)) && | |
1071 (lrg.num_regs()==1 || // either size 1 | |
1072 (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs) | |
1073 return reg; | |
1074 } | |
1075 } | |
1076 | |
1077 uint copy_lrg = Find(lrg._copy_bias); | |
1078 if( copy_lrg != 0 ) { | |
1079 // If he has a color, | |
1080 if( !(*(_ifg->_yanked))[copy_lrg] ) { | |
1081 OptoReg::Name reg = lrgs(copy_lrg).reg(); | |
1082 // And it is legal for you, | |
1083 if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE && | |
1084 lrg.mask().Member(OptoReg::add(reg,-chunk)) && | |
1085 (lrg.num_regs()==1 || // either size 1 | |
1086 (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs) | |
1087 return reg; | |
1088 } else if( chunk == 0 ) { | |
1089 // Choose a color which is legal for him | |
1090 RegMask tempmask = lrg.mask(); | |
1091 tempmask.AND(lrgs(copy_lrg).mask()); | |
1092 OptoReg::Name reg; | |
1093 if( lrg.num_regs() == 1 ) { | |
1094 reg = tempmask.find_first_elem(); | |
1095 } else { | |
1096 tempmask.ClearToPairs(); | |
1097 reg = tempmask.find_first_pair(); | |
1098 } | |
1099 if( OptoReg::is_valid(reg) ) | |
1100 return reg; | |
1101 } | |
1102 } | |
1103 | |
1104 // If no bias info exists, just go with the register selection ordering | |
1105 if( lrg.num_regs() == 2 ) { | |
1106 // Find an aligned pair | |
1107 return OptoReg::add(lrg.mask().find_first_pair(),chunk); | |
1108 } | |
1109 | |
1110 // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate | |
1111 // copy removal to remove many more copies, by preventing a just-assigned | |
1112 // register from being repeatedly assigned. | |
1113 OptoReg::Name reg = lrg.mask().find_first_elem(); | |
1114 if( (++_alternate & 1) && OptoReg::is_valid(reg) ) { | |
1115 // This 'Remove; find; Insert' idiom is an expensive way to find the | |
1116 // SECOND element in the mask. | |
1117 lrg.Remove(reg); | |
1118 OptoReg::Name reg2 = lrg.mask().find_first_elem(); | |
1119 lrg.Insert(reg); | |
1120 if( OptoReg::is_reg(reg2)) | |
1121 reg = reg2; | |
1122 } | |
1123 return OptoReg::add( reg, chunk ); | |
1124 } | |
1125 | |
1126 //------------------------------choose_color----------------------------------- | |
1127 // Choose a color in the current chunk | |
1128 OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) { | |
1129 assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)"); | |
1130 assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)"); | |
1131 | |
1132 if( lrg.num_regs() == 1 || // Common Case | |
1133 !lrg._fat_proj ) // Aligned+adjacent pairs ok | |
1134 // Use a heuristic to "bias" the color choice | |
1135 return bias_color(lrg, chunk); | |
1136 | |
1137 assert( lrg.num_regs() >= 2, "dead live ranges do not color" ); | |
1138 | |
1139 // Fat-proj case or misaligned double argument. | |
1140 assert(lrg.compute_mask_size() == lrg.num_regs() || | |
1141 lrg.num_regs() == 2,"fat projs exactly color" ); | |
1142 assert( !chunk, "always color in 1st chunk" ); | |
1143 // Return the highest element in the set. | |
1144 return lrg.mask().find_last_elem(); | |
1145 } | |
1146 | |
1147 //------------------------------Select----------------------------------------- | |
1148 // Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted | |
1149 // in reverse order of removal. As long as nothing of hi-degree was yanked, | |
1150 // everything going back is guaranteed a color. Select that color. If some | |
1151 // hi-degree LRG cannot get a color then we record that we must spill. | |
1152 uint PhaseChaitin::Select( ) { | |
1153 uint spill_reg = LRG::SPILL_REG; | |
1154 _max_reg = OptoReg::Name(0); // Past max register used | |
1155 while( _simplified ) { | |
1156 // Pull next LRG from the simplified list - in reverse order of removal | |
1157 uint lidx = _simplified; | |
1158 LRG *lrg = &lrgs(lidx); | |
1159 _simplified = lrg->_next; | |
1160 | |
1161 | |
1162 #ifndef PRODUCT | |
1163 if (trace_spilling()) { | |
1164 ttyLocker ttyl; | |
1165 tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(), | |
1166 lrg->degrees_of_freedom()); | |
1167 lrg->dump(); | |
1168 } | |
1169 #endif | |
1170 | |
1171 // Re-insert into the IFG | |
1172 _ifg->re_insert(lidx); | |
1173 if( !lrg->alive() ) continue; | |
1174 // capture allstackedness flag before mask is hacked | |
1175 const int is_allstack = lrg->mask().is_AllStack(); | |
1176 | |
1177 // Yeah, yeah, yeah, I know, I know. I can refactor this | |
1178 // to avoid the GOTO, although the refactored code will not | |
1179 // be much clearer. We arrive here IFF we have a stack-based | |
1180 // live range that cannot color in the current chunk, and it | |
1181 // has to move into the next free stack chunk. | |
1182 int chunk = 0; // Current chunk is first chunk | |
1183 retry_next_chunk: | |
1184 | |
1185 // Remove neighbor colors | |
1186 IndexSet *s = _ifg->neighbors(lidx); | |
1187 | |
1188 debug_only(RegMask orig_mask = lrg->mask();) | |
1189 IndexSetIterator elements(s); | |
1190 uint neighbor; | |
1191 while ((neighbor = elements.next()) != 0) { | |
1192 // Note that neighbor might be a spill_reg. In this case, exclusion | |
1193 // of its color will be a no-op, since the spill_reg chunk is in outer | |
1194 // space. Also, if neighbor is in a different chunk, this exclusion | |
1195 // will be a no-op. (Later on, if lrg runs out of possible colors in | |
1196 // its chunk, a new chunk of color may be tried, in which case | |
1197 // examination of neighbors is started again, at retry_next_chunk.) | |
1198 LRG &nlrg = lrgs(neighbor); | |
1199 OptoReg::Name nreg = nlrg.reg(); | |
1200 // Only subtract masks in the same chunk | |
1201 if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) { | |
1202 #ifndef PRODUCT | |
1203 uint size = lrg->mask().Size(); | |
1204 RegMask rm = lrg->mask(); | |
1205 #endif | |
1206 lrg->SUBTRACT(nlrg.mask()); | |
1207 #ifndef PRODUCT | |
1208 if (trace_spilling() && lrg->mask().Size() != size) { | |
1209 ttyLocker ttyl; | |
1210 tty->print("L%d ", lidx); | |
1211 rm.dump(); | |
1212 tty->print(" intersected L%d ", neighbor); | |
1213 nlrg.mask().dump(); | |
1214 tty->print(" removed "); | |
1215 rm.SUBTRACT(lrg->mask()); | |
1216 rm.dump(); | |
1217 tty->print(" leaving "); | |
1218 lrg->mask().dump(); | |
1219 tty->cr(); | |
1220 } | |
1221 #endif | |
1222 } | |
1223 } | |
1224 //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness"); | |
1225 // Aligned pairs need aligned masks | |
1226 if( lrg->num_regs() == 2 && !lrg->_fat_proj ) | |
1227 lrg->ClearToPairs(); | |
1228 | |
1229 // Check if a color is available and if so pick the color | |
1230 OptoReg::Name reg = choose_color( *lrg, chunk ); | |
1231 #ifdef SPARC | |
1232 debug_only(lrg->compute_set_mask_size()); | |
1233 assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned"); | |
1234 #endif | |
1235 | |
1236 //--------------- | |
1237 // If we fail to color and the AllStack flag is set, trigger | |
1238 // a chunk-rollover event | |
1239 if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) { | |
1240 // Bump register mask up to next stack chunk | |
1241 chunk += RegMask::CHUNK_SIZE; | |
1242 lrg->Set_All(); | |
1243 | |
1244 goto retry_next_chunk; | |
1245 } | |
1246 | |
1247 //--------------- | |
1248 // Did we get a color? | |
1249 else if( OptoReg::is_valid(reg)) { | |
1250 #ifndef PRODUCT | |
1251 RegMask avail_rm = lrg->mask(); | |
1252 #endif | |
1253 | |
1254 // Record selected register | |
1255 lrg->set_reg(reg); | |
1256 | |
1257 if( reg >= _max_reg ) // Compute max register limit | |
1258 _max_reg = OptoReg::add(reg,1); | |
1259 // Fold reg back into normal space | |
1260 reg = OptoReg::add(reg,-chunk); | |
1261 | |
1262 // If the live range is not bound, then we actually had some choices | |
1263 // to make. In this case, the mask has more bits in it than the colors | |
605 | 1264 // chosen. Restrict the mask to just what was picked. |
0 | 1265 if( lrg->num_regs() == 1 ) { // Size 1 live range |
1266 lrg->Clear(); // Clear the mask | |
1267 lrg->Insert(reg); // Set regmask to match selected reg | |
1268 lrg->set_mask_size(1); | |
1269 } else if( !lrg->_fat_proj ) { | |
1270 // For pairs, also insert the low bit of the pair | |
1271 assert( lrg->num_regs() == 2, "unbound fatproj???" ); | |
1272 lrg->Clear(); // Clear the mask | |
1273 lrg->Insert(reg); // Set regmask to match selected reg | |
1274 lrg->Insert(OptoReg::add(reg,-1)); | |
1275 lrg->set_mask_size(2); | |
1276 } else { // Else fatproj | |
1277 // mask must be equal to fatproj bits, by definition | |
1278 } | |
1279 #ifndef PRODUCT | |
1280 if (trace_spilling()) { | |
1281 ttyLocker ttyl; | |
1282 tty->print("L%d selected ", lidx); | |
1283 lrg->mask().dump(); | |
1284 tty->print(" from "); | |
1285 avail_rm.dump(); | |
1286 tty->cr(); | |
1287 } | |
1288 #endif | |
1289 // Note that reg is the highest-numbered register in the newly-bound mask. | |
1290 } // end color available case | |
1291 | |
1292 //--------------- | |
1293 // Live range is live and no colors available | |
1294 else { | |
1295 assert( lrg->alive(), "" ); | |
295
ea18057223c4
6732194: Data corruption dependent on -server/-client/-Xbatch
never
parents:
196
diff
changeset
|
1296 assert( !lrg->_fat_proj || lrg->is_multidef() || |
0 | 1297 lrg->_def->outcnt() > 0, "fat_proj cannot spill"); |
1298 assert( !orig_mask.is_AllStack(), "All Stack does not spill" ); | |
1299 | |
1300 // Assign the special spillreg register | |
1301 lrg->set_reg(OptoReg::Name(spill_reg++)); | |
1302 // Do not empty the regmask; leave mask_size lying around | |
1303 // for use during Spilling | |
1304 #ifndef PRODUCT | |
1305 if( trace_spilling() ) { | |
1306 ttyLocker ttyl; | |
1307 tty->print("L%d spilling with neighbors: ", lidx); | |
1308 s->dump(); | |
1309 debug_only(tty->print(" original mask: ")); | |
1310 debug_only(orig_mask.dump()); | |
1311 dump_lrg(lidx); | |
1312 } | |
1313 #endif | |
1314 } // end spill case | |
1315 | |
1316 } | |
1317 | |
1318 return spill_reg-LRG::SPILL_REG; // Return number of spills | |
1319 } | |
1320 | |
1321 | |
1322 //------------------------------copy_was_spilled------------------------------- | |
1323 // Copy 'was_spilled'-edness from the source Node to the dst Node. | |
1324 void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) { | |
1325 if( _spilled_once.test(src->_idx) ) { | |
1326 _spilled_once.set(dst->_idx); | |
1327 lrgs(Find(dst))._was_spilled1 = 1; | |
1328 if( _spilled_twice.test(src->_idx) ) { | |
1329 _spilled_twice.set(dst->_idx); | |
1330 lrgs(Find(dst))._was_spilled2 = 1; | |
1331 } | |
1332 } | |
1333 } | |
1334 | |
1335 //------------------------------set_was_spilled-------------------------------- | |
1336 // Set the 'spilled_once' or 'spilled_twice' flag on a node. | |
1337 void PhaseChaitin::set_was_spilled( Node *n ) { | |
1338 if( _spilled_once.test_set(n->_idx) ) | |
1339 _spilled_twice.set(n->_idx); | |
1340 } | |
1341 | |
1342 //------------------------------fixup_spills----------------------------------- | |
1343 // Convert Ideal spill instructions into proper FramePtr + offset Loads and | |
1344 // Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are. | |
1345 void PhaseChaitin::fixup_spills() { | |
1346 // This function does only cisc spill work. | |
1347 if( !UseCISCSpill ) return; | |
1348 | |
1349 NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); ) | |
1350 | |
1351 // Grab the Frame Pointer | |
1352 Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr); | |
1353 | |
1354 // For all blocks | |
1355 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
1356 Block *b = _cfg._blocks[i]; | |
1357 | |
1358 // For all instructions in block | |
1359 uint last_inst = b->end_idx(); | |
1360 for( uint j = 1; j <= last_inst; j++ ) { | |
1361 Node *n = b->_nodes[j]; | |
1362 | |
1363 // Dead instruction??? | |
1364 assert( n->outcnt() != 0 ||// Nothing dead after post alloc | |
1365 C->top() == n || // Or the random TOP node | |
1366 n->is_Proj(), // Or a fat-proj kill node | |
1367 "No dead instructions after post-alloc" ); | |
1368 | |
1369 int inp = n->cisc_operand(); | |
1370 if( inp != AdlcVMDeps::Not_cisc_spillable ) { | |
1371 // Convert operand number to edge index number | |
1372 MachNode *mach = n->as_Mach(); | |
1373 inp = mach->operand_index(inp); | |
1374 Node *src = n->in(inp); // Value to load or store | |
1375 LRG &lrg_cisc = lrgs( Find_const(src) ); | |
1376 OptoReg::Name src_reg = lrg_cisc.reg(); | |
1377 // Doubles record the HIGH register of an adjacent pair. | |
1378 src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs()); | |
1379 if( OptoReg::is_stack(src_reg) ) { // If input is on stack | |
1380 // This is a CISC Spill, get stack offset and construct new node | |
1381 #ifndef PRODUCT | |
1382 if( TraceCISCSpill ) { | |
1383 tty->print(" reg-instr: "); | |
1384 n->dump(); | |
1385 } | |
1386 #endif | |
1387 int stk_offset = reg2offset(src_reg); | |
1388 // Bailout if we might exceed node limit when spilling this instruction | |
1389 C->check_node_count(0, "out of nodes fixing spills"); | |
1390 if (C->failing()) return; | |
1391 // Transform node | |
1392 MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach(); | |
1393 cisc->set_req(inp,fp); // Base register is frame pointer | |
1394 if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) { | |
1395 assert( cisc->oper_input_base() == 2, "Only adding one edge"); | |
1396 cisc->ins_req(1,src); // Requires a memory edge | |
1397 } | |
1398 b->_nodes.map(j,cisc); // Insert into basic block | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
113
diff
changeset
|
1399 n->subsume_by(cisc); // Correct graph |
0 | 1400 // |
1401 ++_used_cisc_instructions; | |
1402 #ifndef PRODUCT | |
1403 if( TraceCISCSpill ) { | |
1404 tty->print(" cisc-instr: "); | |
1405 cisc->dump(); | |
1406 } | |
1407 #endif | |
1408 } else { | |
1409 #ifndef PRODUCT | |
1410 if( TraceCISCSpill ) { | |
1411 tty->print(" using reg-instr: "); | |
1412 n->dump(); | |
1413 } | |
1414 #endif | |
1415 ++_unused_cisc_instructions; // input can be on stack | |
1416 } | |
1417 } | |
1418 | |
1419 } // End of for all instructions | |
1420 | |
1421 } // End of for all blocks | |
1422 } | |
1423 | |
1424 //------------------------------find_base_for_derived-------------------------- | |
1425 // Helper to stretch above; recursively discover the base Node for a | |
1426 // given derived Node. Easy for AddP-related machine nodes, but needs | |
1427 // to be recursive for derived Phis. | |
1428 Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) { | |
1429 // See if already computed; if so return it | |
1430 if( derived_base_map[derived->_idx] ) | |
1431 return derived_base_map[derived->_idx]; | |
1432 | |
1433 // See if this happens to be a base. | |
1434 // NOTE: we use TypePtr instead of TypeOopPtr because we can have | |
1435 // pointers derived from NULL! These are always along paths that | |
1436 // can't happen at run-time but the optimizer cannot deduce it so | |
1437 // we have to handle it gracefully. | |
729
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1438 assert(!derived->bottom_type()->isa_narrowoop() || |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1439 derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity"); |
0 | 1440 const TypePtr *tj = derived->bottom_type()->isa_ptr(); |
1441 // If its an OOP with a non-zero offset, then it is derived. | |
729
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1442 if( tj == NULL || tj->_offset == 0 ) { |
0 | 1443 derived_base_map[derived->_idx] = derived; |
1444 return derived; | |
1445 } | |
1446 // Derived is NULL+offset? Base is NULL! | |
1447 if( derived->is_Con() ) { | |
729
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1448 Node *base = _matcher.mach_null(); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1449 assert(base != NULL, "sanity"); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1450 if (base->in(0) == NULL) { |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1451 // Initialize it once and make it shared: |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1452 // set control to _root and place it into Start block |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1453 // (where top() node is placed). |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1454 base->init_req(0, _cfg._root); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1455 Block *startb = _cfg._bbs[C->top()->_idx]; |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1456 startb->_nodes.insert(startb->find_node(C->top()), base ); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1457 _cfg._bbs.map( base->_idx, startb ); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1458 assert (n2lidx(base) == 0, "should not have LRG yet"); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1459 } |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1460 if (n2lidx(base) == 0) { |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1461 new_lrg(base, maxlrg++); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1462 } |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1463 assert(base->in(0) == _cfg._root && |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1464 _cfg._bbs[base->_idx] == _cfg._bbs[C->top()->_idx], "base NULL should be shared"); |
0 | 1465 derived_base_map[derived->_idx] = base; |
1466 return base; | |
1467 } | |
1468 | |
1469 // Check for AddP-related opcodes | |
1470 if( !derived->is_Phi() ) { | |
1471 assert( derived->as_Mach()->ideal_Opcode() == Op_AddP, "" ); | |
1472 Node *base = derived->in(AddPNode::Base); | |
1473 derived_base_map[derived->_idx] = base; | |
1474 return base; | |
1475 } | |
1476 | |
1477 // Recursively find bases for Phis. | |
1478 // First check to see if we can avoid a base Phi here. | |
1479 Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg); | |
1480 uint i; | |
1481 for( i = 2; i < derived->req(); i++ ) | |
1482 if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg)) | |
1483 break; | |
1484 // Went to the end without finding any different bases? | |
1485 if( i == derived->req() ) { // No need for a base Phi here | |
1486 derived_base_map[derived->_idx] = base; | |
1487 return base; | |
1488 } | |
1489 | |
1490 // Now we see we need a base-Phi here to merge the bases | |
729
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1491 const Type *t = base->bottom_type(); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1492 base = new (C, derived->req()) PhiNode( derived->in(0), t ); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1493 for( i = 1; i < derived->req(); i++ ) { |
0 | 1494 base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg)); |
729
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1495 t = t->meet(base->in(i)->bottom_type()); |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1496 } |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1497 base->as_Phi()->set_type(t); |
0 | 1498 |
1499 // Search the current block for an existing base-Phi | |
1500 Block *b = _cfg._bbs[derived->_idx]; | |
1501 for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi | |
1502 Node *phi = b->_nodes[i]; | |
1503 if( !phi->is_Phi() ) { // Found end of Phis with no match? | |
1504 b->_nodes.insert( i, base ); // Must insert created Phi here as base | |
1505 _cfg._bbs.map( base->_idx, b ); | |
1506 new_lrg(base,maxlrg++); | |
1507 break; | |
1508 } | |
1509 // See if Phi matches. | |
1510 uint j; | |
1511 for( j = 1; j < base->req(); j++ ) | |
1512 if( phi->in(j) != base->in(j) && | |
1513 !(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs | |
1514 break; | |
1515 if( j == base->req() ) { // All inputs match? | |
1516 base = phi; // Then use existing 'phi' and drop 'base' | |
1517 break; | |
1518 } | |
1519 } | |
1520 | |
1521 | |
1522 // Cache info for later passes | |
1523 derived_base_map[derived->_idx] = base; | |
1524 return base; | |
1525 } | |
1526 | |
1527 | |
1528 //------------------------------stretch_base_pointer_live_ranges--------------- | |
1529 // At each Safepoint, insert extra debug edges for each pair of derived value/ | |
1530 // base pointer that is live across the Safepoint for oopmap building. The | |
1531 // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the | |
1532 // required edge set. | |
1533 bool PhaseChaitin::stretch_base_pointer_live_ranges( ResourceArea *a ) { | |
1534 int must_recompute_live = false; | |
1535 uint maxlrg = _maxlrg; | |
1536 Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique()); | |
1537 memset( derived_base_map, 0, sizeof(Node*)*C->unique() ); | |
1538 | |
1539 // For all blocks in RPO do... | |
1540 for( uint i=0; i<_cfg._num_blocks; i++ ) { | |
1541 Block *b = _cfg._blocks[i]; | |
1542 // Note use of deep-copy constructor. I cannot hammer the original | |
1543 // liveout bits, because they are needed by the following coalesce pass. | |
1544 IndexSet liveout(_live->live(b)); | |
1545 | |
1546 for( uint j = b->end_idx() + 1; j > 1; j-- ) { | |
1547 Node *n = b->_nodes[j-1]; | |
1548 | |
1549 // Pre-split compares of loop-phis. Loop-phis form a cycle we would | |
1550 // like to see in the same register. Compare uses the loop-phi and so | |
1551 // extends its live range BUT cannot be part of the cycle. If this | |
1552 // extended live range overlaps with the update of the loop-phi value | |
1553 // we need both alive at the same time -- which requires at least 1 | |
1554 // copy. But because Intel has only 2-address registers we end up with | |
1555 // at least 2 copies, one before the loop-phi update instruction and | |
1556 // one after. Instead we split the input to the compare just after the | |
1557 // phi. | |
1558 if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) { | |
1559 Node *phi = n->in(1); | |
1560 if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) { | |
1561 Block *phi_block = _cfg._bbs[phi->_idx]; | |
1562 if( _cfg._bbs[phi_block->pred(2)->_idx] == b ) { | |
1563 const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI]; | |
1564 Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask ); | |
1565 insert_proj( phi_block, 1, spill, maxlrg++ ); | |
1566 n->set_req(1,spill); | |
1567 must_recompute_live = true; | |
1568 } | |
1569 } | |
1570 } | |
1571 | |
1572 // Get value being defined | |
1573 uint lidx = n2lidx(n); | |
1574 if( lidx && lidx < _maxlrg /* Ignore the occasional brand-new live range */) { | |
1575 // Remove from live-out set | |
1576 liveout.remove(lidx); | |
1577 | |
1578 // Copies do not define a new value and so do not interfere. | |
1579 // Remove the copies source from the liveout set before interfering. | |
1580 uint idx = n->is_Copy(); | |
1581 if( idx ) liveout.remove( n2lidx(n->in(idx)) ); | |
1582 } | |
1583 | |
1584 // Found a safepoint? | |
1585 JVMState *jvms = n->jvms(); | |
1586 if( jvms ) { | |
1587 // Now scan for a live derived pointer | |
1588 IndexSetIterator elements(&liveout); | |
1589 uint neighbor; | |
1590 while ((neighbor = elements.next()) != 0) { | |
1591 // Find reaching DEF for base and derived values | |
1592 // This works because we are still in SSA during this call. | |
1593 Node *derived = lrgs(neighbor)._def; | |
1594 const TypePtr *tj = derived->bottom_type()->isa_ptr(); | |
729
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1595 assert(!derived->bottom_type()->isa_narrowoop() || |
04fa5affa478
6709742: find_base_for_derived's use of Ideal NULL is unsafe causing crashes during register allocation
kvn
parents:
673
diff
changeset
|
1596 derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity"); |
0 | 1597 // If its an OOP with a non-zero offset, then it is derived. |
1598 if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) { | |
1599 Node *base = find_base_for_derived( derived_base_map, derived, maxlrg ); | |
1600 assert( base->_idx < _names.Size(), "" ); | |
1601 // Add reaching DEFs of derived pointer and base pointer as a | |
1602 // pair of inputs | |
1603 n->add_req( derived ); | |
1604 n->add_req( base ); | |
1605 | |
1606 // See if the base pointer is already live to this point. | |
1607 // Since I'm working on the SSA form, live-ness amounts to | |
1608 // reaching def's. So if I find the base's live range then | |
1609 // I know the base's def reaches here. | |
1610 if( (n2lidx(base) >= _maxlrg ||// (Brand new base (hence not live) or | |
1611 !liveout.member( n2lidx(base) ) ) && // not live) AND | |
1612 (n2lidx(base) > 0) && // not a constant | |
1613 _cfg._bbs[base->_idx] != b ) { // base not def'd in blk) | |
1614 // Base pointer is not currently live. Since I stretched | |
1615 // the base pointer to here and it crosses basic-block | |
1616 // boundaries, the global live info is now incorrect. | |
1617 // Recompute live. | |
1618 must_recompute_live = true; | |
1619 } // End of if base pointer is not live to debug info | |
1620 } | |
1621 } // End of scan all live data for derived ptrs crossing GC point | |
1622 } // End of if found a GC point | |
1623 | |
1624 // Make all inputs live | |
1625 if( !n->is_Phi() ) { // Phi function uses come from prior block | |
1626 for( uint k = 1; k < n->req(); k++ ) { | |
1627 uint lidx = n2lidx(n->in(k)); | |
1628 if( lidx < _maxlrg ) | |
1629 liveout.insert( lidx ); | |
1630 } | |
1631 } | |
1632 | |
1633 } // End of forall instructions in block | |
1634 liveout.clear(); // Free the memory used by liveout. | |
1635 | |
1636 } // End of forall blocks | |
1637 _maxlrg = maxlrg; | |
1638 | |
1639 // If I created a new live range I need to recompute live | |
1640 if( maxlrg != _ifg->_maxlrg ) | |
1641 must_recompute_live = true; | |
1642 | |
1643 return must_recompute_live != 0; | |
1644 } | |
1645 | |
1646 | |
1647 //------------------------------add_reference---------------------------------- | |
1648 // Extend the node to LRG mapping | |
1649 void PhaseChaitin::add_reference( const Node *node, const Node *old_node ) { | |
1650 _names.extend( node->_idx, n2lidx(old_node) ); | |
1651 } | |
1652 | |
1653 //------------------------------dump------------------------------------------- | |
1654 #ifndef PRODUCT | |
1655 void PhaseChaitin::dump( const Node *n ) const { | |
1656 uint r = (n->_idx < _names.Size() ) ? Find_const(n) : 0; | |
1657 tty->print("L%d",r); | |
1658 if( r && n->Opcode() != Op_Phi ) { | |
1659 if( _node_regs ) { // Got a post-allocation copy of allocation? | |
1660 tty->print("["); | |
1661 OptoReg::Name second = get_reg_second(n); | |
1662 if( OptoReg::is_valid(second) ) { | |
1663 if( OptoReg::is_reg(second) ) | |
1664 tty->print("%s:",Matcher::regName[second]); | |
1665 else | |
1666 tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second)); | |
1667 } | |
1668 OptoReg::Name first = get_reg_first(n); | |
1669 if( OptoReg::is_reg(first) ) | |
1670 tty->print("%s]",Matcher::regName[first]); | |
1671 else | |
1672 tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first)); | |
1673 } else | |
1674 n->out_RegMask().dump(); | |
1675 } | |
1676 tty->print("/N%d\t",n->_idx); | |
1677 tty->print("%s === ", n->Name()); | |
1678 uint k; | |
1679 for( k = 0; k < n->req(); k++) { | |
1680 Node *m = n->in(k); | |
1681 if( !m ) tty->print("_ "); | |
1682 else { | |
1683 uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0; | |
1684 tty->print("L%d",r); | |
1685 // Data MultiNode's can have projections with no real registers. | |
1686 // Don't die while dumping them. | |
1687 int op = n->Opcode(); | |
1688 if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) { | |
1689 if( _node_regs ) { | |
1690 tty->print("["); | |
1691 OptoReg::Name second = get_reg_second(n->in(k)); | |
1692 if( OptoReg::is_valid(second) ) { | |
1693 if( OptoReg::is_reg(second) ) | |
1694 tty->print("%s:",Matcher::regName[second]); | |
1695 else | |
1696 tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), | |
1697 reg2offset_unchecked(second)); | |
1698 } | |
1699 OptoReg::Name first = get_reg_first(n->in(k)); | |
1700 if( OptoReg::is_reg(first) ) | |
1701 tty->print("%s]",Matcher::regName[first]); | |
1702 else | |
1703 tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), | |
1704 reg2offset_unchecked(first)); | |
1705 } else | |
1706 n->in_RegMask(k).dump(); | |
1707 } | |
1708 tty->print("/N%d ",m->_idx); | |
1709 } | |
1710 } | |
1711 if( k < n->len() && n->in(k) ) tty->print("| "); | |
1712 for( ; k < n->len(); k++ ) { | |
1713 Node *m = n->in(k); | |
1714 if( !m ) break; | |
1715 uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0; | |
1716 tty->print("L%d",r); | |
1717 tty->print("/N%d ",m->_idx); | |
1718 } | |
1719 if( n->is_Mach() ) n->as_Mach()->dump_spec(tty); | |
1720 else n->dump_spec(tty); | |
1721 if( _spilled_once.test(n->_idx ) ) { | |
1722 tty->print(" Spill_1"); | |
1723 if( _spilled_twice.test(n->_idx ) ) | |
1724 tty->print(" Spill_2"); | |
1725 } | |
1726 tty->print("\n"); | |
1727 } | |
1728 | |
1729 void PhaseChaitin::dump( const Block * b ) const { | |
1730 b->dump_head( &_cfg._bbs ); | |
1731 | |
1732 // For all instructions | |
1733 for( uint j = 0; j < b->_nodes.size(); j++ ) | |
1734 dump(b->_nodes[j]); | |
1735 // Print live-out info at end of block | |
1736 if( _live ) { | |
1737 tty->print("Liveout: "); | |
1738 IndexSet *live = _live->live(b); | |
1739 IndexSetIterator elements(live); | |
1740 tty->print("{"); | |
1741 uint i; | |
1742 while ((i = elements.next()) != 0) { | |
1743 tty->print("L%d ", Find_const(i)); | |
1744 } | |
1745 tty->print_cr("}"); | |
1746 } | |
1747 tty->print("\n"); | |
1748 } | |
1749 | |
1750 void PhaseChaitin::dump() const { | |
1751 tty->print( "--- Chaitin -- argsize: %d framesize: %d ---\n", | |
1752 _matcher._new_SP, _framesize ); | |
1753 | |
1754 // For all blocks | |
1755 for( uint i = 0; i < _cfg._num_blocks; i++ ) | |
1756 dump(_cfg._blocks[i]); | |
1757 // End of per-block dump | |
1758 tty->print("\n"); | |
1759 | |
1760 if (!_ifg) { | |
1761 tty->print("(No IFG.)\n"); | |
1762 return; | |
1763 } | |
1764 | |
1765 // Dump LRG array | |
1766 tty->print("--- Live RanGe Array ---\n"); | |
1767 for(uint i2 = 1; i2 < _maxlrg; i2++ ) { | |
1768 tty->print("L%d: ",i2); | |
1769 if( i2 < _ifg->_maxlrg ) lrgs(i2).dump( ); | |
1770 else tty->print("new LRG"); | |
1771 } | |
1772 tty->print_cr(""); | |
1773 | |
1774 // Dump lo-degree list | |
1775 tty->print("Lo degree: "); | |
1776 for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next ) | |
1777 tty->print("L%d ",i3); | |
1778 tty->print_cr(""); | |
1779 | |
1780 // Dump lo-stk-degree list | |
1781 tty->print("Lo stk degree: "); | |
1782 for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next ) | |
1783 tty->print("L%d ",i4); | |
1784 tty->print_cr(""); | |
1785 | |
1786 // Dump lo-degree list | |
1787 tty->print("Hi degree: "); | |
1788 for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next ) | |
1789 tty->print("L%d ",i5); | |
1790 tty->print_cr(""); | |
1791 } | |
1792 | |
1793 //------------------------------dump_degree_lists------------------------------ | |
1794 void PhaseChaitin::dump_degree_lists() const { | |
1795 // Dump lo-degree list | |
1796 tty->print("Lo degree: "); | |
1797 for( uint i = _lo_degree; i; i = lrgs(i)._next ) | |
1798 tty->print("L%d ",i); | |
1799 tty->print_cr(""); | |
1800 | |
1801 // Dump lo-stk-degree list | |
1802 tty->print("Lo stk degree: "); | |
1803 for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next ) | |
1804 tty->print("L%d ",i2); | |
1805 tty->print_cr(""); | |
1806 | |
1807 // Dump lo-degree list | |
1808 tty->print("Hi degree: "); | |
1809 for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next ) | |
1810 tty->print("L%d ",i3); | |
1811 tty->print_cr(""); | |
1812 } | |
1813 | |
1814 //------------------------------dump_simplified-------------------------------- | |
1815 void PhaseChaitin::dump_simplified() const { | |
1816 tty->print("Simplified: "); | |
1817 for( uint i = _simplified; i; i = lrgs(i)._next ) | |
1818 tty->print("L%d ",i); | |
1819 tty->print_cr(""); | |
1820 } | |
1821 | |
1822 static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) { | |
1823 if ((int)reg < 0) | |
1824 sprintf(buf, "<OptoReg::%d>", (int)reg); | |
1825 else if (OptoReg::is_reg(reg)) | |
1826 strcpy(buf, Matcher::regName[reg]); | |
1827 else | |
1828 sprintf(buf,"%s + #%d",OptoReg::regname(OptoReg::c_frame_pointer), | |
1829 pc->reg2offset(reg)); | |
1830 return buf+strlen(buf); | |
1831 } | |
1832 | |
1833 //------------------------------dump_register---------------------------------- | |
1834 // Dump a register name into a buffer. Be intelligent if we get called | |
1835 // before allocation is complete. | |
1836 char *PhaseChaitin::dump_register( const Node *n, char *buf ) const { | |
1837 if( !this ) { // Not got anything? | |
1838 sprintf(buf,"N%d",n->_idx); // Then use Node index | |
1839 } else if( _node_regs ) { | |
1840 // Post allocation, use direct mappings, no LRG info available | |
1841 print_reg( get_reg_first(n), this, buf ); | |
1842 } else { | |
1843 uint lidx = Find_const(n); // Grab LRG number | |
1844 if( !_ifg ) { | |
1845 sprintf(buf,"L%d",lidx); // No register binding yet | |
1846 } else if( !lidx ) { // Special, not allocated value | |
1847 strcpy(buf,"Special"); | |
1848 } else if( (lrgs(lidx).num_regs() == 1) | |
1849 ? !lrgs(lidx).mask().is_bound1() | |
1850 : !lrgs(lidx).mask().is_bound2() ) { | |
1851 sprintf(buf,"L%d",lidx); // No register binding yet | |
1852 } else { // Hah! We have a bound machine register | |
1853 print_reg( lrgs(lidx).reg(), this, buf ); | |
1854 } | |
1855 } | |
1856 return buf+strlen(buf); | |
1857 } | |
1858 | |
1859 //----------------------dump_for_spill_split_recycle-------------------------- | |
1860 void PhaseChaitin::dump_for_spill_split_recycle() const { | |
1861 if( WizardMode && (PrintCompilation || PrintOpto) ) { | |
1862 // Display which live ranges need to be split and the allocator's state | |
1863 tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt); | |
1864 for( uint bidx = 1; bidx < _maxlrg; bidx++ ) { | |
1865 if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) { | |
1866 tty->print("L%d: ", bidx); | |
1867 lrgs(bidx).dump(); | |
1868 } | |
1869 } | |
1870 tty->cr(); | |
1871 dump(); | |
1872 } | |
1873 } | |
1874 | |
1875 //------------------------------dump_frame------------------------------------ | |
1876 void PhaseChaitin::dump_frame() const { | |
1877 const char *fp = OptoReg::regname(OptoReg::c_frame_pointer); | |
1878 const TypeTuple *domain = C->tf()->domain(); | |
1879 const int argcnt = domain->cnt() - TypeFunc::Parms; | |
1880 | |
1881 // Incoming arguments in registers dump | |
1882 for( int k = 0; k < argcnt; k++ ) { | |
1883 OptoReg::Name parmreg = _matcher._parm_regs[k].first(); | |
1884 if( OptoReg::is_reg(parmreg)) { | |
1885 const char *reg_name = OptoReg::regname(parmreg); | |
1886 tty->print("#r%3.3d %s", parmreg, reg_name); | |
1887 parmreg = _matcher._parm_regs[k].second(); | |
1888 if( OptoReg::is_reg(parmreg)) { | |
1889 tty->print(":%s", OptoReg::regname(parmreg)); | |
1890 } | |
1891 tty->print(" : parm %d: ", k); | |
1892 domain->field_at(k + TypeFunc::Parms)->dump(); | |
1893 tty->print_cr(""); | |
1894 } | |
1895 } | |
1896 | |
1897 // Check for un-owned padding above incoming args | |
1898 OptoReg::Name reg = _matcher._new_SP; | |
1899 if( reg > _matcher._in_arg_limit ) { | |
1900 reg = OptoReg::add(reg, -1); | |
1901 tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg)); | |
1902 } | |
1903 | |
1904 // Incoming argument area dump | |
1905 OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots()); | |
1906 while( reg > begin_in_arg ) { | |
1907 reg = OptoReg::add(reg, -1); | |
1908 tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg)); | |
1909 int j; | |
1910 for( j = 0; j < argcnt; j++) { | |
1911 if( _matcher._parm_regs[j].first() == reg || | |
1912 _matcher._parm_regs[j].second() == reg ) { | |
1913 tty->print("parm %d: ",j); | |
1914 domain->field_at(j + TypeFunc::Parms)->dump(); | |
1915 tty->print_cr(""); | |
1916 break; | |
1917 } | |
1918 } | |
1919 if( j >= argcnt ) | |
1920 tty->print_cr("HOLE, owned by SELF"); | |
1921 } | |
1922 | |
1923 // Old outgoing preserve area | |
1924 while( reg > _matcher._old_SP ) { | |
1925 reg = OptoReg::add(reg, -1); | |
1926 tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg)); | |
1927 } | |
1928 | |
1929 // Old SP | |
1930 tty->print_cr("# -- Old %s -- Framesize: %d --",fp, | |
1931 reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize); | |
1932 | |
1933 // Preserve area dump | |
1934 reg = OptoReg::add(reg, -1); | |
1935 while( OptoReg::is_stack(reg)) { | |
1936 tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg)); | |
1937 if( _matcher.return_addr() == reg ) | |
1938 tty->print_cr("return address"); | |
1939 else if( _matcher.return_addr() == OptoReg::add(reg,1) && | |
1940 VerifyStackAtCalls ) | |
1941 tty->print_cr("0xBADB100D +VerifyStackAtCalls"); | |
1942 else if ((int)OptoReg::reg2stack(reg) < C->fixed_slots()) | |
1943 tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg)); | |
1944 else | |
1945 tty->print_cr("pad2, in_preserve"); | |
1946 reg = OptoReg::add(reg, -1); | |
1947 } | |
1948 | |
1949 // Spill area dump | |
1950 reg = OptoReg::add(_matcher._new_SP, _framesize ); | |
1951 while( reg > _matcher._out_arg_limit ) { | |
1952 reg = OptoReg::add(reg, -1); | |
1953 tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg)); | |
1954 } | |
1955 | |
1956 // Outgoing argument area dump | |
1957 while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) { | |
1958 reg = OptoReg::add(reg, -1); | |
1959 tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg)); | |
1960 } | |
1961 | |
1962 // Outgoing new preserve area | |
1963 while( reg > _matcher._new_SP ) { | |
1964 reg = OptoReg::add(reg, -1); | |
1965 tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg)); | |
1966 } | |
1967 tty->print_cr("#"); | |
1968 } | |
1969 | |
1970 //------------------------------dump_bb---------------------------------------- | |
1971 void PhaseChaitin::dump_bb( uint pre_order ) const { | |
1972 tty->print_cr("---dump of B%d---",pre_order); | |
1973 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
1974 Block *b = _cfg._blocks[i]; | |
1975 if( b->_pre_order == pre_order ) | |
1976 dump(b); | |
1977 } | |
1978 } | |
1979 | |
1980 //------------------------------dump_lrg--------------------------------------- | |
1981 void PhaseChaitin::dump_lrg( uint lidx ) const { | |
1982 tty->print_cr("---dump of L%d---",lidx); | |
1983 | |
1984 if( _ifg ) { | |
1985 if( lidx >= _maxlrg ) { | |
1986 tty->print("Attempt to print live range index beyond max live range.\n"); | |
1987 return; | |
1988 } | |
1989 tty->print("L%d: ",lidx); | |
1990 lrgs(lidx).dump( ); | |
1991 } | |
1992 if( _ifg ) { tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx)); | |
1993 _ifg->neighbors(lidx)->dump(); | |
1994 tty->cr(); | |
1995 } | |
1996 // For all blocks | |
1997 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
1998 Block *b = _cfg._blocks[i]; | |
1999 int dump_once = 0; | |
2000 | |
2001 // For all instructions | |
2002 for( uint j = 0; j < b->_nodes.size(); j++ ) { | |
2003 Node *n = b->_nodes[j]; | |
2004 if( Find_const(n) == lidx ) { | |
2005 if( !dump_once++ ) { | |
2006 tty->cr(); | |
2007 b->dump_head( &_cfg._bbs ); | |
2008 } | |
2009 dump(n); | |
2010 continue; | |
2011 } | |
2012 uint cnt = n->req(); | |
2013 for( uint k = 1; k < cnt; k++ ) { | |
2014 Node *m = n->in(k); | |
2015 if (!m) continue; // be robust in the dumper | |
2016 if( Find_const(m) == lidx ) { | |
2017 if( !dump_once++ ) { | |
2018 tty->cr(); | |
2019 b->dump_head( &_cfg._bbs ); | |
2020 } | |
2021 dump(n); | |
2022 } | |
2023 } | |
2024 } | |
2025 } // End of per-block dump | |
2026 tty->cr(); | |
2027 } | |
2028 #endif // not PRODUCT | |
2029 | |
2030 //------------------------------print_chaitin_statistics------------------------------- | |
2031 int PhaseChaitin::_final_loads = 0; | |
2032 int PhaseChaitin::_final_stores = 0; | |
2033 int PhaseChaitin::_final_memoves= 0; | |
2034 int PhaseChaitin::_final_copies = 0; | |
2035 double PhaseChaitin::_final_load_cost = 0; | |
2036 double PhaseChaitin::_final_store_cost = 0; | |
2037 double PhaseChaitin::_final_memove_cost= 0; | |
2038 double PhaseChaitin::_final_copy_cost = 0; | |
2039 int PhaseChaitin::_conserv_coalesce = 0; | |
2040 int PhaseChaitin::_conserv_coalesce_pair = 0; | |
2041 int PhaseChaitin::_conserv_coalesce_trie = 0; | |
2042 int PhaseChaitin::_conserv_coalesce_quad = 0; | |
2043 int PhaseChaitin::_post_alloc = 0; | |
2044 int PhaseChaitin::_lost_opp_pp_coalesce = 0; | |
2045 int PhaseChaitin::_lost_opp_cflow_coalesce = 0; | |
2046 int PhaseChaitin::_used_cisc_instructions = 0; | |
2047 int PhaseChaitin::_unused_cisc_instructions = 0; | |
2048 int PhaseChaitin::_allocator_attempts = 0; | |
2049 int PhaseChaitin::_allocator_successes = 0; | |
2050 | |
2051 #ifndef PRODUCT | |
2052 uint PhaseChaitin::_high_pressure = 0; | |
2053 uint PhaseChaitin::_low_pressure = 0; | |
2054 | |
2055 void PhaseChaitin::print_chaitin_statistics() { | |
2056 tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.", _final_loads, _final_stores, _final_memoves, _final_copies); | |
2057 tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.", _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost); | |
2058 tty->print_cr("Adjusted spill cost = %7.0f.", | |
2059 _final_load_cost*4.0 + _final_store_cost * 2.0 + | |
2060 _final_copy_cost*1.0 + _final_memove_cost*12.0); | |
2061 tty->print("Conservatively coalesced %d copies, %d pairs", | |
2062 _conserv_coalesce, _conserv_coalesce_pair); | |
2063 if( _conserv_coalesce_trie || _conserv_coalesce_quad ) | |
2064 tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad); | |
2065 tty->print_cr(", %d post alloc.", _post_alloc); | |
2066 if( _lost_opp_pp_coalesce || _lost_opp_cflow_coalesce ) | |
2067 tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.", | |
2068 _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce ); | |
2069 if( _used_cisc_instructions || _unused_cisc_instructions ) | |
2070 tty->print_cr("Used cisc instruction %d, remained in register %d", | |
2071 _used_cisc_instructions, _unused_cisc_instructions); | |
2072 if( _allocator_successes != 0 ) | |
2073 tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes); | |
2074 tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure); | |
2075 } | |
2076 #endif // not PRODUCT |