Mercurial > hg > truffle
annotate src/share/vm/opto/chaitin.cpp @ 452:00b023ae2d78
6722113: CMS: Incorrect overflow handling during precleaning of Reference lists
Summary: When we encounter marking stack overflow during precleaning of Reference lists, we were using the overflow list mechanism, which can cause problems on account of mutating the mark word of the header because of conflicts with mutator accesses and updates of that field. Instead we should use the usual mechanism for overflow handling in concurrent phases, namely dirtying of the card on which the overflowed object lies. Since precleaning effectively does a form of discovered list processing, albeit with discovery enabled, we needed to adjust some code to be correct in the face of interleaved processing and discovery.
Reviewed-by: apetrusenko, jcoomes
author | ysr |
---|---|
date | Thu, 20 Nov 2008 12:27:41 -0800 |
parents | 0bf25c4807f9 |
children | 96964ebdb154 |
rev | line source |
---|---|
0 | 1 /* |
196 | 2 * Copyright 2000-2008 Sun Microsystems, Inc. All Rights Reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_chaitin.cpp.incl" | |
27 | |
28 //============================================================================= | |
29 | |
#ifndef PRODUCT
// Debug-only pretty printer for a live range (LRG): prints register count,
// register mask, mask size (with a "!!!" marker when the cached size disagrees
// with a recomputed one), effective degree, def information, cost/area/score,
// and one token per set flag bit.  Output goes to the global 'tty' stream.
void LRG::dump( ) const {
  ttyLocker ttyl;               // Hold the tty lock so the line prints atomically
  tty->print("%d ",num_regs());
  _mask.dump();
  if( _msize_valid ) {
    // Cross-check the cached mask size against a fresh computation;
    // a mismatch is flagged loudly rather than asserted so dumping
    // still works on a broken structure.
    if( mask_size() == compute_mask_size() ) tty->print(", #%d ",_mask_size);
    else tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size());
  } else {
    tty->print(", #?(%d) ",_mask.Size());
  }

  tty->print("EffDeg: ");
  if( _degree_valid ) tty->print( "%d ", _eff_degree );
  else tty->print("? ");

  if( is_multidef() ) {
    // Multiple defs: optionally list the node index of every def collected
    // (the _defs list is only populated when trace-spilling is enabled).
    tty->print("MultiDef ");
    if (_defs != NULL) {
      tty->print("(");
      for (int i = 0; i < _defs->length(); i++) {
        tty->print("N%d ", _defs->at(i)->_idx);
      }
      tty->print(") ");
    }
  }
  else if( _def == 0 ) tty->print("Dead ");
  else tty->print("Def: N%d ",_def->_idx);

  tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score());
  // Flags
  if( _is_oop ) tty->print("Oop ");
  if( _is_float ) tty->print("Float ");
  if( _was_spilled1 ) tty->print("Spilled ");
  if( _was_spilled2 ) tty->print("Spilled2 ");
  if( _direct_conflict ) tty->print("Direct_conflict ");
  if( _fat_proj ) tty->print("Fat ");
  if( _was_lo ) tty->print("Lo ");
  if( _has_copy ) tty->print("Copy ");
  if( _at_risk ) tty->print("Risk ");

  if( _must_spill ) tty->print("Must_spill ");
  if( _is_bound ) tty->print("Bound ");
  if( _msize_valid ) {
    // "Trivial" == trivially colorable (low degree relative to mask size)
    if( _degree_valid && lo_degree() ) tty->print("Trivial ");
  }

  tty->cr();
}
#endif
80 | |
81 //------------------------------score------------------------------------------ | |
82 // Compute score from cost and area. Low score is best to spill. | |
83 static double raw_score( double cost, double area ) { | |
84 return cost - (area*RegisterCostAreaRatio) * 1.52588e-5; | |
85 } | |
86 | |
87 double LRG::score() const { | |
88 // Scale _area by RegisterCostAreaRatio/64K then subtract from cost. | |
89 // Bigger area lowers score, encourages spilling this live range. | |
90 // Bigger cost raise score, prevents spilling this live range. | |
91 // (Note: 1/65536 is the magic constant below; I dont trust the C optimizer | |
92 // to turn a divide by a constant into a multiply by the reciprical). | |
93 double score = raw_score( _cost, _area); | |
94 | |
95 // Account for area. Basically, LRGs covering large areas are better | |
96 // to spill because more other LRGs get freed up. | |
97 if( _area == 0.0 ) // No area? Then no progress to spill | |
98 return 1e35; | |
99 | |
100 if( _was_spilled2 ) // If spilled once before, we are unlikely | |
101 return score + 1e30; // to make progress again. | |
102 | |
103 if( _cost >= _area*3.0 ) // Tiny area relative to cost | |
104 return score + 1e17; // Probably no progress to spill | |
105 | |
106 if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost | |
107 return score + 1e10; // Likely no progress to spill | |
108 | |
109 return score; | |
110 } | |
111 | |
112 //------------------------------LRG_List--------------------------------------- | |
113 LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) { | |
114 memset( _lidxs, 0, sizeof(uint)*max ); | |
115 } | |
116 | |
117 void LRG_List::extend( uint nidx, uint lidx ) { | |
118 _nesting.check(); | |
119 if( nidx >= _max ) { | |
120 uint size = 16; | |
121 while( size <= nidx ) size <<=1; | |
122 _lidxs = REALLOC_RESOURCE_ARRAY( uint, _lidxs, _max, size ); | |
123 _max = size; | |
124 } | |
125 while( _cnt <= nidx ) | |
126 _lidxs[_cnt++] = 0; | |
127 _lidxs[nidx] = lidx; | |
128 } | |
129 | |
130 #define NUMBUCKS 3 | |
131 | |
//------------------------------Chaitin----------------------------------------
// Construct the graph-coloring register allocator phase.  Besides forwarding
// to the PhaseRegAlloc base, this builds '_blks': the CFG's blocks sorted into
// NUMBUCKS frequency buckets (hottest first) so allocation visits hot blocks
// before cold ones.
PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
  : PhaseRegAlloc(unique, cfg, matcher,
#ifndef PRODUCT
       print_chaitin_statistics
#else
       NULL
#endif
       ),
    _names(unique), _uf_map(unique),
    _maxlrg(0), _live(0),
    _spilled_once(Thread::current()->resource_area()),
    _spilled_twice(Thread::current()->resource_area()),
    _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0),
    _oldphi(unique)
#ifndef PRODUCT
  , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
#endif
{
  NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
  uint i,j;
  // Build a list of basic blocks, sorted by frequency
  _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
  // Experiment with sorting strategies to speed compilation
  double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
  Block **buckets[NUMBUCKS];            // Array of buckets
  uint buckcnt[NUMBUCKS];               // Array of bucket counters
  double buckval[NUMBUCKS];             // Array of bucket value cutoffs
  for( i = 0; i < NUMBUCKS; i++ ) {
    buckets[i] = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
    buckcnt[i] = 0;
    // Bump by three orders of magnitude each time
    cutoff *= 0.001;
    buckval[i] = cutoff;
    for( j = 0; j < _cfg._num_blocks; j++ ) {
      buckets[i][j] = NULL;
    }
  }
  // Sort blocks into buckets.  A block lands in the first bucket whose
  // frequency cutoff it exceeds; the last bucket is a catch-all.
  for( i = 0; i < _cfg._num_blocks; i++ ) {
    for( j = 0; j < NUMBUCKS; j++ ) {
      if( (j == NUMBUCKS-1) || (_cfg._blocks[i]->_freq > buckval[j]) ) {
        // Assign block to end of list for appropriate bucket
        buckets[j][buckcnt[j]++] = _cfg._blocks[i];
        break;                  // kick out of inner loop
      }
    }
  }
  // Dump buckets into final block array, hottest bucket first.  Within a
  // bucket blocks keep their original CFG order (the bucketing is stable).
  uint blkcnt = 0;
  for( i = 0; i < NUMBUCKS; i++ ) {
    for( j = 0; j < buckcnt[i]; j++ ) {
      _blks[blkcnt++] = buckets[i][j];
    }
  }

  assert(blkcnt == _cfg._num_blocks, "Block array not totally filled");
}
190 | |
// Top-level driver for graph-coloring register allocation.  Repeatedly:
// build liveness + interference graph (IFG), coalesce copies, simplify and
// select colors; when coloring fails, split (spill) live ranges and retry,
// up to a bounded number of trips.  On success, computes the frame size and
// publishes the per-node register assignments into _node_regs.
void PhaseChaitin::Register_Allocate() {

  // Above the OLD FP (and in registers) are the incoming arguments.  Stack
  // slots in this area are called "arg_slots".  Above the NEW FP (and in
  // registers) is the outgoing argument area; above that is the spill/temp
  // area.  These are all "frame_slots".  Arg_slots start at the zero
  // stack_slots and count up to the known arg_size.  Frame_slots start at
  // the stack_slot #arg_size and go up.  After allocation I map stack
  // slots to actual offsets.  Stack-slots in the arg_slot area are biased
  // by the frame_size; stack-slots in the frame_slot area are biased by 0.

  _trip_cnt = 0;
  _alternate = 0;
  _matcher._allocation_started = true;

  ResourceArea live_arena;      // Arena for liveness & IFG info
  ResourceMark rm(&live_arena); // Mark so repeated passes can reclaim storage

  // Need live-ness for the IFG; need the IFG for coalescing.  If the
  // liveness is JUST for coalescing, then I can get some mileage by renaming
  // all copy-related live ranges low and then using the max copy-related
  // live range as a cut-off for LIVE and the IFG.  In other words, I can
  // build a subset of LIVE and IFG just for copies.
  PhaseLive live(_cfg,_names,&live_arena);

  // Need IFG for coalescing and coloring
  PhaseIFG ifg( &live_arena );
  _ifg = &ifg;

  // Make sure the name->lrg map covers every node created so far.
  if (C->unique() > _names.Size()) _names.extend(C->unique()-1, 0);

  // Come out of SSA world to the Named world.  Assign (virtual) registers to
  // Nodes.  Use the same register for all inputs and the output of PhiNodes
  // - effectively ending SSA form.  This requires either coalescing live
  // ranges or inserting copies.  For the moment, we insert "virtual copies"
  // - we pretend there is a copy prior to each Phi in predecessor blocks.
  // We will attempt to coalesce such "virtual copies" before we manifest
  // them for real.
  de_ssa();

  {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
    _live = NULL;               // Mark live as being not available
    rm.reset_to_mark();         // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);          // Empty IFG
    gather_lrg_masks( false );  // Collect LRG masks
    live.compute( _maxlrg );    // Compute liveness
    _live = &live;              // Mark LIVE as being available
  }

  // Base pointers are currently "used" by instructions which define new
  // derived pointers.  This makes base pointers live up to the where the
  // derived pointer is made, but not beyond.  Really, they need to be live
  // across any GC point where the derived value is live.  So this code looks
  // at all the GC points, and "stretches" the live range of any base pointer
  // to the GC point.
  if( stretch_base_pointer_live_ranges(&live_arena) ) {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler); )
    // Since some live range stretched, I need to recompute live
    _live = NULL;
    rm.reset_to_mark();         // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);
    gather_lrg_masks( false );
    live.compute( _maxlrg );
    _live = &live;
  }
  // Create the interference graph using virtual copies
  build_ifg_virtual( );         // Include stack slots this time

  // Aggressive (but pessimistic) copy coalescing.
  // This pass works on virtual copies.  Any virtual copies which are not
  // coalesced get manifested as actual copies
  {
    // The IFG is/was triangular.  I am 'squaring it up' so Union can run
    // faster.  Union requires a 'for all' operation which is slow on the
    // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
    // meaning I can visit all the Nodes neighbors less than a Node in time
    // O(# of neighbors), but I have to visit all the Nodes greater than a
    // given Node and search them for an instance, i.e., time O(#MaxLRG)).
    _ifg->SquareUp();

    PhaseAggressiveCoalesce coalesce( *this );
    coalesce.coalesce_driver( );
    // Insert un-coalesced copies.  Visit all Phis.  Where inputs to a Phi do
    // not match the Phi itself, insert a copy.
    coalesce.insert_copies(_matcher);
  }

  // After aggressive coalesce, attempt a first cut at coloring.
  // To color, we need the IFG and for that we need LIVE.
  {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
    _live = NULL;
    rm.reset_to_mark();         // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);
    gather_lrg_masks( true );
    live.compute( _maxlrg );
    _live = &live;
  }

  // Build physical interference graph
  uint must_spill = 0;
  must_spill = build_ifg_physical( &live_arena );
  // If we have a guaranteed spill, might as well spill now
  if( must_spill ) {
    if( !_maxlrg ) return;
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
    C->check_node_count(10*must_spill, "out of nodes before split");
    if (C->failing()) return;
    _maxlrg = Split( _maxlrg ); // Split spilling LRG everywhere
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
    // or we failed to split
    C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
    if (C->failing()) return;

#ifdef ASSERT
    if( VerifyOpto ) {
      _cfg.verify();
      verify_base_ptrs(&live_arena);
    }
#endif
    NOT_PRODUCT( C->verify_graph_edges(); )

    compact();                  // Compact LRGs; return new lower max lrg

    // Splitting changed the live ranges, so rebuild LIVE and the IFG
    // from scratch before attempting to color again.
    {
      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
      _live = NULL;
      rm.reset_to_mark();       // Reclaim working storage
      IndexSet::reset_memory(C, &live_arena);
      ifg.init(_maxlrg);        // Build a new interference graph
      gather_lrg_masks( true ); // Collect intersect mask
      live.compute( _maxlrg );  // Compute LIVE
      _live = &live;
    }
    build_ifg_physical( &live_arena );
    _ifg->SquareUp();
    _ifg->Compute_Effective_Degree();
    // Only do conservative coalescing if requested
    if( OptoCoalesce ) {
      // Conservative (and pessimistic) copy coalescing of those spills
      PhaseConservativeCoalesce coalesce( *this );
      // If max live ranges greater than cutoff, don't color the stack.
      // This cutoff can be larger than below since it is only done once.
      coalesce.coalesce_driver( );
    }
    compress_uf_map_for_nodes();

#ifdef ASSERT
    if( VerifyOpto ) _ifg->verify(this);
#endif
  } else {
    ifg.SquareUp();
    ifg.Compute_Effective_Degree();
#ifdef ASSERT
    set_was_low();
#endif
  }

  // Prepare for Simplify & Select
  cache_lrg_info();             // Count degree of LRGs

  // Simplify the InterFerence Graph by removing LRGs of low degree.
  // LRGs of low degree are trivially colorable.
  Simplify();

  // Select colors by re-inserting LRGs back into the IFG in reverse order.
  // Return whether or not something spills.
  uint spills = Select( );

  // If we spill, split and recycle the entire thing
  while( spills ) {
    if( _trip_cnt++ > 24 ) {
      // Too many spill-split-recycle trips: dump diagnostics, and after 27
      // trips give up on this method entirely rather than loop forever.
      DEBUG_ONLY( dump_for_spill_split_recycle(); )
      if( _trip_cnt > 27 ) {
        C->record_method_not_compilable("failed spill-split-recycle sanity check");
        return;
      }
    }

    if( !_maxlrg ) return;
    _maxlrg = Split( _maxlrg ); // Split spilling LRG everywhere
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
    C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split");
    if (C->failing()) return;
#ifdef ASSERT
    if( VerifyOpto ) {
      _cfg.verify();
      verify_base_ptrs(&live_arena);
    }
#endif

    compact();                  // Compact LRGs; return new lower max lrg

    // Nuke the live-ness and interference graph and LiveRanGe info
    {
      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
      _live = NULL;
      rm.reset_to_mark();       // Reclaim working storage
      IndexSet::reset_memory(C, &live_arena);
      ifg.init(_maxlrg);

      // Create LiveRanGe array.
      // Intersect register masks for all USEs and DEFs
      gather_lrg_masks( true );
      live.compute( _maxlrg );
      _live = &live;
    }
    must_spill = build_ifg_physical( &live_arena );
    _ifg->SquareUp();
    _ifg->Compute_Effective_Degree();

    // Only do conservative coalescing if requested
    if( OptoCoalesce ) {
      // Conservative (and pessimistic) copy coalescing
      PhaseConservativeCoalesce coalesce( *this );
      // Check for few live ranges determines how aggressive coalesce is.
      coalesce.coalesce_driver( );
    }
    compress_uf_map_for_nodes();
#ifdef ASSERT
    if( VerifyOpto ) _ifg->verify(this);
#endif
    cache_lrg_info();           // Count degree of LRGs

    // Simplify the InterFerence Graph by removing LRGs of low degree.
    // LRGs of low degree are trivially colorable.
    Simplify();

    // Select colors by re-inserting LRGs back into the IFG in reverse order.
    // Return whether or not something spills.
    spills = Select( );
  }

  // Count number of Simplify-Select trips per coloring success.
  _allocator_attempts += _trip_cnt + 1;
  _allocator_successes += 1;

  // Peephole remove copies
  post_allocate_copy_removal();

  // max_reg is past the largest *register* used.
  // Convert that to a frame_slot number.
  if( _max_reg <= _matcher._new_SP )
    _framesize = C->out_preserve_stack_slots();
  else _framesize = _max_reg -_matcher._new_SP;
  assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");

  // This frame must preserve the required fp alignment
  _framesize = round_to(_framesize, Matcher::stack_alignment_in_slots());
  assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );
#ifndef PRODUCT
  _total_framesize += _framesize;
  if( (int)_framesize > _max_framesize )
    _max_framesize = _framesize;
#endif

  // Convert CISC spills
  fixup_spills();

  // Log regalloc results
  CompileLog* log = Compile::current()->log();
  if (log != NULL) {
    log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());
  }

  if (C->failing()) return;

  NOT_PRODUCT( C->verify_graph_edges(); )

  // Move important info out of the live_arena to longer lasting storage.
  alloc_node_regs(_names.Size());
  for( uint i=0; i < _names.Size(); i++ ) {
    if( _names[i] ) {           // Live range associated with Node?
      LRG &lrg = lrgs( _names[i] );
      if( lrg.num_regs() == 1 ) {
        _node_regs[i].set1( lrg.reg() );
      } else {                  // Must be a register-pair
        if( !lrg._fat_proj ) {  // Must be aligned adjacent register pair
          // Live ranges record the highest register in their mask.
          // We want the low register for the AD file writer's convenience.
          _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) );
        } else {                // Misaligned; extract 2 bits
          OptoReg::Name hi = lrg.reg(); // Get hi register
          lrg.Remove(hi);       // Yank from mask
          int lo = lrg.mask().find_first_elem(); // Find lo
          _node_regs[i].set_pair( hi, lo );
        }
      }
      if( lrg._is_oop ) _node_oops.set(i);
    } else {
      _node_regs[i].set_bad();
    }
  }

  // Done!
  _live = NULL;
  _ifg = NULL;
  C->set_indexSet_arena(NULL);  // ResourceArea is at end of scope
}
494 | |
495 //------------------------------de_ssa----------------------------------------- | |
496 void PhaseChaitin::de_ssa() { | |
497 // Set initial Names for all Nodes. Most Nodes get the virtual register | |
498 // number. A few get the ZERO live range number. These do not | |
499 // get allocated, but instead rely on correct scheduling to ensure that | |
500 // only one instance is simultaneously live at a time. | |
501 uint lr_counter = 1; | |
502 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
503 Block *b = _cfg._blocks[i]; | |
504 uint cnt = b->_nodes.size(); | |
505 | |
506 // Handle all the normal Nodes in the block | |
507 for( uint j = 0; j < cnt; j++ ) { | |
508 Node *n = b->_nodes[j]; | |
509 // Pre-color to the zero live range, or pick virtual register | |
510 const RegMask &rm = n->out_RegMask(); | |
511 _names.map( n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0 ); | |
512 } | |
513 } | |
514 // Reset the Union-Find mapping to be identity | |
515 reset_uf_map(lr_counter); | |
516 } | |
517 | |
518 | |
//------------------------------gather_lrg_masks-------------------------------
// Gather LiveRanGe information, including register masks.  Modification of
// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
//
// Walks every instruction in every block, ANDing each node's output and input
// register masks into its live range, and recording per-LRG properties:
// copy bits, float-ness, prior-spill bits, bound masks, size/pressure by
// ideal register kind, oop-ness, fat-proj/misaligned-pair flags, and max
// block frequency.  Finishes with a per-liverange cleanup pass.
void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {

  // Nail down the frame pointer live range
  uint fp_lrg = n2lidx(_cfg._root->in(1)->in(TypeFunc::FramePtr));
  lrgs(fp_lrg)._cost += 1e12;   // Cost is infinite

  // For all blocks
  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
    Block *b = _cfg._blocks[i];

    // For all instructions
    for( uint j = 1; j < b->_nodes.size(); j++ ) {
      Node *n = b->_nodes[j];
      uint input_edge_start =1; // Skip control most nodes
      if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base();
      uint idx = n->is_Copy();  // Non-zero == edge index of the copied input

      // Get virtual register number, same as LiveRanGe index
      uint vreg = n2lidx(n);
      LRG &lrg = lrgs(vreg);
      if( vreg ) {              // No vreg means un-allocable (e.g. memory)

        // Collect has-copy bit on both ends of the copy
        if( idx ) {
          lrg._has_copy = 1;
          uint clidx = n2lidx(n->in(idx));
          LRG &copy_src = lrgs(clidx);
          copy_src._has_copy = 1;
        }

        // Check for float-vs-int live range (used in register-pressure
        // calculations)
        const Type *n_type = n->bottom_type();
        if( n_type->is_floatingpoint() )
          lrg._is_float = 1;

        // Check for twice prior spilling.  Once prior spilling might have
        // spilled 'soft', 2nd prior spill should have spilled 'hard' and
        // further spilling is unlikely to make progress.
        if( _spilled_once.test(n->_idx) ) {
          lrg._was_spilled1 = 1;
          if( _spilled_twice.test(n->_idx) )
            lrg._was_spilled2 = 1;
        }

#ifndef PRODUCT
        if (trace_spilling() && lrg._def != NULL) {
          // collect defs for MultiDef printing
          if (lrg._defs == NULL) {
            lrg._defs = new (_ifg->_arena) GrowableArray<Node*>();
            lrg._defs->append(lrg._def);
          }
          lrg._defs->append(n);
        }
#endif

        // Check for a single def LRG; these can spill nicely
        // via rematerialization.  Flag as NULL for no def found
        // yet, or 'n' for single def or -1 for many defs.
        lrg._def = lrg._def ? NodeSentinel : n;

        // Limit result register mask to acceptable registers
        const RegMask &rm = n->out_RegMask();
        lrg.AND( rm );
        // Check for bound register masks
        const RegMask &lrgmask = lrg.mask();
        if( lrgmask.is_bound1() || lrgmask.is_bound2() )
          lrg._is_bound = 1;

        // Check for maximum frequency value
        if( lrg._maxfreq < b->_freq )
          lrg._maxfreq = b->_freq;

        int ireg = n->ideal_reg();
        assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
                "oops must be in Op_RegP's" );
        // Check for oop-iness, or long/double
        // Check for multi-kill projection
        switch( ireg ) {
        case MachProjNode::fat_proj:
          // Fat projections have size equal to number of registers killed
          lrg.set_num_regs(rm.Size());
          lrg.set_reg_pressure(lrg.num_regs());
          lrg._fat_proj = 1;
          lrg._is_bound = 1;
          break;
        case Op_RegP:
#ifdef _LP64
          lrg.set_num_regs(2);  // Size is 2 stack words
#else
          lrg.set_num_regs(1);  // Size is 1 stack word
#endif
          // Register pressure is tracked relative to the maximum values
          // suggested for that platform, INTPRESSURE and FLOATPRESSURE,
          // and relative to other types which compete for the same regs.
          //
          // The following table contains suggested values based on the
          // architectures as defined in each .ad file.
          // INTPRESSURE and FLOATPRESSURE may be tuned differently for
          // compile-speed or performance.
          // Note1:
          // SPARC and SPARCV9 reg_pressures are at 2 instead of 1
          // since .ad registers are defined as high and low halves.
          // These reg_pressure values remain compatible with the code
          // in is_high_pressure() which relates get_invalid_mask_size(),
          // Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE.
          // Note2:
          // SPARC -d32 has 24 registers available for integral values,
          // but only 10 of these are safe for 64-bit longs.
          // Using set_reg_pressure(2) for both int and long means
          // the allocator will believe it can fit 26 longs into
          // registers.  Using 2 for longs and 1 for ints means the
          // allocator will attempt to put 52 integers into registers.
          // The settings below limit this problem to methods with
          // many long values which are being run on 32-bit SPARC.
          //
          // ------------------- reg_pressure --------------------
          // Each entry is reg_pressure_per_value,number_of_regs
          //         RegL  RegI  RegFlags   RegF RegD    INTPRESSURE  FLOATPRESSURE
          // IA32     2     1     1          1    1          6           6
          // IA64     1     1     1          1    1         50          41
          // SPARC    2     2     2          2    2         48 (24)     52 (26)
          // SPARCV9  2     2     2          2    2         48 (24)     52 (26)
          // AMD64    1     1     1          1    1         14          15
          // -----------------------------------------------------
#if defined(SPARC)
          lrg.set_reg_pressure(2);  // use for v9 as well
#else
          lrg.set_reg_pressure(1);  // normally one value per register
#endif
          if( n_type->isa_oop_ptr() ) {
            lrg._is_oop = 1;
          }
          break;
        case Op_RegL:           // Check for long or double
        case Op_RegD:
          lrg.set_num_regs(2);
          // Define platform specific register pressure
#ifdef SPARC
          lrg.set_reg_pressure(2);
#elif defined(IA32)
          if( ireg == Op_RegL ) {
            lrg.set_reg_pressure(2);
          } else {
            lrg.set_reg_pressure(1);
          }
#else
          lrg.set_reg_pressure(1);  // normally one value per register
#endif
          // If this def of a double forces a mis-aligned double,
          // flag as '_fat_proj' - really flag as allowing misalignment
          // AND changes how we count interferences.  A mis-aligned
          // double can interfere with TWO aligned pairs, or effectively
          // FOUR registers!
          if( rm.is_misaligned_Pair() ) {
            lrg._fat_proj = 1;
            lrg._is_bound = 1;
          }
          break;
        case Op_RegF:
        case Op_RegI:
        case Op_RegN:
        case Op_RegFlags:
        case 0:                 // not an ideal register
          lrg.set_num_regs(1);
#ifdef SPARC
          lrg.set_reg_pressure(2);
#else
          lrg.set_reg_pressure(1);
#endif
          break;
        default:
          ShouldNotReachHere();
        }
      }

      // Now do the same for inputs
      uint cnt = n->req();
      // Setup for CISC SPILLING
      uint inp = (uint)AdlcVMDeps::Not_cisc_spillable;
      if( UseCISCSpill && after_aggressive ) {
        inp = n->cisc_operand();
        if( inp != (uint)AdlcVMDeps::Not_cisc_spillable )
          // Convert operand number to edge index number
          inp = n->as_Mach()->operand_index(inp);
      }
      // Prepare register mask for each input
      for( uint k = input_edge_start; k < cnt; k++ ) {
        uint vreg = n2lidx(n->in(k));
        if( !vreg ) continue;

        // If this instruction is CISC Spillable, add the flags
        // bit to its appropriate input
        if( UseCISCSpill && after_aggressive && inp == k ) {
#ifndef PRODUCT
          if( TraceCISCSpill ) {
            tty->print("  use_cisc_RegMask: ");
            n->dump();
          }
#endif
          n->as_Mach()->use_cisc_RegMask();
        }

        LRG &lrg = lrgs(vreg);
        // // Testing for floating point code shape
        // Node *test = n->in(k);
        // if( test->is_Mach() ) {
        //   MachNode *m = test->as_Mach();
        //   int  op = m->ideal_Opcode();
        //   if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) {
        //     int zzz = 1;
        //   }
        // }

        // Limit result register mask to acceptable registers.
        // Do not limit registers from uncommon uses before
        // AggressiveCoalesce.  This effectively pre-virtual-splits
        // around uncommon uses of common defs.
        const RegMask &rm = n->in_RegMask(k);
        if( !after_aggressive &&
          _cfg._bbs[n->in(k)->_idx]->_freq > 1000*b->_freq ) {
          // Since we are BEFORE aggressive coalesce, leave the register
          // mask untrimmed by the call.  This encourages more coalescing.
          // Later, AFTER aggressive, this live range will have to spill
          // but the spiller handles slow-path calls very nicely.
        } else {
          lrg.AND( rm );
        }
        // Check for bound register masks
        const RegMask &lrgmask = lrg.mask();
        if( lrgmask.is_bound1() || lrgmask.is_bound2() )
          lrg._is_bound = 1;
        // If this use of a double forces a mis-aligned double,
        // flag as '_fat_proj' - really flag as allowing misalignment
        // AND changes how we count interferences.  A mis-aligned
        // double can interfere with TWO aligned pairs, or effectively
        // FOUR registers!
        if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) {
          lrg._fat_proj = 1;
          lrg._is_bound = 1;
        }
        // if the LRG is an unaligned pair, we will have to spill
        // so clear the LRG's register mask if it is not already spilled
        if ( !n->is_SpillCopy() &&
          (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
          lrgmask.is_misaligned_Pair()) {
          lrg.Clear();
        }

        // Check for maximum frequency value
        if( lrg._maxfreq < b->_freq )
          lrg._maxfreq = b->_freq;

      } // End for all allocated inputs
    } // end for all instructions
  } // end for all blocks

  // Final per-liverange setup
  for( uint i2=0; i2<_maxlrg; i2++ ) {
    LRG &lrg = lrgs(i2);
    if( lrg.num_regs() == 2 && !lrg._fat_proj )
      lrg.ClearToPairs();       // Restrict pair LRGs to aligned adjacent pairs
    lrg.compute_set_mask_size();
    if( lrg.not_free() ) {      // Handle case where we lose from the start
      lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
      lrg._direct_conflict = 1;
    }
    lrg.set_degree(0);          // no neighbors in IFG yet
  }
}
792 | |
793 //------------------------------set_was_low------------------------------------ | |
794 // Set the was-lo-degree bit. Conservative coalescing should not change the | |
795 // colorability of the graph. If any live range was of low-degree before | |
796 // coalescing, it should Simplify. This call sets the was-lo-degree bit. | |
797 // The bit is checked in Simplify. | |
798 void PhaseChaitin::set_was_low() { | |
799 #ifdef ASSERT | |
800 for( uint i = 1; i < _maxlrg; i++ ) { | |
801 int size = lrgs(i).num_regs(); | |
802 uint old_was_lo = lrgs(i)._was_lo; | |
803 lrgs(i)._was_lo = 0; | |
804 if( lrgs(i).lo_degree() ) { | |
805 lrgs(i)._was_lo = 1; // Trivially of low degree | |
806 } else { // Else check the Brigg's assertion | |
807 // Brigg's observation is that the lo-degree neighbors of a | |
808 // hi-degree live range will not interfere with the color choices | |
809 // of said hi-degree live range. The Simplify reverse-stack-coloring | |
810 // order takes care of the details. Hence you do not have to count | |
811 // low-degree neighbors when determining if this guy colors. | |
812 int briggs_degree = 0; | |
813 IndexSet *s = _ifg->neighbors(i); | |
814 IndexSetIterator elements(s); | |
815 uint lidx; | |
816 while((lidx = elements.next()) != 0) { | |
817 if( !lrgs(lidx).lo_degree() ) | |
818 briggs_degree += MAX2(size,lrgs(lidx).num_regs()); | |
819 } | |
820 if( briggs_degree < lrgs(i).degrees_of_freedom() ) | |
821 lrgs(i)._was_lo = 1; // Low degree via the briggs assertion | |
822 } | |
823 assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease"); | |
824 } | |
825 #endif | |
826 } | |
827 | |
828 #define REGISTER_CONSTRAINED 16 | |
829 | |
830 //------------------------------cache_lrg_info--------------------------------- | |
831 // Compute cost/area ratio, in case we spill. Build the lo-degree list. | |
832 void PhaseChaitin::cache_lrg_info( ) { | |
833 | |
834 for( uint i = 1; i < _maxlrg; i++ ) { | |
835 LRG &lrg = lrgs(i); | |
836 | |
837 // Check for being of low degree: means we can be trivially colored. | |
838 // Low degree, dead or must-spill guys just get to simplify right away | |
839 if( lrg.lo_degree() || | |
840 !lrg.alive() || | |
841 lrg._must_spill ) { | |
842 // Split low degree list into those guys that must get a | |
843 // register and those that can go to register or stack. | |
844 // The idea is LRGs that can go register or stack color first when | |
845 // they have a good chance of getting a register. The register-only | |
846 // lo-degree live ranges always get a register. | |
847 OptoReg::Name hi_reg = lrg.mask().find_last_elem(); | |
848 if( OptoReg::is_stack(hi_reg)) { // Can go to stack? | |
849 lrg._next = _lo_stk_degree; | |
850 _lo_stk_degree = i; | |
851 } else { | |
852 lrg._next = _lo_degree; | |
853 _lo_degree = i; | |
854 } | |
855 } else { // Else high degree | |
856 lrgs(_hi_degree)._prev = i; | |
857 lrg._next = _hi_degree; | |
858 lrg._prev = 0; | |
859 _hi_degree = i; | |
860 } | |
861 } | |
862 } | |
863 | |
864 //------------------------------Pre-Simplify----------------------------------- | |
865 // Simplify the IFG by removing LRGs of low degree that have NO copies | |
866 void PhaseChaitin::Pre_Simplify( ) { | |
867 | |
868 // Warm up the lo-degree no-copy list | |
869 int lo_no_copy = 0; | |
870 for( uint i = 1; i < _maxlrg; i++ ) { | |
871 if( (lrgs(i).lo_degree() && !lrgs(i)._has_copy) || | |
872 !lrgs(i).alive() || | |
873 lrgs(i)._must_spill ) { | |
874 lrgs(i)._next = lo_no_copy; | |
875 lo_no_copy = i; | |
876 } | |
877 } | |
878 | |
879 while( lo_no_copy ) { | |
880 uint lo = lo_no_copy; | |
881 lo_no_copy = lrgs(lo)._next; | |
882 int size = lrgs(lo).num_regs(); | |
883 | |
884 // Put the simplified guy on the simplified list. | |
885 lrgs(lo)._next = _simplified; | |
886 _simplified = lo; | |
887 | |
888 // Yank this guy from the IFG. | |
889 IndexSet *adj = _ifg->remove_node( lo ); | |
890 | |
891 // If any neighbors' degrees fall below their number of | |
892 // allowed registers, then put that neighbor on the low degree | |
893 // list. Note that 'degree' can only fall and 'numregs' is | |
894 // unchanged by this action. Thus the two are equal at most once, | |
895 // so LRGs hit the lo-degree worklists at most once. | |
896 IndexSetIterator elements(adj); | |
897 uint neighbor; | |
898 while ((neighbor = elements.next()) != 0) { | |
899 LRG *n = &lrgs(neighbor); | |
900 assert( _ifg->effective_degree(neighbor) == n->degree(), "" ); | |
901 | |
902 // Check for just becoming of-low-degree | |
903 if( n->just_lo_degree() && !n->_has_copy ) { | |
904 assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); | |
905 // Put on lo-degree list | |
906 n->_next = lo_no_copy; | |
907 lo_no_copy = neighbor; | |
908 } | |
909 } | |
910 } // End of while lo-degree no_copy worklist not empty | |
911 | |
912 // No more lo-degree no-copy live ranges to simplify | |
913 } | |
914 | |
915 //------------------------------Simplify--------------------------------------- | |
916 // Simplify the IFG by removing LRGs of low degree. | |
917 void PhaseChaitin::Simplify( ) { | |
918 | |
919 while( 1 ) { // Repeat till simplified it all | |
920 // May want to explore simplifying lo_degree before _lo_stk_degree. | |
921 // This might result in more spills coloring into registers during | |
922 // Select(). | |
923 while( _lo_degree || _lo_stk_degree ) { | |
924 // If possible, pull from lo_stk first | |
925 uint lo; | |
926 if( _lo_degree ) { | |
927 lo = _lo_degree; | |
928 _lo_degree = lrgs(lo)._next; | |
929 } else { | |
930 lo = _lo_stk_degree; | |
931 _lo_stk_degree = lrgs(lo)._next; | |
932 } | |
933 | |
934 // Put the simplified guy on the simplified list. | |
935 lrgs(lo)._next = _simplified; | |
936 _simplified = lo; | |
937 // If this guy is "at risk" then mark his current neighbors | |
938 if( lrgs(lo)._at_risk ) { | |
939 IndexSetIterator elements(_ifg->neighbors(lo)); | |
940 uint datum; | |
941 while ((datum = elements.next()) != 0) { | |
942 lrgs(datum)._risk_bias = lo; | |
943 } | |
944 } | |
945 | |
946 // Yank this guy from the IFG. | |
947 IndexSet *adj = _ifg->remove_node( lo ); | |
948 | |
949 // If any neighbors' degrees fall below their number of | |
950 // allowed registers, then put that neighbor on the low degree | |
951 // list. Note that 'degree' can only fall and 'numregs' is | |
952 // unchanged by this action. Thus the two are equal at most once, | |
953 // so LRGs hit the lo-degree worklist at most once. | |
954 IndexSetIterator elements(adj); | |
955 uint neighbor; | |
956 while ((neighbor = elements.next()) != 0) { | |
957 LRG *n = &lrgs(neighbor); | |
958 #ifdef ASSERT | |
959 if( VerifyOpto ) { | |
960 assert( _ifg->effective_degree(neighbor) == n->degree(), "" ); | |
961 } | |
962 #endif | |
963 | |
964 // Check for just becoming of-low-degree just counting registers. | |
965 // _must_spill live ranges are already on the low degree list. | |
966 if( n->just_lo_degree() && !n->_must_spill ) { | |
967 assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); | |
968 // Pull from hi-degree list | |
969 uint prev = n->_prev; | |
970 uint next = n->_next; | |
971 if( prev ) lrgs(prev)._next = next; | |
972 else _hi_degree = next; | |
973 lrgs(next)._prev = prev; | |
974 n->_next = _lo_degree; | |
975 _lo_degree = neighbor; | |
976 } | |
977 } | |
978 } // End of while lo-degree/lo_stk_degree worklist not empty | |
979 | |
980 // Check for got everything: is hi-degree list empty? | |
981 if( !_hi_degree ) break; | |
982 | |
983 // Time to pick a potential spill guy | |
984 uint lo_score = _hi_degree; | |
985 double score = lrgs(lo_score).score(); | |
986 double area = lrgs(lo_score)._area; | |
987 | |
988 // Find cheapest guy | |
989 debug_only( int lo_no_simplify=0; ); | |
990 for( uint i = _hi_degree; i; i = lrgs(i)._next ) { | |
991 assert( !(*_ifg->_yanked)[i], "" ); | |
992 // It's just vaguely possible to move hi-degree to lo-degree without | |
993 // going through a just-lo-degree stage: If you remove a double from | |
994 // a float live range it's degree will drop by 2 and you can skip the | |
995 // just-lo-degree stage. It's very rare (shows up after 5000+ methods | |
996 // in -Xcomp of Java2Demo). So just choose this guy to simplify next. | |
997 if( lrgs(i).lo_degree() ) { | |
998 lo_score = i; | |
999 break; | |
1000 } | |
1001 debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; ); | |
1002 double iscore = lrgs(i).score(); | |
1003 double iarea = lrgs(i)._area; | |
1004 | |
1005 // Compare cost/area of i vs cost/area of lo_score. Smaller cost/area | |
1006 // wins. Ties happen because all live ranges in question have spilled | |
1007 // a few times before and the spill-score adds a huge number which | |
1008 // washes out the low order bits. We are choosing the lesser of 2 | |
1009 // evils; in this case pick largest area to spill. | |
1010 if( iscore < score || | |
1011 (iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) ) { | |
1012 lo_score = i; | |
1013 score = iscore; | |
1014 area = iarea; | |
1015 } | |
1016 } | |
1017 LRG *lo_lrg = &lrgs(lo_score); | |
1018 // The live range we choose for spilling is either hi-degree, or very | |
1019 // rarely it can be low-degree. If we choose a hi-degree live range | |
1020 // there better not be any lo-degree choices. | |
1021 assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" ); | |
1022 | |
1023 // Pull from hi-degree list | |
1024 uint prev = lo_lrg->_prev; | |
1025 uint next = lo_lrg->_next; | |
1026 if( prev ) lrgs(prev)._next = next; | |
1027 else _hi_degree = next; | |
1028 lrgs(next)._prev = prev; | |
1029 // Jam him on the lo-degree list, despite his high degree. | |
1030 // Maybe he'll get a color, and maybe he'll spill. | |
1031 // Only Select() will know. | |
1032 lrgs(lo_score)._at_risk = true; | |
1033 _lo_degree = lo_score; | |
1034 lo_lrg->_next = 0; | |
1035 | |
1036 } // End of while not simplified everything | |
1037 | |
1038 } | |
1039 | |
1040 //------------------------------bias_color------------------------------------- | |
1041 // Choose a color using the biasing heuristic | |
1042 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { | |
1043 | |
1044 // Check for "at_risk" LRG's | |
1045 uint risk_lrg = Find(lrg._risk_bias); | |
1046 if( risk_lrg != 0 ) { | |
1047 // Walk the colored neighbors of the "at_risk" candidate | |
1048 // Choose a color which is both legal and already taken by a neighbor | |
1049 // of the "at_risk" candidate in order to improve the chances of the | |
1050 // "at_risk" candidate of coloring | |
1051 IndexSetIterator elements(_ifg->neighbors(risk_lrg)); | |
1052 uint datum; | |
1053 while ((datum = elements.next()) != 0) { | |
1054 OptoReg::Name reg = lrgs(datum).reg(); | |
1055 // If this LRG's register is legal for us, choose it | |
1056 if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE && | |
1057 lrg.mask().Member(OptoReg::add(reg,-chunk)) && | |
1058 (lrg.num_regs()==1 || // either size 1 | |
1059 (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs) | |
1060 return reg; | |
1061 } | |
1062 } | |
1063 | |
1064 uint copy_lrg = Find(lrg._copy_bias); | |
1065 if( copy_lrg != 0 ) { | |
1066 // If he has a color, | |
1067 if( !(*(_ifg->_yanked))[copy_lrg] ) { | |
1068 OptoReg::Name reg = lrgs(copy_lrg).reg(); | |
1069 // And it is legal for you, | |
1070 if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE && | |
1071 lrg.mask().Member(OptoReg::add(reg,-chunk)) && | |
1072 (lrg.num_regs()==1 || // either size 1 | |
1073 (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs) | |
1074 return reg; | |
1075 } else if( chunk == 0 ) { | |
1076 // Choose a color which is legal for him | |
1077 RegMask tempmask = lrg.mask(); | |
1078 tempmask.AND(lrgs(copy_lrg).mask()); | |
1079 OptoReg::Name reg; | |
1080 if( lrg.num_regs() == 1 ) { | |
1081 reg = tempmask.find_first_elem(); | |
1082 } else { | |
1083 tempmask.ClearToPairs(); | |
1084 reg = tempmask.find_first_pair(); | |
1085 } | |
1086 if( OptoReg::is_valid(reg) ) | |
1087 return reg; | |
1088 } | |
1089 } | |
1090 | |
1091 // If no bias info exists, just go with the register selection ordering | |
1092 if( lrg.num_regs() == 2 ) { | |
1093 // Find an aligned pair | |
1094 return OptoReg::add(lrg.mask().find_first_pair(),chunk); | |
1095 } | |
1096 | |
1097 // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate | |
1098 // copy removal to remove many more copies, by preventing a just-assigned | |
1099 // register from being repeatedly assigned. | |
1100 OptoReg::Name reg = lrg.mask().find_first_elem(); | |
1101 if( (++_alternate & 1) && OptoReg::is_valid(reg) ) { | |
1102 // This 'Remove; find; Insert' idiom is an expensive way to find the | |
1103 // SECOND element in the mask. | |
1104 lrg.Remove(reg); | |
1105 OptoReg::Name reg2 = lrg.mask().find_first_elem(); | |
1106 lrg.Insert(reg); | |
1107 if( OptoReg::is_reg(reg2)) | |
1108 reg = reg2; | |
1109 } | |
1110 return OptoReg::add( reg, chunk ); | |
1111 } | |
1112 | |
1113 //------------------------------choose_color----------------------------------- | |
1114 // Choose a color in the current chunk | |
1115 OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) { | |
1116 assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)"); | |
1117 assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)"); | |
1118 | |
1119 if( lrg.num_regs() == 1 || // Common Case | |
1120 !lrg._fat_proj ) // Aligned+adjacent pairs ok | |
1121 // Use a heuristic to "bias" the color choice | |
1122 return bias_color(lrg, chunk); | |
1123 | |
1124 assert( lrg.num_regs() >= 2, "dead live ranges do not color" ); | |
1125 | |
1126 // Fat-proj case or misaligned double argument. | |
1127 assert(lrg.compute_mask_size() == lrg.num_regs() || | |
1128 lrg.num_regs() == 2,"fat projs exactly color" ); | |
1129 assert( !chunk, "always color in 1st chunk" ); | |
1130 // Return the highest element in the set. | |
1131 return lrg.mask().find_last_elem(); | |
1132 } | |
1133 | |
1134 //------------------------------Select----------------------------------------- | |
1135 // Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted | |
1136 // in reverse order of removal. As long as nothing of hi-degree was yanked, | |
1137 // everything going back is guaranteed a color. Select that color. If some | |
1138 // hi-degree LRG cannot get a color then we record that we must spill. | |
1139 uint PhaseChaitin::Select( ) { | |
1140 uint spill_reg = LRG::SPILL_REG; | |
1141 _max_reg = OptoReg::Name(0); // Past max register used | |
1142 while( _simplified ) { | |
1143 // Pull next LRG from the simplified list - in reverse order of removal | |
1144 uint lidx = _simplified; | |
1145 LRG *lrg = &lrgs(lidx); | |
1146 _simplified = lrg->_next; | |
1147 | |
1148 | |
1149 #ifndef PRODUCT | |
1150 if (trace_spilling()) { | |
1151 ttyLocker ttyl; | |
1152 tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(), | |
1153 lrg->degrees_of_freedom()); | |
1154 lrg->dump(); | |
1155 } | |
1156 #endif | |
1157 | |
1158 // Re-insert into the IFG | |
1159 _ifg->re_insert(lidx); | |
1160 if( !lrg->alive() ) continue; | |
1161 // capture allstackedness flag before mask is hacked | |
1162 const int is_allstack = lrg->mask().is_AllStack(); | |
1163 | |
1164 // Yeah, yeah, yeah, I know, I know. I can refactor this | |
1165 // to avoid the GOTO, although the refactored code will not | |
1166 // be much clearer. We arrive here IFF we have a stack-based | |
1167 // live range that cannot color in the current chunk, and it | |
1168 // has to move into the next free stack chunk. | |
1169 int chunk = 0; // Current chunk is first chunk | |
1170 retry_next_chunk: | |
1171 | |
1172 // Remove neighbor colors | |
1173 IndexSet *s = _ifg->neighbors(lidx); | |
1174 | |
1175 debug_only(RegMask orig_mask = lrg->mask();) | |
1176 IndexSetIterator elements(s); | |
1177 uint neighbor; | |
1178 while ((neighbor = elements.next()) != 0) { | |
1179 // Note that neighbor might be a spill_reg. In this case, exclusion | |
1180 // of its color will be a no-op, since the spill_reg chunk is in outer | |
1181 // space. Also, if neighbor is in a different chunk, this exclusion | |
1182 // will be a no-op. (Later on, if lrg runs out of possible colors in | |
1183 // its chunk, a new chunk of color may be tried, in which case | |
1184 // examination of neighbors is started again, at retry_next_chunk.) | |
1185 LRG &nlrg = lrgs(neighbor); | |
1186 OptoReg::Name nreg = nlrg.reg(); | |
1187 // Only subtract masks in the same chunk | |
1188 if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) { | |
1189 #ifndef PRODUCT | |
1190 uint size = lrg->mask().Size(); | |
1191 RegMask rm = lrg->mask(); | |
1192 #endif | |
1193 lrg->SUBTRACT(nlrg.mask()); | |
1194 #ifndef PRODUCT | |
1195 if (trace_spilling() && lrg->mask().Size() != size) { | |
1196 ttyLocker ttyl; | |
1197 tty->print("L%d ", lidx); | |
1198 rm.dump(); | |
1199 tty->print(" intersected L%d ", neighbor); | |
1200 nlrg.mask().dump(); | |
1201 tty->print(" removed "); | |
1202 rm.SUBTRACT(lrg->mask()); | |
1203 rm.dump(); | |
1204 tty->print(" leaving "); | |
1205 lrg->mask().dump(); | |
1206 tty->cr(); | |
1207 } | |
1208 #endif | |
1209 } | |
1210 } | |
1211 //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness"); | |
1212 // Aligned pairs need aligned masks | |
1213 if( lrg->num_regs() == 2 && !lrg->_fat_proj ) | |
1214 lrg->ClearToPairs(); | |
1215 | |
1216 // Check if a color is available and if so pick the color | |
1217 OptoReg::Name reg = choose_color( *lrg, chunk ); | |
1218 #ifdef SPARC | |
1219 debug_only(lrg->compute_set_mask_size()); | |
1220 assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned"); | |
1221 #endif | |
1222 | |
1223 //--------------- | |
1224 // If we fail to color and the AllStack flag is set, trigger | |
1225 // a chunk-rollover event | |
1226 if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) { | |
1227 // Bump register mask up to next stack chunk | |
1228 chunk += RegMask::CHUNK_SIZE; | |
1229 lrg->Set_All(); | |
1230 | |
1231 goto retry_next_chunk; | |
1232 } | |
1233 | |
1234 //--------------- | |
1235 // Did we get a color? | |
1236 else if( OptoReg::is_valid(reg)) { | |
1237 #ifndef PRODUCT | |
1238 RegMask avail_rm = lrg->mask(); | |
1239 #endif | |
1240 | |
1241 // Record selected register | |
1242 lrg->set_reg(reg); | |
1243 | |
1244 if( reg >= _max_reg ) // Compute max register limit | |
1245 _max_reg = OptoReg::add(reg,1); | |
1246 // Fold reg back into normal space | |
1247 reg = OptoReg::add(reg,-chunk); | |
1248 | |
1249 // If the live range is not bound, then we actually had some choices | |
1250 // to make. In this case, the mask has more bits in it than the colors | |
1251 // choosen. Restrict the mask to just what was picked. | |
1252 if( lrg->num_regs() == 1 ) { // Size 1 live range | |
1253 lrg->Clear(); // Clear the mask | |
1254 lrg->Insert(reg); // Set regmask to match selected reg | |
1255 lrg->set_mask_size(1); | |
1256 } else if( !lrg->_fat_proj ) { | |
1257 // For pairs, also insert the low bit of the pair | |
1258 assert( lrg->num_regs() == 2, "unbound fatproj???" ); | |
1259 lrg->Clear(); // Clear the mask | |
1260 lrg->Insert(reg); // Set regmask to match selected reg | |
1261 lrg->Insert(OptoReg::add(reg,-1)); | |
1262 lrg->set_mask_size(2); | |
1263 } else { // Else fatproj | |
1264 // mask must be equal to fatproj bits, by definition | |
1265 } | |
1266 #ifndef PRODUCT | |
1267 if (trace_spilling()) { | |
1268 ttyLocker ttyl; | |
1269 tty->print("L%d selected ", lidx); | |
1270 lrg->mask().dump(); | |
1271 tty->print(" from "); | |
1272 avail_rm.dump(); | |
1273 tty->cr(); | |
1274 } | |
1275 #endif | |
1276 // Note that reg is the highest-numbered register in the newly-bound mask. | |
1277 } // end color available case | |
1278 | |
1279 //--------------- | |
1280 // Live range is live and no colors available | |
1281 else { | |
1282 assert( lrg->alive(), "" ); | |
295
ea18057223c4
6732194: Data corruption dependent on -server/-client/-Xbatch
never
parents:
196
diff
changeset
|
1283 assert( !lrg->_fat_proj || lrg->is_multidef() || |
0 | 1284 lrg->_def->outcnt() > 0, "fat_proj cannot spill"); |
1285 assert( !orig_mask.is_AllStack(), "All Stack does not spill" ); | |
1286 | |
1287 // Assign the special spillreg register | |
1288 lrg->set_reg(OptoReg::Name(spill_reg++)); | |
1289 // Do not empty the regmask; leave mask_size lying around | |
1290 // for use during Spilling | |
1291 #ifndef PRODUCT | |
1292 if( trace_spilling() ) { | |
1293 ttyLocker ttyl; | |
1294 tty->print("L%d spilling with neighbors: ", lidx); | |
1295 s->dump(); | |
1296 debug_only(tty->print(" original mask: ")); | |
1297 debug_only(orig_mask.dump()); | |
1298 dump_lrg(lidx); | |
1299 } | |
1300 #endif | |
1301 } // end spill case | |
1302 | |
1303 } | |
1304 | |
1305 return spill_reg-LRG::SPILL_REG; // Return number of spills | |
1306 } | |
1307 | |
1308 | |
1309 //------------------------------copy_was_spilled------------------------------- | |
1310 // Copy 'was_spilled'-edness from the source Node to the dst Node. | |
1311 void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) { | |
1312 if( _spilled_once.test(src->_idx) ) { | |
1313 _spilled_once.set(dst->_idx); | |
1314 lrgs(Find(dst))._was_spilled1 = 1; | |
1315 if( _spilled_twice.test(src->_idx) ) { | |
1316 _spilled_twice.set(dst->_idx); | |
1317 lrgs(Find(dst))._was_spilled2 = 1; | |
1318 } | |
1319 } | |
1320 } | |
1321 | |
1322 //------------------------------set_was_spilled-------------------------------- | |
1323 // Set the 'spilled_once' or 'spilled_twice' flag on a node. | |
1324 void PhaseChaitin::set_was_spilled( Node *n ) { | |
1325 if( _spilled_once.test_set(n->_idx) ) | |
1326 _spilled_twice.set(n->_idx); | |
1327 } | |
1328 | |
1329 //------------------------------fixup_spills----------------------------------- | |
1330 // Convert Ideal spill instructions into proper FramePtr + offset Loads and | |
1331 // Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are. | |
1332 void PhaseChaitin::fixup_spills() { | |
1333 // This function does only cisc spill work. | |
1334 if( !UseCISCSpill ) return; | |
1335 | |
1336 NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); ) | |
1337 | |
1338 // Grab the Frame Pointer | |
1339 Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr); | |
1340 | |
1341 // For all blocks | |
1342 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
1343 Block *b = _cfg._blocks[i]; | |
1344 | |
1345 // For all instructions in block | |
1346 uint last_inst = b->end_idx(); | |
1347 for( uint j = 1; j <= last_inst; j++ ) { | |
1348 Node *n = b->_nodes[j]; | |
1349 | |
1350 // Dead instruction??? | |
1351 assert( n->outcnt() != 0 ||// Nothing dead after post alloc | |
1352 C->top() == n || // Or the random TOP node | |
1353 n->is_Proj(), // Or a fat-proj kill node | |
1354 "No dead instructions after post-alloc" ); | |
1355 | |
1356 int inp = n->cisc_operand(); | |
1357 if( inp != AdlcVMDeps::Not_cisc_spillable ) { | |
1358 // Convert operand number to edge index number | |
1359 MachNode *mach = n->as_Mach(); | |
1360 inp = mach->operand_index(inp); | |
1361 Node *src = n->in(inp); // Value to load or store | |
1362 LRG &lrg_cisc = lrgs( Find_const(src) ); | |
1363 OptoReg::Name src_reg = lrg_cisc.reg(); | |
1364 // Doubles record the HIGH register of an adjacent pair. | |
1365 src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs()); | |
1366 if( OptoReg::is_stack(src_reg) ) { // If input is on stack | |
1367 // This is a CISC Spill, get stack offset and construct new node | |
1368 #ifndef PRODUCT | |
1369 if( TraceCISCSpill ) { | |
1370 tty->print(" reg-instr: "); | |
1371 n->dump(); | |
1372 } | |
1373 #endif | |
1374 int stk_offset = reg2offset(src_reg); | |
1375 // Bailout if we might exceed node limit when spilling this instruction | |
1376 C->check_node_count(0, "out of nodes fixing spills"); | |
1377 if (C->failing()) return; | |
1378 // Transform node | |
1379 MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach(); | |
1380 cisc->set_req(inp,fp); // Base register is frame pointer | |
1381 if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) { | |
1382 assert( cisc->oper_input_base() == 2, "Only adding one edge"); | |
1383 cisc->ins_req(1,src); // Requires a memory edge | |
1384 } | |
1385 b->_nodes.map(j,cisc); // Insert into basic block | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
113
diff
changeset
|
1386 n->subsume_by(cisc); // Correct graph |
0 | 1387 // |
1388 ++_used_cisc_instructions; | |
1389 #ifndef PRODUCT | |
1390 if( TraceCISCSpill ) { | |
1391 tty->print(" cisc-instr: "); | |
1392 cisc->dump(); | |
1393 } | |
1394 #endif | |
1395 } else { | |
1396 #ifndef PRODUCT | |
1397 if( TraceCISCSpill ) { | |
1398 tty->print(" using reg-instr: "); | |
1399 n->dump(); | |
1400 } | |
1401 #endif | |
1402 ++_unused_cisc_instructions; // input can be on stack | |
1403 } | |
1404 } | |
1405 | |
1406 } // End of for all instructions | |
1407 | |
1408 } // End of for all blocks | |
1409 } | |
1410 | |
1411 //------------------------------find_base_for_derived-------------------------- | |
1412 // Helper to stretch above; recursively discover the base Node for a | |
1413 // given derived Node. Easy for AddP-related machine nodes, but needs | |
1414 // to be recursive for derived Phis. | |
1415 Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) { | |
1416 // See if already computed; if so return it | |
1417 if( derived_base_map[derived->_idx] ) | |
1418 return derived_base_map[derived->_idx]; | |
1419 | |
1420 // See if this happens to be a base. | |
1421 // NOTE: we use TypePtr instead of TypeOopPtr because we can have | |
1422 // pointers derived from NULL! These are always along paths that | |
1423 // can't happen at run-time but the optimizer cannot deduce it so | |
1424 // we have to handle it gracefully. | |
1425 const TypePtr *tj = derived->bottom_type()->isa_ptr(); | |
1426 // If its an OOP with a non-zero offset, then it is derived. | |
1427 if( tj->_offset == 0 ) { | |
1428 derived_base_map[derived->_idx] = derived; | |
1429 return derived; | |
1430 } | |
1431 // Derived is NULL+offset? Base is NULL! | |
1432 if( derived->is_Con() ) { | |
1433 Node *base = new (C, 1) ConPNode( TypePtr::NULL_PTR ); | |
1434 uint no_lidx = 0; // an unmatched constant in debug info has no LRG | |
1435 _names.extend(base->_idx, no_lidx); | |
1436 derived_base_map[derived->_idx] = base; | |
1437 return base; | |
1438 } | |
1439 | |
1440 // Check for AddP-related opcodes | |
1441 if( !derived->is_Phi() ) { | |
1442 assert( derived->as_Mach()->ideal_Opcode() == Op_AddP, "" ); | |
1443 Node *base = derived->in(AddPNode::Base); | |
1444 derived_base_map[derived->_idx] = base; | |
1445 return base; | |
1446 } | |
1447 | |
1448 // Recursively find bases for Phis. | |
1449 // First check to see if we can avoid a base Phi here. | |
1450 Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg); | |
1451 uint i; | |
1452 for( i = 2; i < derived->req(); i++ ) | |
1453 if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg)) | |
1454 break; | |
1455 // Went to the end without finding any different bases? | |
1456 if( i == derived->req() ) { // No need for a base Phi here | |
1457 derived_base_map[derived->_idx] = base; | |
1458 return base; | |
1459 } | |
1460 | |
1461 // Now we see we need a base-Phi here to merge the bases | |
1462 base = new (C, derived->req()) PhiNode( derived->in(0), base->bottom_type() ); | |
1463 for( i = 1; i < derived->req(); i++ ) | |
1464 base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg)); | |
1465 | |
1466 // Search the current block for an existing base-Phi | |
1467 Block *b = _cfg._bbs[derived->_idx]; | |
1468 for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi | |
1469 Node *phi = b->_nodes[i]; | |
1470 if( !phi->is_Phi() ) { // Found end of Phis with no match? | |
1471 b->_nodes.insert( i, base ); // Must insert created Phi here as base | |
1472 _cfg._bbs.map( base->_idx, b ); | |
1473 new_lrg(base,maxlrg++); | |
1474 break; | |
1475 } | |
1476 // See if Phi matches. | |
1477 uint j; | |
1478 for( j = 1; j < base->req(); j++ ) | |
1479 if( phi->in(j) != base->in(j) && | |
1480 !(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs | |
1481 break; | |
1482 if( j == base->req() ) { // All inputs match? | |
1483 base = phi; // Then use existing 'phi' and drop 'base' | |
1484 break; | |
1485 } | |
1486 } | |
1487 | |
1488 | |
1489 // Cache info for later passes | |
1490 derived_base_map[derived->_idx] = base; | |
1491 return base; | |
1492 } | |
1493 | |
1494 | |
//------------------------------stretch_base_pointer_live_ranges---------------
// At each Safepoint, insert extra debug edges for each pair of derived value/
// base pointer that is live across the Safepoint for oopmap building.  The
// edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
// required edge set.
// Returns true if liveness must be recomputed by the caller: either a
// brand-new live range was created, or a base pointer's live range was
// stretched across a basic-block boundary.
bool PhaseChaitin::stretch_base_pointer_live_ranges( ResourceArea *a ) {
  int must_recompute_live = false;
  uint maxlrg = _maxlrg;
  // Cache of derived-pointer -> base-pointer discoveries, indexed by node
  // idx.  Zero-filled so NULL means "not computed yet".  Allocated in the
  // caller-supplied resource arena.
  Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique());
  memset( derived_base_map, 0, sizeof(Node*)*C->unique() );

  // For all blocks in RPO do...
  for( uint i=0; i<_cfg._num_blocks; i++ ) {
    Block *b = _cfg._blocks[i];
    // Note use of deep-copy constructor.  I cannot hammer the original
    // liveout bits, because they are needed by the following coalesce pass.
    IndexSet liveout(_live->live(b));

    // Walk the block bottom-up so 'liveout' always holds the set of live
    // ranges live just below the current instruction.
    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
      Node *n = b->_nodes[j-1];

      // Pre-split compares of loop-phis.  Loop-phis form a cycle we would
      // like to see in the same register.  Compare uses the loop-phi and so
      // extends its live range BUT cannot be part of the cycle.  If this
      // extended live range overlaps with the update of the loop-phi value
      // we need both alive at the same time -- which requires at least 1
      // copy.  But because Intel has only 2-address registers we end up with
      // at least 2 copies, one before the loop-phi update instruction and
      // one after.  Instead we split the input to the compare just after the
      // phi.
      if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {
        Node *phi = n->in(1);
        if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
          Block *phi_block = _cfg._bbs[phi->_idx];
          // pred(2) of the loop-head block is its back-edge predecessor;
          // only split when the back edge comes from this block.
          if( _cfg._bbs[phi_block->pred(2)->_idx] == b ) {
            const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
            Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
            insert_proj( phi_block, 1, spill, maxlrg++ );
            n->set_req(1,spill);
            must_recompute_live = true;
          }
        }
      }

      // Get value being defined
      uint lidx = n2lidx(n);
      if( lidx && lidx < _maxlrg /* Ignore the occasional brand-new live range */) {
        // Remove from live-out set
        liveout.remove(lidx);

        // Copies do not define a new value and so do not interfere.
        // Remove the copies source from the liveout set before interfering.
        uint idx = n->is_Copy();
        if( idx ) liveout.remove( n2lidx(n->in(idx)) );
      }

      // Found a safepoint?
      JVMState *jvms = n->jvms();
      if( jvms ) {
        // Now scan for a live derived pointer
        IndexSetIterator elements(&liveout);
        uint neighbor;
        while ((neighbor = elements.next()) != 0) {
          // Find reaching DEF for base and derived values
          // This works because we are still in SSA during this call.
          Node *derived = lrgs(neighbor)._def;
          const TypePtr *tj = derived->bottom_type()->isa_ptr();
          // If its an OOP with a non-zero offset, then it is derived.
          if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {
            Node *base = find_base_for_derived( derived_base_map, derived, maxlrg );
            assert( base->_idx < _names.Size(), "" );
            // Add reaching DEFs of derived pointer and base pointer as a
            // pair of inputs
            n->add_req( derived );
            n->add_req( base );

            // See if the base pointer is already live to this point.
            // Since I'm working on the SSA form, live-ness amounts to
            // reaching def's.  So if I find the base's live range then
            // I know the base's def reaches here.
            if( (n2lidx(base) >= _maxlrg ||// (Brand new base (hence not live) or
                 !liveout.member( n2lidx(base) ) ) && // not live) AND
                 (n2lidx(base) > 0) &&                // not a constant
                 _cfg._bbs[base->_idx] != b ) {       // base not def'd in blk)
              // Base pointer is not currently live.  Since I stretched
              // the base pointer to here and it crosses basic-block
              // boundaries, the global live info is now incorrect.
              // Recompute live.
              must_recompute_live = true;
            } // End of if base pointer is not live to debug info
          }
        } // End of scan all live data for derived ptrs crossing GC point
      } // End of if found a GC point

      // Make all inputs live
      if( !n->is_Phi() ) {      // Phi function uses come from prior block
        for( uint k = 1; k < n->req(); k++ ) {
          uint lidx = n2lidx(n->in(k));
          if( lidx < _maxlrg )
            liveout.insert( lidx );
        }
      }

    } // End of forall instructions in block
    liveout.clear();  // Free the memory used by liveout.

  } // End of forall blocks
  _maxlrg = maxlrg;

  // If I created a new live range I need to recompute live
  if( maxlrg != _ifg->_maxlrg )
    must_recompute_live = true;

  return must_recompute_live != 0;
}
1610 | |
1611 | |
1612 //------------------------------add_reference---------------------------------- | |
1613 // Extend the node to LRG mapping | |
1614 void PhaseChaitin::add_reference( const Node *node, const Node *old_node ) { | |
1615 _names.extend( node->_idx, n2lidx(old_node) ); | |
1616 } | |
1617 | |
1618 //------------------------------dump------------------------------------------- | |
1619 #ifndef PRODUCT | |
// Dump one node in "L<lrg>[regs]/N<idx>  name === inputs..." form.  Shows the
// live range number, the assigned register(s) if allocation has completed
// (else the allowed register mask), then each input with its own live range.
void PhaseChaitin::dump( const Node *n ) const {
  uint r = (n->_idx < _names.Size() ) ? Find_const(n) : 0;
  tty->print("L%d",r);
  if( r && n->Opcode() != Op_Phi ) {
    if( _node_regs ) {          // Got a post-allocation copy of allocation?
      tty->print("[");
      // The second register of a pair (if valid) prints first, as "reg:" or
      // as a frame-pointer-relative stack slot "fp+off:".
      OptoReg::Name second = get_reg_second(n);
      if( OptoReg::is_valid(second) ) {
        if( OptoReg::is_reg(second) )
          tty->print("%s:",Matcher::regName[second]);
        else
          tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second));
      }
      OptoReg::Name first = get_reg_first(n);
      if( OptoReg::is_reg(first) )
        tty->print("%s]",Matcher::regName[first]);
      else
        tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first));
    } else
      n->out_RegMask().dump();  // Pre-allocation: show the allowed mask
  }
  tty->print("/N%d\t",n->_idx);
  tty->print("%s === ", n->Name());
  uint k;
  // Required inputs
  for( k = 0; k < n->req(); k++) {
    Node *m = n->in(k);
    if( !m ) tty->print("_ ");  // missing input slot
    else {
      uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
      tty->print("L%d",r);
      // Data MultiNode's can have projections with no real registers.
      // Don't die while dumping them.
      int op = n->Opcode();
      if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) {
        if( _node_regs ) {
          tty->print("[");
          OptoReg::Name second = get_reg_second(n->in(k));
          if( OptoReg::is_valid(second) ) {
            if( OptoReg::is_reg(second) )
              tty->print("%s:",Matcher::regName[second]);
            else
              tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer),
                         reg2offset_unchecked(second));
          }
          OptoReg::Name first = get_reg_first(n->in(k));
          if( OptoReg::is_reg(first) )
            tty->print("%s]",Matcher::regName[first]);
          else
            tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer),
                       reg2offset_unchecked(first));
        } else
          n->in_RegMask(k).dump();
      }
      tty->print("/N%d ",m->_idx);
    }
  }
  // Precedence (non-required) edges follow after a "|" separator.
  if( k < n->len() && n->in(k) ) tty->print("| ");
  for( ; k < n->len(); k++ ) {
    Node *m = n->in(k);
    if( !m ) break;
    uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
    tty->print("L%d",r);
    tty->print("/N%d ",m->_idx);
  }
  if( n->is_Mach() ) n->as_Mach()->dump_spec(tty);
  else n->dump_spec(tty);
  // Spill-history markers set elsewhere by the allocator.
  if( _spilled_once.test(n->_idx ) ) {
    tty->print(" Spill_1");
    if( _spilled_twice.test(n->_idx ) )
      tty->print(" Spill_2");
  }
  tty->print("\n");
}
1693 | |
1694 void PhaseChaitin::dump( const Block * b ) const { | |
1695 b->dump_head( &_cfg._bbs ); | |
1696 | |
1697 // For all instructions | |
1698 for( uint j = 0; j < b->_nodes.size(); j++ ) | |
1699 dump(b->_nodes[j]); | |
1700 // Print live-out info at end of block | |
1701 if( _live ) { | |
1702 tty->print("Liveout: "); | |
1703 IndexSet *live = _live->live(b); | |
1704 IndexSetIterator elements(live); | |
1705 tty->print("{"); | |
1706 uint i; | |
1707 while ((i = elements.next()) != 0) { | |
1708 tty->print("L%d ", Find_const(i)); | |
1709 } | |
1710 tty->print_cr("}"); | |
1711 } | |
1712 tty->print("\n"); | |
1713 } | |
1714 | |
1715 void PhaseChaitin::dump() const { | |
1716 tty->print( "--- Chaitin -- argsize: %d framesize: %d ---\n", | |
1717 _matcher._new_SP, _framesize ); | |
1718 | |
1719 // For all blocks | |
1720 for( uint i = 0; i < _cfg._num_blocks; i++ ) | |
1721 dump(_cfg._blocks[i]); | |
1722 // End of per-block dump | |
1723 tty->print("\n"); | |
1724 | |
1725 if (!_ifg) { | |
1726 tty->print("(No IFG.)\n"); | |
1727 return; | |
1728 } | |
1729 | |
1730 // Dump LRG array | |
1731 tty->print("--- Live RanGe Array ---\n"); | |
1732 for(uint i2 = 1; i2 < _maxlrg; i2++ ) { | |
1733 tty->print("L%d: ",i2); | |
1734 if( i2 < _ifg->_maxlrg ) lrgs(i2).dump( ); | |
1735 else tty->print("new LRG"); | |
1736 } | |
1737 tty->print_cr(""); | |
1738 | |
1739 // Dump lo-degree list | |
1740 tty->print("Lo degree: "); | |
1741 for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next ) | |
1742 tty->print("L%d ",i3); | |
1743 tty->print_cr(""); | |
1744 | |
1745 // Dump lo-stk-degree list | |
1746 tty->print("Lo stk degree: "); | |
1747 for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next ) | |
1748 tty->print("L%d ",i4); | |
1749 tty->print_cr(""); | |
1750 | |
1751 // Dump lo-degree list | |
1752 tty->print("Hi degree: "); | |
1753 for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next ) | |
1754 tty->print("L%d ",i5); | |
1755 tty->print_cr(""); | |
1756 } | |
1757 | |
1758 //------------------------------dump_degree_lists------------------------------ | |
1759 void PhaseChaitin::dump_degree_lists() const { | |
1760 // Dump lo-degree list | |
1761 tty->print("Lo degree: "); | |
1762 for( uint i = _lo_degree; i; i = lrgs(i)._next ) | |
1763 tty->print("L%d ",i); | |
1764 tty->print_cr(""); | |
1765 | |
1766 // Dump lo-stk-degree list | |
1767 tty->print("Lo stk degree: "); | |
1768 for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next ) | |
1769 tty->print("L%d ",i2); | |
1770 tty->print_cr(""); | |
1771 | |
1772 // Dump lo-degree list | |
1773 tty->print("Hi degree: "); | |
1774 for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next ) | |
1775 tty->print("L%d ",i3); | |
1776 tty->print_cr(""); | |
1777 } | |
1778 | |
1779 //------------------------------dump_simplified-------------------------------- | |
1780 void PhaseChaitin::dump_simplified() const { | |
1781 tty->print("Simplified: "); | |
1782 for( uint i = _simplified; i; i = lrgs(i)._next ) | |
1783 tty->print("L%d ",i); | |
1784 tty->print_cr(""); | |
1785 } | |
1786 | |
1787 static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) { | |
1788 if ((int)reg < 0) | |
1789 sprintf(buf, "<OptoReg::%d>", (int)reg); | |
1790 else if (OptoReg::is_reg(reg)) | |
1791 strcpy(buf, Matcher::regName[reg]); | |
1792 else | |
1793 sprintf(buf,"%s + #%d",OptoReg::regname(OptoReg::c_frame_pointer), | |
1794 pc->reg2offset(reg)); | |
1795 return buf+strlen(buf); | |
1796 } | |
1797 | |
//------------------------------dump_register----------------------------------
// Dump a register name into a buffer.  Be intelligent if we get called
// before allocation is complete.  Returns a pointer to the terminating NUL.
char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
  // NOTE(review): testing '!this' inside a member function is undefined
  // behavior in standard C++ -- a conforming compiler may delete this check.
  // Fixing it requires callers to test for NULL before calling; flagged
  // rather than changed here to preserve the existing caller contract.
  if( !this ) {                 // Not got anything?
    sprintf(buf,"N%d",n->_idx); // Then use Node index
  } else if( _node_regs ) {
    // Post allocation, use direct mappings, no LRG info available
    print_reg( get_reg_first(n), this, buf );
  } else {
    uint lidx = Find_const(n); // Grab LRG number
    if( !_ifg ) {
      // No interference graph built yet: only the LRG number is known.
      sprintf(buf,"L%d",lidx); // No register binding yet
    } else if( !lidx ) {       // Special, not allocated value
      strcpy(buf,"Special");
    } else if( (lrgs(lidx).num_regs() == 1)
                 ? !lrgs(lidx).mask().is_bound1()
                 : !lrgs(lidx).mask().is_bound2() ) {
      // Mask not yet narrowed to a single (or single pair of) register(s).
      sprintf(buf,"L%d",lidx); // No register binding yet
    } else {                   // Hah!  We have a bound machine register
      print_reg( lrgs(lidx).reg(), this, buf );
    }
  }
  return buf+strlen(buf);
}
1823 | |
1824 //----------------------dump_for_spill_split_recycle-------------------------- | |
1825 void PhaseChaitin::dump_for_spill_split_recycle() const { | |
1826 if( WizardMode && (PrintCompilation || PrintOpto) ) { | |
1827 // Display which live ranges need to be split and the allocator's state | |
1828 tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt); | |
1829 for( uint bidx = 1; bidx < _maxlrg; bidx++ ) { | |
1830 if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) { | |
1831 tty->print("L%d: ", bidx); | |
1832 lrgs(bidx).dump(); | |
1833 } | |
1834 } | |
1835 tty->cr(); | |
1836 dump(); | |
1837 } | |
1838 } | |
1839 | |
//------------------------------dump_frame------------------------------------
// Dump an ASCII picture of the frame layout, one stack slot per line, walking
// from the high end of the frame down to the new SP: register parms, pad and
// incoming-argument area, old preserve area, old SP marker, in-preserve area,
// spill area, outgoing arguments, and the new preserve area.
void PhaseChaitin::dump_frame() const {
  const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
  const TypeTuple *domain = C->tf()->domain();
  const int argcnt = domain->cnt() - TypeFunc::Parms;

  // Incoming arguments in registers dump
  for( int k = 0; k < argcnt; k++ ) {
    OptoReg::Name parmreg = _matcher._parm_regs[k].first();
    if( OptoReg::is_reg(parmreg)) {
      const char *reg_name = OptoReg::regname(parmreg);
      tty->print("#r%3.3d %s", parmreg, reg_name);
      // Second half of a register pair, if any.
      parmreg = _matcher._parm_regs[k].second();
      if( OptoReg::is_reg(parmreg)) {
        tty->print(":%s", OptoReg::regname(parmreg));
      }
      tty->print(" : parm %d: ", k);
      domain->field_at(k + TypeFunc::Parms)->dump();
      tty->print_cr("");
    }
  }

  // Check for un-owned padding above incoming args
  OptoReg::Name reg = _matcher._new_SP;
  if( reg > _matcher._in_arg_limit ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg));
  }

  // Incoming argument area dump
  OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots());
  while( reg > begin_in_arg ) {
    reg = OptoReg::add(reg, -1);
    tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
    int j;
    // Find which parameter (if any) owns this stack slot.
    for( j = 0; j < argcnt; j++) {
      if( _matcher._parm_regs[j].first() == reg ||
          _matcher._parm_regs[j].second() == reg ) {
        tty->print("parm %d: ",j);
        domain->field_at(j + TypeFunc::Parms)->dump();
        tty->print_cr("");
        break;
      }
    }
    if( j >= argcnt )
      tty->print_cr("HOLE, owned by SELF");
  }

  // Old outgoing preserve area
  while( reg > _matcher._old_SP ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg));
  }

  // Old SP
  tty->print_cr("# -- Old %s -- Framesize: %d --",fp,
    reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize);

  // Preserve area dump
  reg = OptoReg::add(reg, -1);
  while( OptoReg::is_stack(reg)) {
    tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
    if( _matcher.return_addr() == reg )
      tty->print_cr("return address");
    else if( _matcher.return_addr() == OptoReg::add(reg,1) &&
             VerifyStackAtCalls )
      // Sentinel slot placed below the return address for stack checking.
      tty->print_cr("0xBADB100D +VerifyStackAtCalls");
    else if ((int)OptoReg::reg2stack(reg) < C->fixed_slots())
      tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg));
    else
      tty->print_cr("pad2, in_preserve");
    reg = OptoReg::add(reg, -1);
  }

  // Spill area dump
  reg = OptoReg::add(_matcher._new_SP, _framesize );
  while( reg > _matcher._out_arg_limit ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg));
  }

  // Outgoing argument area dump
  while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg));
  }

  // Outgoing new preserve area
  while( reg > _matcher._new_SP ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg));
  }
  tty->print_cr("#");
}
1934 | |
1935 //------------------------------dump_bb---------------------------------------- | |
1936 void PhaseChaitin::dump_bb( uint pre_order ) const { | |
1937 tty->print_cr("---dump of B%d---",pre_order); | |
1938 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
1939 Block *b = _cfg._blocks[i]; | |
1940 if( b->_pre_order == pre_order ) | |
1941 dump(b); | |
1942 } | |
1943 } | |
1944 | |
1945 //------------------------------dump_lrg--------------------------------------- | |
1946 void PhaseChaitin::dump_lrg( uint lidx ) const { | |
1947 tty->print_cr("---dump of L%d---",lidx); | |
1948 | |
1949 if( _ifg ) { | |
1950 if( lidx >= _maxlrg ) { | |
1951 tty->print("Attempt to print live range index beyond max live range.\n"); | |
1952 return; | |
1953 } | |
1954 tty->print("L%d: ",lidx); | |
1955 lrgs(lidx).dump( ); | |
1956 } | |
1957 if( _ifg ) { tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx)); | |
1958 _ifg->neighbors(lidx)->dump(); | |
1959 tty->cr(); | |
1960 } | |
1961 // For all blocks | |
1962 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
1963 Block *b = _cfg._blocks[i]; | |
1964 int dump_once = 0; | |
1965 | |
1966 // For all instructions | |
1967 for( uint j = 0; j < b->_nodes.size(); j++ ) { | |
1968 Node *n = b->_nodes[j]; | |
1969 if( Find_const(n) == lidx ) { | |
1970 if( !dump_once++ ) { | |
1971 tty->cr(); | |
1972 b->dump_head( &_cfg._bbs ); | |
1973 } | |
1974 dump(n); | |
1975 continue; | |
1976 } | |
1977 uint cnt = n->req(); | |
1978 for( uint k = 1; k < cnt; k++ ) { | |
1979 Node *m = n->in(k); | |
1980 if (!m) continue; // be robust in the dumper | |
1981 if( Find_const(m) == lidx ) { | |
1982 if( !dump_once++ ) { | |
1983 tty->cr(); | |
1984 b->dump_head( &_cfg._bbs ); | |
1985 } | |
1986 dump(n); | |
1987 } | |
1988 } | |
1989 } | |
1990 } // End of per-block dump | |
1991 tty->cr(); | |
1992 } | |
1993 #endif // not PRODUCT | |
1994 | |
1995 //------------------------------print_chaitin_statistics------------------------------- | |
// Cumulative allocator statistics, accumulated across compilations and
// reported by print_chaitin_statistics().
// Spill code inserted by the allocator:
int PhaseChaitin::_final_loads  = 0;
int PhaseChaitin::_final_stores = 0;
int PhaseChaitin::_final_memoves= 0;
int PhaseChaitin::_final_copies = 0;
// Costs associated with the above (semantics defined where they are updated):
double PhaseChaitin::_final_load_cost  = 0;
double PhaseChaitin::_final_store_cost = 0;
double PhaseChaitin::_final_memove_cost= 0;
double PhaseChaitin::_final_copy_cost  = 0;
// Conservative-coalescing counters:
int PhaseChaitin::_conserv_coalesce = 0;
int PhaseChaitin::_conserv_coalesce_pair = 0;
int PhaseChaitin::_conserv_coalesce_trie = 0;
int PhaseChaitin::_conserv_coalesce_quad = 0;
int PhaseChaitin::_post_alloc = 0;
// Missed coalescing opportunities:
int PhaseChaitin::_lost_opp_pp_coalesce = 0;
int PhaseChaitin::_lost_opp_cflow_coalesce = 0;
// CISC-instruction usage counters:
int PhaseChaitin::_used_cisc_instructions   = 0;
int PhaseChaitin::_unused_cisc_instructions = 0;
// Attempts vs. successes; their ratio is printed as "average allocation trips".
int PhaseChaitin::_allocator_attempts       = 0;
int PhaseChaitin::_allocator_successes      = 0;
2015 | |
2016 #ifndef PRODUCT | |
// Counts of blocks classified as high/low register pressure (printed below).
uint PhaseChaitin::_high_pressure = 0;
uint PhaseChaitin::_low_pressure = 0;

// Print the accumulated static allocator statistics, suppressing lines whose
// counters are all zero.
void PhaseChaitin::print_chaitin_statistics() {
  tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.", _final_loads, _final_stores, _final_memoves, _final_copies);
  tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.", _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost);
  // Weighted sum of the individual costs; the weights are fixed heuristics.
  tty->print_cr("Adjusted spill cost = %7.0f.",
                _final_load_cost*4.0 + _final_store_cost * 2.0 +
                _final_copy_cost*1.0 + _final_memove_cost*12.0);
  tty->print("Conservatively coalesced %d copies, %d pairs",
             _conserv_coalesce, _conserv_coalesce_pair);
  if( _conserv_coalesce_trie || _conserv_coalesce_quad )
    tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad);
  tty->print_cr(", %d post alloc.", _post_alloc);
  if( _lost_opp_pp_coalesce || _lost_opp_cflow_coalesce )
    tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.",
                  _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce );
  if( _used_cisc_instructions || _unused_cisc_instructions )
    tty->print_cr("Used cisc instruction %d, remained in register %d",
                  _used_cisc_instructions, _unused_cisc_instructions);
  if( _allocator_successes != 0 )
    tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes);
  tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure);
}
2041 #endif // not PRODUCT |