Mercurial > hg > graal-compiler
diff src/share/vm/opto/coalesce.cpp @ 10111:8373c19be854
8011621: live_ranges_in_separate_class.patch
Reviewed-by: kvn, roland
Contributed-by: niclas.adlertz@oracle.com
author | neliasso |
---|---|
date | Tue, 16 Apr 2013 10:08:41 +0200 |
parents | c7b60b601eb4 |
children | 693e4d04fd09 |
line wrap: on
line diff
--- a/src/share/vm/opto/coalesce.cpp Mon Apr 15 16:20:05 2013 -0700 +++ b/src/share/vm/opto/coalesce.cpp Tue Apr 16 10:08:41 2013 +0200 @@ -35,159 +35,11 @@ #include "opto/regmask.hpp" //============================================================================= -//------------------------------reset_uf_map----------------------------------- -void PhaseChaitin::reset_uf_map( uint maxlrg ) { - _maxlrg = maxlrg; - // Force the Union-Find mapping to be at least this large - _uf_map.extend(_maxlrg,0); - // Initialize it to be the ID mapping. - for( uint i=0; i<_maxlrg; i++ ) - _uf_map.map(i,i); -} - -//------------------------------compress_uf_map-------------------------------- -// Make all Nodes map directly to their final live range; no need for -// the Union-Find mapping after this call. -void PhaseChaitin::compress_uf_map_for_nodes( ) { - // For all Nodes, compress mapping - uint unique = _names.Size(); - for( uint i=0; i<unique; i++ ) { - uint lrg = _names[i]; - uint compressed_lrg = Find(lrg); - if( lrg != compressed_lrg ) - _names.map(i,compressed_lrg); - } -} - -//------------------------------Find------------------------------------------- -// Straight out of Tarjan's union-find algorithm -uint PhaseChaitin::Find_compress( uint lrg ) { - uint cur = lrg; - uint next = _uf_map[cur]; - while( next != cur ) { // Scan chain of equivalences - assert( next < cur, "always union smaller" ); - cur = next; // until find a fixed-point - next = _uf_map[cur]; - } - // Core of union-find algorithm: update chain of - // equivalences to be equal to the root. - while( lrg != next ) { - uint tmp = _uf_map[lrg]; - _uf_map.map(lrg, next); - lrg = tmp; - } - return lrg; -} - -//------------------------------Find------------------------------------------- -// Straight out of Tarjan's union-find algorithm -uint PhaseChaitin::Find_compress( const Node *n ) { - uint lrg = Find_compress(_names[n->_idx]); - _names.map(n->_idx,lrg); - return lrg; -} - -//------------------------------Find_const------------------------------------- -// Like Find above, but no path compress, so bad asymptotic behavior -uint PhaseChaitin::Find_const( uint lrg ) const { - if( !lrg ) return lrg; // Ignore the zero LRG - // Off the end? This happens during debugging dumps when you got - // brand new live ranges but have not told the allocator yet. - if( lrg >= _maxlrg ) return lrg; - uint next = _uf_map[lrg]; - while( next != lrg ) { // Scan chain of equivalences - assert( next < lrg, "always union smaller" ); - lrg = next; // until find a fixed-point - next = _uf_map[lrg]; - } - return next; -} - -//------------------------------Find------------------------------------------- -// Like Find above, but no path compress, so bad asymptotic behavior -uint PhaseChaitin::Find_const( const Node *n ) const { - if( n->_idx >= _names.Size() ) return 0; // not mapped, usual for debug dump - return Find_const( _names[n->_idx] ); -} - -//------------------------------Union------------------------------------------ -// union 2 sets together. -void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) { - uint src = Find(src_n); - uint dst = Find(dst_n); - assert( src, "" ); - assert( dst, "" ); - assert( src < _maxlrg, "oob" ); - assert( dst < _maxlrg, "oob" ); - assert( src < dst, "always union smaller" ); - _uf_map.map(dst,src); -} - -//------------------------------new_lrg---------------------------------------- -void PhaseChaitin::new_lrg( const Node *x, uint lrg ) { - // Make the Node->LRG mapping - _names.extend(x->_idx,lrg); - // Make the Union-Find mapping an identity function - _uf_map.extend(lrg,lrg); -} - -//------------------------------clone_projs------------------------------------ -// After cloning some rematerialized instruction, clone any MachProj's that -// follow it. Example: Intel zero is XOR, kills flags. Sparc FP constants -// use G3 as an address temp. -int PhaseChaitin::clone_projs( Block *b, uint idx, Node *con, Node *copy, uint &maxlrg ) { - Block *bcon = _cfg._bbs[con->_idx]; - uint cindex = bcon->find_node(con); - Node *con_next = bcon->_nodes[cindex+1]; - if( con_next->in(0) != con || !con_next->is_MachProj() ) - return false; // No MachProj's follow - - // Copy kills after the cloned constant - Node *kills = con_next->clone(); - kills->set_req( 0, copy ); - b->_nodes.insert( idx, kills ); - _cfg._bbs.map( kills->_idx, b ); - new_lrg( kills, maxlrg++ ); - return true; -} - -//------------------------------compact---------------------------------------- -// Renumber the live ranges to compact them. Makes the IFG smaller. -void PhaseChaitin::compact() { - // Current the _uf_map contains a series of short chains which are headed - // by a self-cycle. All the chains run from big numbers to little numbers. - // The Find() call chases the chains & shortens them for the next Find call. - // We are going to change this structure slightly. Numbers above a moving - // wave 'i' are unchanged. Numbers below 'j' point directly to their - // compacted live range with no further chaining. There are no chains or - // cycles below 'i', so the Find call no longer works. - uint j=1; - uint i; - for( i=1; i < _maxlrg; i++ ) { - uint lr = _uf_map[i]; - // Ignore unallocated live ranges - if( !lr ) continue; - assert( lr <= i, "" ); - _uf_map.map(i, ( lr == i ) ? j++ : _uf_map[lr]); - } - if( false ) // PrintOptoCompactLiveRanges - printf("Compacted %d LRs from %d\n",i-j,i); - // Now change the Node->LR mapping to reflect the compacted names - uint unique = _names.Size(); - for( i=0; i<unique; i++ ) - _names.map(i,_uf_map[_names[i]]); - - // Reset the Union-Find mapping - reset_uf_map(j); - -} - -//============================================================================= //------------------------------Dump------------------------------------------- #ifndef PRODUCT -void PhaseCoalesce::dump( Node *n ) const { +void PhaseCoalesce::dump(Node *n) const { // Being a const function means I cannot use 'Find' - uint r = _phc.Find(n); + uint r = _phc._lrg_map.find(n); tty->print("L%d/N%d ",r,n->_idx); } @@ -235,9 +87,9 @@ //------------------------------combine_these_two------------------------------ // Combine the live ranges def'd by these 2 Nodes. N2 is an input to N1. -void PhaseCoalesce::combine_these_two( Node *n1, Node *n2 ) { - uint lr1 = _phc.Find(n1); - uint lr2 = _phc.Find(n2); +void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) { + uint lr1 = _phc._lrg_map.find(n1); + uint lr2 = _phc._lrg_map.find(n2); if( lr1 != lr2 && // Different live ranges already AND !_phc._ifg->test_edge_sq( lr1, lr2 ) ) { // Do not interfere LRG *lrg1 = &_phc.lrgs(lr1); @@ -306,14 +158,18 @@ // I am about to clobber the dst_name, so the copy must be inserted // after the last use. Last use is really first-use on a backwards scan. uint i = b->end_idx()-1; - while( 1 ) { + while(1) { Node *n = b->_nodes[i]; // Check for end of virtual copies; this is also the end of the // parallel renaming effort. - if( n->_idx < _unique ) break; + if (n->_idx < _unique) { + break; + } uint idx = n->is_Copy(); assert( idx || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" ); - if( idx && _phc.Find(n->in(idx)) == dst_name ) break; + if (idx && _phc._lrg_map.find(n->in(idx)) == dst_name) { + break; + } i--; } uint last_use_idx = i; @@ -324,24 +180,29 @@ // There can be only 1 kill that exits any block and that is // the last kill. Thus it is the first kill on a backwards scan. i = b->end_idx()-1; - while( 1 ) { + while (1) { Node *n = b->_nodes[i]; // Check for end of virtual copies; this is also the end of the // parallel renaming effort. - if( n->_idx < _unique ) break; + if (n->_idx < _unique) { + break; + } assert( n->is_Copy() || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" ); - if( _phc.Find(n) == src_name ) { + if (_phc._lrg_map.find(n) == src_name) { kill_src_idx = i; break; } i--; } // Need a temp? Last use of dst comes after the kill of src? - if( last_use_idx >= kill_src_idx ) { + if (last_use_idx >= kill_src_idx) { // Need to break a cycle with a temp uint idx = copy->is_Copy(); Node *tmp = copy->clone(); - _phc.new_lrg(tmp,_phc._maxlrg++); + uint max_lrg_id = _phc._lrg_map.max_lrg_id(); + _phc.new_lrg(tmp, max_lrg_id); + _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1); + // Insert new temp between copy and source tmp ->set_req(idx,copy->in(idx)); copy->set_req(idx,tmp); @@ -359,14 +220,14 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) { // We do LRGs compressing and fix a liveout data only here since the other // place in Split() is guarded by the assert which we never hit. - _phc.compress_uf_map_for_nodes(); + _phc._lrg_map.compress_uf_map_for_nodes(); // Fix block's liveout data for compressed live ranges. - for(uint lrg = 1; lrg < _phc._maxlrg; lrg++ ) { - uint compressed_lrg = _phc.Find(lrg); - if( lrg != compressed_lrg ) { - for( uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++ ) { + for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) { + uint compressed_lrg = _phc._lrg_map.find(lrg); + if (lrg != compressed_lrg) { + for (uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++) { IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]); - if( liveout->member(lrg) ) { + if (liveout->member(lrg)) { liveout->remove(lrg); liveout->insert(compressed_lrg); } @@ -392,8 +253,9 @@ uint cidx = copy->is_Copy(); if( cidx ) { Node *def = copy->in(cidx); - if( _phc.Find(copy) == _phc.Find(def) ) - n->set_req(k,def); + if (_phc._lrg_map.find(copy) == _phc._lrg_map.find(def)) { + n->set_req(k, def); + } } } @@ -401,7 +263,7 @@ uint cidx = n->is_Copy(); if( cidx ) { Node *def = n->in(cidx); - if( _phc.Find(n) == _phc.Find(def) ) { + if (_phc._lrg_map.find(n) == _phc._lrg_map.find(def)) { n->replace_by(def); n->set_req(cidx,NULL); b->_nodes.remove(l); @@ -410,16 +272,18 @@ } } - if( n->is_Phi() ) { + if (n->is_Phi()) { // Get the chosen name for the Phi - uint phi_name = _phc.Find( n ); + uint phi_name = _phc._lrg_map.find(n); // Ignore the pre-allocated specials - if( !phi_name ) continue; + if (!phi_name) { + continue; + } // Check for mismatch inputs to Phi - for( uint j = 1; j<cnt; j++ ) { + for (uint j = 1; j < cnt; j++) { Node *m = n->in(j); - uint src_name = _phc.Find(m); - if( src_name != phi_name ) { + uint src_name = _phc._lrg_map.find(m); + if (src_name != phi_name) { Block *pred = _phc._cfg._bbs[b->pred(j)->_idx]; Node *copy; assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach"); @@ -430,18 +294,18 @@ // Insert the copy in the predecessor basic block pred->add_inst(copy); // Copy any flags as well - _phc.clone_projs( pred, pred->end_idx(), m, copy, _phc._maxlrg ); + _phc.clone_projs(pred, pred->end_idx(), m, copy, _phc._lrg_map); } else { const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()]; - copy = new (C) MachSpillCopyNode(m,*rm,*rm); + copy = new (C) MachSpillCopyNode(m, *rm, *rm); // Find a good place to insert. Kinda tricky, use a subroutine insert_copy_with_overlap(pred,copy,phi_name,src_name); } // Insert the copy in the use-def chain - n->set_req( j, copy ); + n->set_req(j, copy); _phc._cfg._bbs.map( copy->_idx, pred ); // Extend ("register allocate") the names array for the copy. - _phc._names.extend( copy->_idx, phi_name ); + _phc._lrg_map.extend(copy->_idx, phi_name); } // End of if Phi names do not match } // End of for all inputs to Phi } else { // End of if Phi @@ -450,39 +314,40 @@ uint idx; if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) { // Get the chosen name for the Node - uint name = _phc.Find( n ); - assert( name, "no 2-address specials" ); + uint name = _phc._lrg_map.find(n); + assert (name, "no 2-address specials"); // Check for name mis-match on the 2-address input Node *m = n->in(idx); - if( _phc.Find(m) != name ) { + if (_phc._lrg_map.find(m) != name) { Node *copy; assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach"); // At this point it is unsafe to extend live ranges (6550579). // Rematerialize only constants as we do for Phi above. - if( m->is_Mach() && m->as_Mach()->is_Con() && - m->as_Mach()->rematerialize() ) { + if(m->is_Mach() && m->as_Mach()->is_Con() && + m->as_Mach()->rematerialize()) { copy = m->clone(); // Insert the copy in the basic block, just before us - b->_nodes.insert( l++, copy ); - if( _phc.clone_projs( b, l, m, copy, _phc._maxlrg ) ) + b->_nodes.insert(l++, copy); + if(_phc.clone_projs(b, l, m, copy, _phc._lrg_map)) { l++; + } } else { const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()]; - copy = new (C) MachSpillCopyNode( m, *rm, *rm ); + copy = new (C) MachSpillCopyNode(m, *rm, *rm); // Insert the copy in the basic block, just before us - b->_nodes.insert( l++, copy ); + b->_nodes.insert(l++, copy); } // Insert the copy in the use-def chain - n->set_req(idx, copy ); + n->set_req(idx, copy); // Extend ("register allocate") the names array for the copy. - _phc._names.extend( copy->_idx, name ); + _phc._lrg_map.extend(copy->_idx, name); _phc._cfg._bbs.map( copy->_idx, b ); } } // End of is two-adr // Insert a copy at a debug use for a lrg which has high frequency - if( b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs) ) { + if (b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs)) { // Walk the debug inputs to the node and check for lrg freq JVMState* jvms = n->jvms(); uint debug_start = jvms ? jvms->debug_start() : 999999; @@ -490,9 +355,11 @@ for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) { // Do not split monitors; they are only needed for debug table // entries and need no code. - if( jvms->is_monitor_use(inpidx) ) continue; + if (jvms->is_monitor_use(inpidx)) { + continue; + } Node *inp = n->in(inpidx); - uint nidx = _phc.n2lidx(inp); + uint nidx = _phc._lrg_map.live_range_id(inp); LRG &lrg = lrgs(nidx); // If this lrg has a high frequency use/def @@ -519,8 +386,10 @@ // Insert the copy in the basic block, just before us b->_nodes.insert( l++, copy ); // Extend ("register allocate") the names array for the copy. - _phc.new_lrg( copy, _phc._maxlrg++ ); - _phc._cfg._bbs.map( copy->_idx, b ); + uint max_lrg_id = _phc._lrg_map.max_lrg_id(); + _phc.new_lrg(copy, max_lrg_id); + _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1); + _phc._cfg._bbs.map(copy->_idx, b); //tty->print_cr("Split a debug use in Aggressive Coalesce"); } // End of if high frequency use/def } // End of for all debug inputs @@ -583,17 +452,17 @@ uint idx; // 2-address instructions have a virtual Copy matching their input // to their output - if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) { + if (n->is_Mach() && (idx = n->as_Mach()->two_adr())) { MachNode *mach = n->as_Mach(); - combine_these_two( mach, mach->in(idx) ); + combine_these_two(mach, mach->in(idx)); } } // End of for all instructions in block } //============================================================================= //------------------------------PhaseConservativeCoalesce---------------------- -PhaseConservativeCoalesce::PhaseConservativeCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin) { - _ulr.initialize(_phc._maxlrg); +PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) { + _ulr.initialize(_phc._lrg_map.max_lrg_id()); } //------------------------------verify----------------------------------------- @@ -673,10 +542,14 @@ // Else work back one in copy chain prev_copy = prev_copy->in(prev_copy->is_Copy()); } else { // Else collect interferences - uint lidx = _phc.Find(x); + uint lidx = _phc._lrg_map.find(x); // Found another def of live-range being stretched? - if( lidx == lr1 ) return max_juint; - if( lidx == lr2 ) return max_juint; + if(lidx == lr1) { + return max_juint; + } + if(lidx == lr2) { + return max_juint; + } // If we attempt to coalesce across a bound def if( lrgs(lidx).is_bound() ) { @@ -751,33 +624,43 @@ // See if I can coalesce a series of multiple copies together. I need the // final dest copy and the original src copy. They can be the same Node. // Compute the compatible register masks. -bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex ) { +bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block *b, uint bindex) { - if( !dst_copy->is_SpillCopy() ) return false; - if( !src_copy->is_SpillCopy() ) return false; + if (!dst_copy->is_SpillCopy()) { + return false; + } + if (!src_copy->is_SpillCopy()) { + return false; + } Node *src_def = src_copy->in(src_copy->is_Copy()); - uint lr1 = _phc.Find(dst_copy); - uint lr2 = _phc.Find(src_def ); + uint lr1 = _phc._lrg_map.find(dst_copy); + uint lr2 = _phc._lrg_map.find(src_def); // Same live ranges already? - if( lr1 == lr2 ) return false; + if (lr1 == lr2) { + return false; + } // Interfere? - if( _phc._ifg->test_edge_sq( lr1, lr2 ) ) return false; + if (_phc._ifg->test_edge_sq(lr1, lr2)) { + return false; + } // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK. - if( !lrgs(lr1)._is_oop && lrgs(lr2)._is_oop ) // not an oop->int cast + if (!lrgs(lr1)._is_oop && lrgs(lr2)._is_oop) { // not an oop->int cast return false; + } // Coalescing between an aligned live range and a mis-aligned live range? // No, no! Alignment changes how we count degree. - if( lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj ) + if (lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj) { return false; + } // Sort; use smaller live-range number Node *lr1_node = dst_copy; Node *lr2_node = src_def; - if( lr1 > lr2 ) { + if (lr1 > lr2) { uint tmp = lr1; lr1 = lr2; lr2 = tmp; lr1_node = src_def; lr2_node = dst_copy; } @@ -916,17 +799,5 @@ PhaseChaitin::_conserv_coalesce++; // Collect stats on success continue; } - - /* do not attempt pairs. About 1/2 of all pairs can be removed by - post-alloc. The other set are too few to bother. - Node *copy2 = copy1->in(idx1); - uint idx2 = copy2->is_Copy(); - if( !idx2 ) continue; - if( copy_copy(copy1,copy2,b,i) ) { - i--; // Retry, same location in block - PhaseChaitin::_conserv_coalesce_pair++; // Collect stats on success - continue; - } - */ } }