Mercurial > hg > truffle
diff src/share/vm/opto/block.cpp @ 418:72c5366e5d86
6743900: frequency based block layout
Summary: post-register allocation pass that drives block layout by edge frequencies
Reviewed-by: never, kvn
author | rasbold |
---|---|
date | Thu, 06 Nov 2008 14:59:10 -0800 |
parents | 9ee9cf798b59 |
children | 91263420e1c6 |
line wrap: on
line diff
--- a/src/share/vm/opto/block.cpp Thu Oct 30 17:08:48 2008 -0700 +++ b/src/share/vm/opto/block.cpp Thu Nov 06 14:59:10 2008 -0800 @@ -57,6 +57,14 @@ _blocks[i] = b; } +#ifndef PRODUCT +void Block_List::print() { + for (uint i=0; i < size(); i++) { + tty->print("B%d ", _blocks[i]->_pre_order); + } + tty->print("size = %d\n", size()); +} +#endif //============================================================================= @@ -66,6 +74,12 @@ // Check for Start block if( _pre_order == 1 ) return InteriorEntryAlignment; // Check for loop alignment + if (has_loop_alignment()) return loop_alignment(); + + return 1; // no particular alignment +} + +uint Block::compute_loop_alignment() { Node *h = head(); if( h->is_Loop() && h->as_Loop()->is_inner_loop() ) { // Pre- and post-loops have low trip count so do not bother with @@ -83,13 +97,15 @@ } return OptoLoopAlignment; // Otherwise align loop head } + return 1; // no particular alignment } //----------------------------------------------------------------------------- // Compute the size of first 'inst_cnt' instructions in this block. // Return the number of instructions left to compute if the block has -// less then 'inst_cnt' instructions. +// less then 'inst_cnt' instructions. Stop, and return 0 if sum_size +// exceeds OptoLoopAlignment. uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt, PhaseRegAlloc* ra) { uint last_inst = _nodes.size(); @@ -307,6 +323,8 @@ tty->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order); // Dump any loop-specific bits, especially for CountedLoops. 
loop->dump_spec(tty); + } else if (has_loop_alignment()) { + tty->print(" top-of-loop"); } tty->print(" Freq: %g",_freq); if( Verbose || WizardMode ) { @@ -509,9 +527,11 @@ int branch_idx = b->_nodes.size() - b->_num_succs-1; if( branch_idx < 1 ) return false; Node *bra = b->_nodes[branch_idx]; - if( bra->is_Catch() ) return true; + if( bra->is_Catch() ) + return true; if( bra->is_Mach() ) { - if( bra->is_MachNullCheck() ) return true; + if( bra->is_MachNullCheck() ) + return true; int iop = bra->as_Mach()->ideal_Opcode(); if( iop == Op_FastLock || iop == Op_FastUnlock ) return true; @@ -557,10 +577,10 @@ dead->_nodes[k]->del_req(j); } -//------------------------------MoveToNext------------------------------------- +//------------------------------move_to_next----------------------------------- // Helper function to move block bx to the slot following b_index. Return // true if the move is successful, otherwise false -bool PhaseCFG::MoveToNext(Block* bx, uint b_index) { +bool PhaseCFG::move_to_next(Block* bx, uint b_index) { if (bx == NULL) return false; // Return false if bx is already scheduled. @@ -591,9 +611,9 @@ return true; } -//------------------------------MoveToEnd-------------------------------------- +//------------------------------move_to_end------------------------------------ // Move empty and uncommon blocks to the end. -void PhaseCFG::MoveToEnd(Block *b, uint i) { +void PhaseCFG::move_to_end(Block *b, uint i) { int e = b->is_Empty(); if (e != Block::not_empty) { if (e == Block::empty_with_goto) { @@ -609,15 +629,31 @@ _blocks.push(b); } -//------------------------------RemoveEmpty------------------------------------ -// Remove empty basic blocks and useless branches. 
-void PhaseCFG::RemoveEmpty() { +//---------------------------set_loop_alignment-------------------------------- +// Set loop alignment for every block +void PhaseCFG::set_loop_alignment() { + uint last = _num_blocks; + assert( _blocks[0] == _broot, "" ); + + for (uint i = 1; i < last; i++ ) { + Block *b = _blocks[i]; + if (b->head()->is_Loop()) { + b->set_loop_alignment(b); + } + } +} + +//-----------------------------remove_empty------------------------------------ +// Make empty basic blocks to be "connector" blocks, Move uncommon blocks +// to the end. +void PhaseCFG::remove_empty() { // Move uncommon blocks to the end uint last = _num_blocks; - uint i; assert( _blocks[0] == _broot, "" ); - for( i = 1; i < last; i++ ) { + + for (uint i = 1; i < last; i++) { Block *b = _blocks[i]; + if (b->is_connector()) break; // Check for NeverBranch at block end. This needs to become a GOTO to the // true target. NeverBranch are treated as a conditional branch that @@ -629,37 +665,40 @@ convert_NeverBranch_to_Goto(b); // Look for uncommon blocks and move to end. - if( b->is_uncommon(_bbs) ) { - MoveToEnd(b, i); - last--; // No longer check for being uncommon! - if( no_flip_branch(b) ) { // Fall-thru case must follow? - b = _blocks[i]; // Find the fall-thru block - MoveToEnd(b, i); - last--; + if (!C->do_freq_based_layout()) { + if( b->is_uncommon(_bbs) ) { + move_to_end(b, i); + last--; // No longer check for being uncommon! + if( no_flip_branch(b) ) { // Fall-thru case must follow? 
+ b = _blocks[i]; // Find the fall-thru block + move_to_end(b, i); + last--; + } + i--; // backup block counter post-increment } - i--; // backup block counter post-increment } } - // Remove empty blocks - uint j1; + // Move empty blocks to the end last = _num_blocks; - for( i=0; i < last; i++ ) { + for (uint i = 1; i < last; i++) { Block *b = _blocks[i]; - if (i > 0) { - if (b->is_Empty() != Block::not_empty) { - MoveToEnd(b, i); - last--; - i--; - } + if (b->is_Empty() != Block::not_empty) { + move_to_end(b, i); + last--; + i--; } } // End of for all blocks +} +//-----------------------------fixup_flow-------------------------------------- +// Fix up the final control flow for basic blocks. +void PhaseCFG::fixup_flow() { // Fixup final control flow for the blocks. Remove jump-to-next // block. If neither arm of a IF follows the conditional branch, we // have to add a second jump after the conditional. We place the // TRUE branch target in succs[0] for both GOTOs and IFs. - for( i=0; i < _num_blocks; i++ ) { + for (uint i=0; i < _num_blocks; i++) { Block *b = _blocks[i]; b->_pre_order = i; // turn pre-order into block-index @@ -700,7 +739,7 @@ } } // Remove all CatchProjs - for (j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop(); + for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop(); } else if (b->_num_succs == 1) { // Block ends in a Goto? @@ -730,8 +769,7 @@ // successors after the current one, provided that the // successor was previously unscheduled, but moveable // (i.e., all paths to it involve a branch). - if( bnext != bs0 && bnext != bs1 ) { - + if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) { // Choose the more common successor based on the probability // of the conditional branch. 
Block *bx = bs0; @@ -751,9 +789,9 @@ } // Attempt the more common successor first - if (MoveToNext(bx, i)) { + if (move_to_next(bx, i)) { bnext = bx; - } else if (MoveToNext(by, i)) { + } else if (move_to_next(by, i)) { bnext = by; } } @@ -774,10 +812,8 @@ // Flip projection for each target { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; } - } else if( bnext == bs1 ) { // Fall-thru is already in succs[1] - - } else { // Else need a double-branch - + } else if( bnext != bs1 ) { + // Need a double-branch // The existing conditional branch need not change. // Add a unconditional branch to the false target. // Alas, it must appear in its own block and adding a @@ -786,8 +822,9 @@ } // Make sure we TRUE branch to the target - if( proj0->Opcode() == Op_IfFalse ) + if( proj0->Opcode() == Op_IfFalse ) { iff->negate(); + } b->_nodes.pop(); // Remove IfFalse & IfTrue projections b->_nodes.pop(); @@ -796,9 +833,7 @@ // Multi-exit block, e.g. a switch statement // But we don't need to do anything here } - } // End of for all blocks - } @@ -905,7 +940,7 @@ // Force the Union-Find mapping to be at least this large extend(max,0); // Initialize to be the ID mapping. 
- for( uint i=0; i<_max; i++ ) map(i,i); + for( uint i=0; i<max; i++ ) map(i,i); } //------------------------------Find_compress---------------------------------- @@ -937,7 +972,6 @@ if( idx >= _max ) return idx; uint next = lookup(idx); while( next != idx ) { // Scan chain of equivalences - assert( next < idx, "always union smaller" ); idx = next; // until find a fixed-point next = lookup(idx); } @@ -956,3 +990,491 @@ assert( src < dst, "always union smaller" ); map(dst,src); } + +#ifndef PRODUCT +static void edge_dump(GrowableArray<CFGEdge *> *edges) { + tty->print_cr("---- Edges ----"); + for (int i = 0; i < edges->length(); i++) { + CFGEdge *e = edges->at(i); + if (e != NULL) { + edges->at(i)->dump(); + } + } +} + +static void trace_dump(Trace *traces[], int count) { + tty->print_cr("---- Traces ----"); + for (int i = 0; i < count; i++) { + Trace *tr = traces[i]; + if (tr != NULL) { + tr->dump(); + } + } +} + +void Trace::dump( ) const { + tty->print_cr("Trace (freq %f)", first_block()->_freq); + for (Block *b = first_block(); b != NULL; b = next(b)) { + tty->print(" B%d", b->_pre_order); + if (b->head()->is_Loop()) { + tty->print(" (L%d)", b->compute_loop_alignment()); + } + if (b->has_loop_alignment()) { + tty->print(" (T%d)", b->code_alignment()); + } + } + tty->cr(); +} + +void CFGEdge::dump( ) const { + tty->print(" B%d --> B%d Freq: %f out:%3d%% in:%3d%% State: ", + from()->_pre_order, to()->_pre_order, freq(), _from_pct, _to_pct); + switch(state()) { + case connected: + tty->print("connected"); + break; + case open: + tty->print("open"); + break; + case interior: + tty->print("interior"); + break; + } + if (infrequent()) { + tty->print(" infrequent"); + } + tty->cr(); +} +#endif + +//============================================================================= + +//------------------------------edge_order------------------------------------- +// Comparison function for edges +static int edge_order(CFGEdge **e0, CFGEdge **e1) { + float freq0 = 
(*e0)->freq(); + float freq1 = (*e1)->freq(); + if (freq0 != freq1) { + return freq0 > freq1 ? -1 : 1; + } + + int dist0 = (*e0)->to()->_rpo - (*e0)->from()->_rpo; + int dist1 = (*e1)->to()->_rpo - (*e1)->from()->_rpo; + + return dist1 - dist0; +} + +//------------------------------trace_frequency_order-------------------------- +// Comparison function for traces +static int trace_frequency_order(const void *p0, const void *p1) { + Trace *tr0 = *(Trace **) p0; + Trace *tr1 = *(Trace **) p1; + Block *b0 = tr0->first_block(); + Block *b1 = tr1->first_block(); + + // The trace of connector blocks goes at the end; + // we only expect one such trace + if (b0->is_connector() != b1->is_connector()) { + return b1->is_connector() ? -1 : 1; + } + + // Pull more frequently executed blocks to the beginning + float freq0 = b0->_freq; + float freq1 = b1->_freq; + if (freq0 != freq1) { + return freq0 > freq1 ? -1 : 1; + } + + int diff = tr0->first_block()->_rpo - tr1->first_block()->_rpo; + + return diff; +} + +//------------------------------find_edges------------------------------------- +// Find edges of interest, i.e., those which can fall through. Presumes that +// edges which don't fall through are of low frequency and can be generally +// ignored. Initialize the list of traces. +void PhaseBlockLayout::find_edges() +{ + // Walk the blocks, creating edges and Traces + uint i; + Trace *tr = NULL; + for (i = 0; i < _cfg._num_blocks; i++) { + Block *b = _cfg._blocks[i]; + tr = new Trace(b, next, prev); + traces[tr->id()] = tr; + + // All connector blocks should be at the end of the list + if (b->is_connector()) break; + + // If this block and the next one have a one-to-one successor + // predecessor relationship, simply append the next block + int nfallthru = b->num_fall_throughs(); + while (nfallthru == 1 && + b->succ_fall_through(0)) { + Block *n = b->_succs[0]; + + // Skip over single-entry connector blocks, we don't want to + // add them to the trace. 
+ while (n->is_connector() && n->num_preds() == 1) { + n = n->_succs[0]; + } + + // We see a merge point, so stop search for the next block + if (n->num_preds() != 1) break; + + i++; + assert(n == _cfg._blocks[i], "expecting next block"); + tr->append(n); + uf->map(n->_pre_order, tr->id()); + traces[n->_pre_order] = NULL; + nfallthru = b->num_fall_throughs(); + b = n; + } + + if (nfallthru > 0) { + // Create a CFGEdge for each outgoing + // edge that could be a fall-through. + for (uint j = 0; j < b->_num_succs; j++ ) { + if (b->succ_fall_through(j)) { + Block *target = b->non_connector_successor(j); + float freq = b->_freq * b->succ_prob(j); + int from_pct = (int) ((100 * freq) / b->_freq); + int to_pct = (int) ((100 * freq) / target->_freq); + edges->append(new CFGEdge(b, target, freq, from_pct, to_pct)); + } + } + } + } + + // Group connector blocks into one trace + for (i++; i < _cfg._num_blocks; i++) { + Block *b = _cfg._blocks[i]; + assert(b->is_connector(), "connector blocks at the end"); + tr->append(b); + uf->map(b->_pre_order, tr->id()); + traces[b->_pre_order] = NULL; + } +} + +//------------------------------union_traces---------------------------------- +// Union two traces together in uf, and null out the trace in the list +void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) +{ + uint old_id = old_trace->id(); + uint updated_id = updated_trace->id(); + + uint lo_id = updated_id; + uint hi_id = old_id; + + // If from is greater than to, swap values to meet + // UnionFind guarantee. + if (updated_id > old_id) { + lo_id = old_id; + hi_id = updated_id; + + // Fix up the trace ids + traces[lo_id] = traces[updated_id]; + updated_trace->set_id(lo_id); + } + + // Union the lower with the higher and remove the pointer + // to the higher. 
+ uf->Union(lo_id, hi_id); + traces[hi_id] = NULL; +} + +//------------------------------grow_traces------------------------------------- +// Append traces together via the most frequently executed edges +void PhaseBlockLayout::grow_traces() +{ + // Order the edges, and drive the growth of Traces via the most + // frequently executed edges. + edges->sort(edge_order); + for (int i = 0; i < edges->length(); i++) { + CFGEdge *e = edges->at(i); + + if (e->state() != CFGEdge::open) continue; + + Block *src_block = e->from(); + Block *targ_block = e->to(); + + // Don't grow traces along backedges? + if (!BlockLayoutRotateLoops) { + if (targ_block->_rpo <= src_block->_rpo) { + targ_block->set_loop_alignment(targ_block); + continue; + } + } + + Trace *src_trace = trace(src_block); + Trace *targ_trace = trace(targ_block); + + // If the edge in question can join two traces at their ends, + // append one trace to the other. + if (src_trace->last_block() == src_block) { + if (src_trace == targ_trace) { + e->set_state(CFGEdge::interior); + if (targ_trace->backedge(e)) { + // Reset i to catch any newly eligible edge + // (Or we could remember the first "open" edge, and reset there) + i = 0; + } + } else if (targ_trace->first_block() == targ_block) { + e->set_state(CFGEdge::connected); + src_trace->append(targ_trace); + union_traces(src_trace, targ_trace); + } + } + } +} + +//------------------------------merge_traces----------------------------------- +// Embed one trace into another, if the fork or join points are sufficiently +// balanced. +void PhaseBlockLayout::merge_traces(bool fall_thru_only) +{ + // Walk the edge list another time, looking at unprocessed edges. 
+ // Fold in diamonds + for (int i = 0; i < edges->length(); i++) { + CFGEdge *e = edges->at(i); + + if (e->state() != CFGEdge::open) continue; + if (fall_thru_only) { + if (e->infrequent()) continue; + } + + Block *src_block = e->from(); + Trace *src_trace = trace(src_block); + bool src_at_tail = src_trace->last_block() == src_block; + + Block *targ_block = e->to(); + Trace *targ_trace = trace(targ_block); + bool targ_at_start = targ_trace->first_block() == targ_block; + + if (src_trace == targ_trace) { + // This may be a loop, but we can't do much about it. + e->set_state(CFGEdge::interior); + continue; + } + + if (fall_thru_only) { + // If the edge links the middle of two traces, we can't do anything. + // Leave the edge open and continue. + if (!src_at_tail && !targ_at_start) { + continue; + } + + // Don't grow traces along backedges? + if (!BlockLayoutRotateLoops && (targ_block->_rpo <= src_block->_rpo)) { + continue; + } + + // If both ends of the edge are available, why didn't we handle it earlier? + assert(src_at_tail ^ targ_at_start, "Should have caught this edge earlier."); + + if (targ_at_start) { + // Insert the "targ" trace in the "src" trace if the insertion point + // is a two way branch. + // Better profitability check possible, but may not be worth it. + // Someday, see if this "fork" has an associated "join"; + // then make a policy on merging this trace at the fork or join. + // For example, other things being equal, it may be better to place this + // trace at the join point if the "src" trace ends in a two-way, but + // the insertion point is one-way. 
+ assert(src_block->num_fall_throughs() == 2, "unexpected diamond"); + e->set_state(CFGEdge::connected); + src_trace->insert_after(src_block, targ_trace); + union_traces(src_trace, targ_trace); + } else if (src_at_tail) { + if (src_trace != trace(_cfg._broot)) { + e->set_state(CFGEdge::connected); + targ_trace->insert_before(targ_block, src_trace); + union_traces(targ_trace, src_trace); + } + } + } else if (e->state() == CFGEdge::open) { + // Append traces, even without a fall-thru connection. + // But leave root entry at the beginning of the block list. + if (targ_trace != trace(_cfg._broot)) { + e->set_state(CFGEdge::connected); + src_trace->append(targ_trace); + union_traces(src_trace, targ_trace); + } + } + } +} + +//----------------------------reorder_traces----------------------------------- +// Order the sequence of the traces in some desirable way, and fixup the +// jumps at the end of each block. +void PhaseBlockLayout::reorder_traces(int count) +{ + ResourceArea *area = Thread::current()->resource_area(); + Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count); + Block_List worklist; + int new_count = 0; + + // Compact the traces. + for (int i = 0; i < count; i++) { + Trace *tr = traces[i]; + if (tr != NULL) { + new_traces[new_count++] = tr; + } + } + + // The entry block should be first on the new trace list. 
+ Trace *tr = trace(_cfg._broot); + assert(tr == new_traces[0], "entry trace misplaced"); + + // Sort the new trace list by frequency + qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order); + + // Patch up the successor blocks + _cfg._blocks.reset(); + _cfg._num_blocks = 0; + for (int i = 0; i < new_count; i++) { + Trace *tr = new_traces[i]; + if (tr != NULL) { + tr->fixup_blocks(_cfg); + } + } +} + +//------------------------------PhaseBlockLayout------------------------------- +// Order basic blocks based on frequency +PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) : + Phase(BlockLayout), + _cfg(cfg) +{ + ResourceMark rm; + ResourceArea *area = Thread::current()->resource_area(); + + // List of traces + int size = _cfg._num_blocks + 1; + traces = NEW_ARENA_ARRAY(area, Trace *, size); + memset(traces, 0, size*sizeof(Trace*)); + next = NEW_ARENA_ARRAY(area, Block *, size); + memset(next, 0, size*sizeof(Block *)); + prev = NEW_ARENA_ARRAY(area, Block *, size); + memset(prev , 0, size*sizeof(Block *)); + + // List of edges + edges = new GrowableArray<CFGEdge*>; + + // Mapping block index --> block_trace + uf = new UnionFind(size); + uf->reset(size); + + // Find edges and create traces. + find_edges(); + + // Grow traces at their ends via most frequent edges. + grow_traces(); + + // Merge one trace into another, but only at fall-through points. + // This may make diamonds and other related shapes in a trace. + merge_traces(true); + + // Run merge again, allowing two traces to be catenated, even if + // one does not fall through into the other. This appends loosely + // related traces to be near each other. + merge_traces(false); + + // Re-order all the remaining traces by frequency + reorder_traces(size); + + assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink"); +} + + +//------------------------------backedge--------------------------------------- +// Edge e completes a loop in a trace. 
If the target block is head of the +// loop, rotate the loop block so that the loop ends in a conditional branch. +bool Trace::backedge(CFGEdge *e) { + bool loop_rotated = false; + Block *src_block = e->from(); + Block *targ_block = e->to(); + + assert(last_block() == src_block, "loop discovery at back branch"); + if (first_block() == targ_block) { + if (BlockLayoutRotateLoops && last_block()->num_fall_throughs() < 2) { + // Find the last block in the trace that has a conditional + // branch. + Block *b; + for (b = last_block(); b != NULL; b = prev(b)) { + if (b->num_fall_throughs() == 2) { + break; + } + } + + if (b != last_block() && b != NULL) { + loop_rotated = true; + + // Rotate the loop by doing two-part linked-list surgery. + append(first_block()); + break_loop_after(b); + } + } + + // Backbranch to the top of a trace + // Scroll forward through the trace from the targ_block. If we find + // a loop head before another loop top, use the loop head alignment. + for (Block *b = targ_block; b != NULL; b = next(b)) { + if (b->has_loop_alignment()) { + break; + } + if (b->head()->is_Loop()) { + targ_block = b; + break; + } + } + + first_block()->set_loop_alignment(targ_block); + + } else { + // Backbranch into the middle of a trace + targ_block->set_loop_alignment(targ_block); + } + + return loop_rotated; +} + +//------------------------------fixup_blocks----------------------------------- +// push blocks onto the CFG list +// ensure that blocks have the correct two-way branch sense +void Trace::fixup_blocks(PhaseCFG &cfg) { + Block *last = last_block(); + for (Block *b = first_block(); b != NULL; b = next(b)) { + cfg._blocks.push(b); + cfg._num_blocks++; + if (!b->is_connector()) { + int nfallthru = b->num_fall_throughs(); + if (b != last) { + if (nfallthru == 2) { + // Ensure that the sense of the branch is correct + Block *bnext = next(b); + Block *bs0 = b->non_connector_successor(0); + + MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach(); + ProjNode 
*proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj(); + ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj(); + + if (bnext == bs0) { + // Fall-thru case in succs[0], should be in succs[1] + + // Flip targets in _succs map + Block *tbs0 = b->_succs[0]; + Block *tbs1 = b->_succs[1]; + b->_succs.map( 0, tbs1 ); + b->_succs.map( 1, tbs0 ); + + // Flip projections to match targets + b->_nodes.map(b->_nodes.size()-2, proj1); + b->_nodes.map(b->_nodes.size()-1, proj0); + } + } + } + } + } +}