# HG changeset patch
# User rasbold
# Date 1226012350 28800
# Node ID 72c5366e5d860d0f6e58438fa14fdeed3ed1d80a
# Parent  f4fe12e429a4a1d2ab8741f3669a4cb0bff8d89c
6743900: frequency based block layout
Summary: post-register allocation pass that drives block layout by edge frequencies
Reviewed-by: never, kvn

diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/block.cpp
--- a/src/share/vm/opto/block.cpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/block.cpp	Thu Nov 06 14:59:10 2008 -0800
@@ -57,6 +57,14 @@
     _blocks[i] = b;
 }

+#ifndef PRODUCT
+void Block_List::print() {
+  for (uint i=0; i < size(); i++) {
+    tty->print("B%d ", _blocks[i]->_pre_order);
+  }
+  tty->print("size = %d\n", size());
+}
+#endif

 //=============================================================================

@@ -66,6 +74,12 @@
   // Check for Start block
   if( _pre_order == 1 ) return InteriorEntryAlignment;
   // Check for loop alignment
+  if (has_loop_alignment()) return loop_alignment();
+
+  return 1;                    // no particular alignment
+}
+
+uint Block::compute_loop_alignment() {
   Node *h = head();
   if( h->is_Loop() && h->as_Loop()->is_inner_loop() ) {
     // Pre- and post-loops have low trip count so do not bother with
@@ -83,13 +97,15 @@
     }
     return OptoLoopAlignment; // Otherwise align loop head
   }
+
   return 1;                    // no particular alignment
 }

 //-----------------------------------------------------------------------------
 // Compute the size of first 'inst_cnt' instructions in this block.
 // Return the number of instructions left to compute if the block has
-// less then 'inst_cnt' instructions.
+// less than 'inst_cnt' instructions.  Stop and return 0 if sum_size
+// exceeds OptoLoopAlignment.
 uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt, PhaseRegAlloc* ra) {
   uint last_inst = _nodes.size();
@@ -307,6 +323,8 @@
     tty->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order);
     // Dump any loop-specific bits, especially for CountedLoops.
     loop->dump_spec(tty);
+  } else if (has_loop_alignment()) {
+    tty->print(" top-of-loop");
   }
   tty->print(" Freq: %g",_freq);
   if( Verbose || WizardMode ) {
@@ -509,9 +527,11 @@
   int branch_idx = b->_nodes.size() - b->_num_succs-1;
   if( branch_idx < 1 ) return false;
   Node *bra = b->_nodes[branch_idx];
-  if( bra->is_Catch() ) return true;
+  if( bra->is_Catch() )
+    return true;
   if( bra->is_Mach() ) {
-    if( bra->is_MachNullCheck() ) return true;
+    if( bra->is_MachNullCheck() )
+      return true;
     int iop = bra->as_Mach()->ideal_Opcode();
     if( iop == Op_FastLock || iop == Op_FastUnlock )
       return true;
@@ -557,10 +577,10 @@
     dead->_nodes[k]->del_req(j);
 }

-//------------------------------MoveToNext-------------------------------------
+//------------------------------move_to_next-----------------------------------
 // Helper function to move block bx to the slot following b_index. Return
 // true if the move is successful, otherwise false
-bool PhaseCFG::MoveToNext(Block* bx, uint b_index) {
+bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
   if (bx == NULL) return false;

   // Return false if bx is already scheduled.
@@ -591,9 +611,9 @@
   return true;
 }

-//------------------------------MoveToEnd--------------------------------------
+//------------------------------move_to_end------------------------------------
 // Move empty and uncommon blocks to the end.
-void PhaseCFG::MoveToEnd(Block *b, uint i) {
+void PhaseCFG::move_to_end(Block *b, uint i) {
   int e = b->is_Empty();
   if (e != Block::not_empty) {
     if (e == Block::empty_with_goto) {
@@ -609,15 +629,31 @@
   _blocks.push(b);
 }

-//------------------------------RemoveEmpty------------------------------------
-// Remove empty basic blocks and useless branches.
-void PhaseCFG::RemoveEmpty() {
+//---------------------------set_loop_alignment--------------------------------
+// Set loop alignment for every block
+void PhaseCFG::set_loop_alignment() {
+  uint last = _num_blocks;
+  assert( _blocks[0] == _broot, "" );
+
+  for (uint i = 1; i < last; i++ ) {
+    Block *b = _blocks[i];
+    if (b->head()->is_Loop()) {
+      b->set_loop_alignment(b);
+    }
+  }
+}
+
+//-----------------------------remove_empty------------------------------------
+// Make empty basic blocks into "connector" blocks, and move uncommon blocks
+// to the end.
+void PhaseCFG::remove_empty() {
   // Move uncommon blocks to the end
   uint last = _num_blocks;
-  uint i;
   assert( _blocks[0] == _broot, "" );
-  for( i = 1; i < last; i++ ) {
+
+  for (uint i = 1; i < last; i++) {
     Block *b = _blocks[i];
+    if (b->is_connector()) break;

     // Check for NeverBranch at block end.  This needs to become a GOTO to the
     // true target.  NeverBranch are treated as a conditional branch that
@@ -629,37 +665,40 @@
       convert_NeverBranch_to_Goto(b);

     // Look for uncommon blocks and move to end.
-    if( b->is_uncommon(_bbs) ) {
-      MoveToEnd(b, i);
-      last--;                   // No longer check for being uncommon!
-      if( no_flip_branch(b) ) { // Fall-thru case must follow?
-        b = _blocks[i];         // Find the fall-thru block
-        MoveToEnd(b, i);
-        last--;
+    if (!C->do_freq_based_layout()) {
+      if( b->is_uncommon(_bbs) ) {
+        move_to_end(b, i);
+        last--;                   // No longer check for being uncommon!
+        if( no_flip_branch(b) ) { // Fall-thru case must follow?
+          b = _blocks[i];         // Find the fall-thru block
+          move_to_end(b, i);
+          last--;
+        }
+        i--;                      // backup block counter post-increment
       }
-      i--;                      // backup block counter post-increment
     }
   }

-  // Remove empty blocks
-  uint j1;
+  // Move empty blocks to the end
   last = _num_blocks;
-  for( i=0; i < last; i++ ) {
+  for (uint i = 1; i < last; i++) {
     Block *b = _blocks[i];
-    if (i > 0) {
-      if (b->is_Empty() != Block::not_empty) {
-        MoveToEnd(b, i);
-        last--;
-        i--;
-      }
+    if (b->is_Empty() != Block::not_empty) {
+      move_to_end(b, i);
+      last--;
+      i--;
     }
   } // End of for all blocks
+}

+//-----------------------------fixup_flow--------------------------------------
+// Fix up the final control flow for basic blocks.
+void PhaseCFG::fixup_flow() {
   // Fixup final control flow for the blocks.  Remove jump-to-next
   // block.  If neither arm of an IF follows the conditional branch, we
   // have to add a second jump after the conditional.  We place the
   // TRUE branch target in succs[0] for both GOTOs and IFs.
-  for( i=0; i < _num_blocks; i++ ) {
+  for (uint i=0; i < _num_blocks; i++) {
     Block *b = _blocks[i];
     b->_pre_order = i;          // turn pre-order into block-index
@@ -700,7 +739,7 @@
         }
       }
       // Remove all CatchProjs
-      for (j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
+      for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();

     } else if (b->_num_succs == 1) {
       // Block ends in a Goto?
@@ -730,8 +769,7 @@
       // successors after the current one, provided that the
       // successor was previously unscheduled, but moveable
       // (i.e., all paths to it involve a branch).
-      if( bnext != bs0 && bnext != bs1 ) {
-
+      if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) {
         // Choose the more common successor based on the probability
         // of the conditional branch.
         Block *bx = bs0;
@@ -751,9 +789,9 @@
         }

         // Attempt the more common successor first
-        if (MoveToNext(bx, i)) {
+        if (move_to_next(bx, i)) {
           bnext = bx;
-        } else if (MoveToNext(by, i)) {
+        } else if (move_to_next(by, i)) {
           bnext = by;
         }
       }
@@ -774,10 +812,8 @@
         // Flip projection for each target
         { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }

-      } else if( bnext == bs1 ) { // Fall-thru is already in succs[1]
-
-      } else {                    // Else need a double-branch
-
+      } else if( bnext != bs1 ) {
+        // Need a double-branch
         // The existing conditional branch need not change.
         // Add an unconditional branch to the false target.
         // Alas, it must appear in its own block and adding a
@@ -786,8 +822,9 @@
       }

       // Make sure we TRUE branch to the target
-      if( proj0->Opcode() == Op_IfFalse )
+      if( proj0->Opcode() == Op_IfFalse ) {
         iff->negate();
+      }

       b->_nodes.pop();          // Remove IfFalse & IfTrue projections
       b->_nodes.pop();
@@ -796,9 +833,7 @@
       // Multi-exit block, e.g. a switch statement
       // But we don't need to do anything here
     }
-
   } // End of for all blocks
-
 }

@@ -905,7 +940,7 @@
   // Force the Union-Find mapping to be at least this large
   extend(max,0);
   // Initialize to be the ID mapping.
-  for( uint i=0; i<_max; i++ ) map(i,i);
+  for( uint i=0; i<max; i++ ) map(i,i);
 }

@@ -913,9 +948,9 @@
 //------------------------------Find_compress----------------------------------
 // Straight out of Tarjan's Union-Find algorithm
 uint UnionFind::Find_compress( uint idx ) {
+  if( idx >= _max ) return idx;
   uint next = lookup(idx);
   while( next != idx ) {      // Scan chain of equivalences
-    assert( next < idx, "always union smaller" );
     idx = next;               // until find a fixed-point
     next = lookup(idx);
   }
@@ -956,3 +990,491 @@
   assert( src < dst, "always union smaller" );
   map(dst,src);
 }
+
+#ifndef PRODUCT
+static void edge_dump(GrowableArray<CFGEdge *> *edges) {
+  tty->print_cr("---- Edges ----");
+  for (int i = 0; i < edges->length(); i++) {
+    CFGEdge *e = edges->at(i);
+    if (e != NULL) {
+      edges->at(i)->dump();
+    }
+  }
+}
+
+static void trace_dump(Trace *traces[], int count) {
+  tty->print_cr("---- Traces ----");
+  for (int i = 0; i < count; i++) {
+    Trace *tr = traces[i];
+    if (tr != NULL) {
+      tr->dump();
+    }
+  }
+}
+
+void Trace::dump( ) const {
+  tty->print_cr("Trace (freq %f)", first_block()->_freq);
+  for (Block *b = first_block(); b != NULL; b = next(b)) {
+    tty->print(" B%d", b->_pre_order);
+    if (b->head()->is_Loop()) {
+      tty->print(" (L%d)", b->compute_loop_alignment());
+    }
+    if (b->has_loop_alignment()) {
+      tty->print(" (T%d)", b->code_alignment());
+    }
+  }
+  tty->cr();
+}
+
+void CFGEdge::dump( ) const {
+  tty->print(" B%d --> B%d  Freq: %f  out:%3d%%  in:%3d%%  State: ",
+             from()->_pre_order, to()->_pre_order, freq(), _from_pct, _to_pct);
+  switch(state()) {
+  case connected:
+    tty->print("connected");
+    break;
+  case open:
+    tty->print("open");
+    break;
+  case interior:
+    tty->print("interior");
+    break;
+  }
+  if (infrequent()) {
+    tty->print(" infrequent");
+  }
+  tty->cr();
+}
+#endif
+
+//=============================================================================
+
+//------------------------------edge_order-------------------------------------
+// Comparison function for edges
+static int edge_order(CFGEdge **e0, CFGEdge **e1) {
+  float freq0 = (*e0)->freq();
+  float freq1 = (*e1)->freq();
+  if (freq0 != freq1) {
+    return freq0 > freq1 ? -1 : 1;
+  }
+
+  int dist0 = (*e0)->to()->_rpo - (*e0)->from()->_rpo;
+  int dist1 = (*e1)->to()->_rpo - (*e1)->from()->_rpo;
+
+  return dist1 - dist0;
+}
+
+//------------------------------trace_frequency_order--------------------------
+// Comparison function for traces
+static int trace_frequency_order(const void *p0, const void *p1) {
+  Trace *tr0 = *(Trace **) p0;
+  Trace *tr1 = *(Trace **) p1;
+  Block *b0 = tr0->first_block();
+  Block *b1 = tr1->first_block();
+
+  // The trace of connector blocks goes at the end;
+  // we only expect one such trace
+  if (b0->is_connector() != b1->is_connector()) {
+    return b1->is_connector() ? -1 : 1;
+  }
+
+  // Pull more frequently executed blocks to the beginning
+  float freq0 = b0->_freq;
+  float freq1 = b1->_freq;
+  if (freq0 != freq1) {
+    return freq0 > freq1 ? -1 : 1;
+  }
+
+  int diff = tr0->first_block()->_rpo - tr1->first_block()->_rpo;
+
+  return diff;
+}
+
+//------------------------------find_edges--------------------------------------
+// Find edges of interest, i.e., those which can fall through.  Presumes that
+// edges which don't fall through are of low frequency and can be generally
+// ignored.  Initialize the list of traces.
+void PhaseBlockLayout::find_edges()
+{
+  // Walk the blocks, creating edges and Traces
+  uint i;
+  Trace *tr = NULL;
+  for (i = 0; i < _cfg._num_blocks; i++) {
+    Block *b = _cfg._blocks[i];
+    tr = new Trace(b, next, prev);
+    traces[tr->id()] = tr;
+
+    // All connector blocks should be at the end of the list
+    if (b->is_connector()) break;
+
+    // If this block and the next one have a one-to-one successor/
+    // predecessor relationship, simply append the next block
+    int nfallthru = b->num_fall_throughs();
+    while (nfallthru == 1 &&
+           b->succ_fall_through(0)) {
+      Block *n = b->_succs[0];
+
+      // Skip over single-entry connector blocks, we don't want to
+      // add them to the trace.
+      while (n->is_connector() && n->num_preds() == 1) {
+        n = n->_succs[0];
+      }
+
+      // We see a merge point, so stop the search for the next block
+      if (n->num_preds() != 1) break;
+
+      i++;
+      assert(n == _cfg._blocks[i], "expecting next block");
+      tr->append(n);
+      uf->map(n->_pre_order, tr->id());
+      traces[n->_pre_order] = NULL;
+      nfallthru = b->num_fall_throughs();
+      b = n;
+    }
+
+    if (nfallthru > 0) {
+      // Create a CFGEdge for each outgoing
+      // edge that could be a fall-through.
+      for (uint j = 0; j < b->_num_succs; j++ ) {
+        if (b->succ_fall_through(j)) {
+          Block *target = b->non_connector_successor(j);
+          float freq = b->_freq * b->succ_prob(j);
+          int from_pct = (int) ((100 * freq) / b->_freq);
+          int to_pct = (int) ((100 * freq) / target->_freq);
+          edges->append(new CFGEdge(b, target, freq, from_pct, to_pct));
+        }
+      }
+    }
+  }
+
+  // Group connector blocks into one trace
+  for (i++; i < _cfg._num_blocks; i++) {
+    Block *b = _cfg._blocks[i];
+    assert(b->is_connector(), "connector blocks at the end");
+    tr->append(b);
+    uf->map(b->_pre_order, tr->id());
+    traces[b->_pre_order] = NULL;
+  }
+}
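find_edges() above threads blocks into Traces using the shared next/prev arrays that PhaseBlockLayout allocates, one slot per block _pre_order number, so trace membership costs only two pointer stores per block. A minimal standalone sketch of that array-backed linked-list idea follows; SimpleBlock, SimpleTrace, and the fixed capacity are illustrative stand-ins, not HotSpot types:

    #include <cstdio>

    struct SimpleBlock { unsigned pre_order; };

    struct SimpleTrace {
      SimpleBlock** next_list;   // next_list[b->pre_order] = block after b
      SimpleBlock** prev_list;   // prev_list[b->pre_order] = block before b
      SimpleBlock*  first;
      SimpleBlock*  last;

      SimpleTrace(SimpleBlock* b, SimpleBlock** nl, SimpleBlock** pl)
        : next_list(nl), prev_list(pl), first(b), last(b) {
        nl[b->pre_order] = nullptr;   // each block starts as its own trace
        pl[b->pre_order] = nullptr;
      }

      // Append a block, mirroring Trace::append(Block*)
      void append(SimpleBlock* b) {
        next_list[last->pre_order] = b;
        prev_list[b->pre_order] = last;
        last = b;
      }
    };

    int main() {
      SimpleBlock a{1}, b{2}, c{3};
      SimpleBlock* next[8] = {};     // shared side tables, zero-initialized
      SimpleBlock* prev[8] = {};
      SimpleTrace tr(&a, next, prev);
      tr.append(&b);
      tr.append(&c);
      // Walk the trace the same way Trace::dump() does
      for (SimpleBlock* p = tr.first; p != nullptr; p = next[p->pre_order])
        printf("B%u ", p->pre_order); // prints: B1 B2 B3
      printf("\n");
      return 0;
    }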
+
+//------------------------------union_traces-----------------------------------
+// Union two traces together in uf, and null out the trace in the list
+void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
+{
+  uint old_id = old_trace->id();
+  uint updated_id = updated_trace->id();
+
+  uint lo_id = updated_id;
+  uint hi_id = old_id;
+
+  // If updated_id is greater than old_id, swap the values to meet
+  // the UnionFind guarantee that we always union the smaller id.
+  if (updated_id > old_id) {
+    lo_id = old_id;
+    hi_id = updated_id;
+
+    // Fix up the trace ids
+    traces[lo_id] = traces[updated_id];
+    updated_trace->set_id(lo_id);
+  }
+
+  // Union the lower with the higher and remove the pointer
+  // to the higher.
+  uf->Union(lo_id, hi_id);
+  traces[hi_id] = NULL;
+}
+
+//------------------------------grow_traces------------------------------------
+// Append traces together via the most frequently executed edges
+void PhaseBlockLayout::grow_traces()
+{
+  // Order the edges, and drive the growth of Traces via the most
+  // frequently executed edges.
+  edges->sort(edge_order);
+  for (int i = 0; i < edges->length(); i++) {
+    CFGEdge *e = edges->at(i);
+
+    if (e->state() != CFGEdge::open) continue;
+
+    Block *src_block = e->from();
+    Block *targ_block = e->to();
+
+    // Don't grow traces along backedges?
+    if (!BlockLayoutRotateLoops) {
+      if (targ_block->_rpo <= src_block->_rpo) {
+        targ_block->set_loop_alignment(targ_block);
+        continue;
+      }
+    }
+
+    Trace *src_trace = trace(src_block);
+    Trace *targ_trace = trace(targ_block);
+
+    // If the edge in question can join two traces at their ends,
+    // append one trace to the other.
+    if (src_trace->last_block() == src_block) {
+      if (src_trace == targ_trace) {
+        e->set_state(CFGEdge::interior);
+        if (targ_trace->backedge(e)) {
+          // Reset i to catch any newly eligible edge
+          // (Or we could remember the first "open" edge, and reset there)
+          i = 0;
+        }
+      } else if (targ_trace->first_block() == targ_block) {
+        e->set_state(CFGEdge::connected);
+        src_trace->append(targ_trace);
+        union_traces(src_trace, targ_trace);
+      }
+    }
+  }
+}
+
+//------------------------------merge_traces------------------------------------
+// Embed one trace into another, if the fork or join points are sufficiently
+// balanced.
+void PhaseBlockLayout::merge_traces(bool fall_thru_only)
+{
+  // Walk the edge list another time, looking at unprocessed edges.
+  // Fold in diamonds.
+  for (int i = 0; i < edges->length(); i++) {
+    CFGEdge *e = edges->at(i);
+
+    if (e->state() != CFGEdge::open) continue;
+    if (fall_thru_only) {
+      if (e->infrequent()) continue;
+    }
+
+    Block *src_block = e->from();
+    Trace *src_trace = trace(src_block);
+    bool src_at_tail = src_trace->last_block() == src_block;
+
+    Block *targ_block = e->to();
+    Trace *targ_trace = trace(targ_block);
+    bool targ_at_start = targ_trace->first_block() == targ_block;
+
+    if (src_trace == targ_trace) {
+      // This may be a loop, but we can't do much about it.
+      e->set_state(CFGEdge::interior);
+      continue;
+    }
+
+    if (fall_thru_only) {
+      // If the edge links the middle of two traces, we can't do anything.
+      // Mark the edge and continue.
+      if (!src_at_tail & !targ_at_start) {
+        continue;
+      }
+
+      // Don't grow traces along backedges?
+      if (!BlockLayoutRotateLoops && (targ_block->_rpo <= src_block->_rpo)) {
+        continue;
+      }
+
+      // If both ends of the edge are available, why didn't we handle it earlier?
+      assert(src_at_tail ^ targ_at_start, "Should have caught this edge earlier.");
+
+      if (targ_at_start) {
+        // Insert the "targ" trace in the "src" trace if the insertion point
+        // is a two way branch.
+        // Better profitability check possible, but may not be worth it.
+        // Someday, see if this "fork" has an associated "join";
+        // then make a policy on merging this trace at the fork or join.
+        // For example, other things being equal, it may be better to place this
+        // trace at the join point if the "src" trace ends in a two-way, but
+        // the insertion point is one-way.
+        assert(src_block->num_fall_throughs() == 2, "unexpected diamond");
+        e->set_state(CFGEdge::connected);
+        src_trace->insert_after(src_block, targ_trace);
+        union_traces(src_trace, targ_trace);
+      } else if (src_at_tail) {
+        if (src_trace != trace(_cfg._broot)) {
+          e->set_state(CFGEdge::connected);
+          targ_trace->insert_before(targ_block, src_trace);
+          union_traces(targ_trace, src_trace);
+        }
+      }
+    } else if (e->state() == CFGEdge::open) {
+      // Append traces, even without a fall-thru connection.
+      // But leave the root entry at the beginning of the block list.
+      if (targ_trace != trace(_cfg._broot)) {
+        e->set_state(CFGEdge::connected);
+        src_trace->append(targ_trace);
+        union_traces(src_trace, targ_trace);
+      }
+    }
+  }
+}
+
+//----------------------------reorder_traces------------------------------------
+// Order the sequence of the traces in some desirable way, and fix up the
+// jumps at the end of each block.
+void PhaseBlockLayout::reorder_traces(int count)
+{
+  ResourceArea *area = Thread::current()->resource_area();
+  Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count);
+  Block_List worklist;
+  int new_count = 0;
+
+  // Compact the traces.
+  for (int i = 0; i < count; i++) {
+    Trace *tr = traces[i];
+    if (tr != NULL) {
+      new_traces[new_count++] = tr;
+    }
+  }
+
+  // The entry block should be first on the new trace list.
+  Trace *tr = trace(_cfg._broot);
+  assert(tr == new_traces[0], "entry trace misplaced");
+
+  // Sort the new trace list by frequency
+  qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order);
+
+  // Patch up the successor blocks
+  _cfg._blocks.reset();
+  _cfg._num_blocks = 0;
+  for (int i = 0; i < new_count; i++) {
+    Trace *tr = new_traces[i];
+    if (tr != NULL) {
+      tr->fixup_blocks(_cfg);
+    }
+  }
+}
+
+//------------------------------PhaseBlockLayout--------------------------------
+// Order basic blocks based on frequency
+PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
+  Phase(BlockLayout),
+  _cfg(cfg)
+{
+  ResourceMark rm;
+  ResourceArea *area = Thread::current()->resource_area();
+
+  // List of traces
+  int size = _cfg._num_blocks + 1;
+  traces = NEW_ARENA_ARRAY(area, Trace *, size);
+  memset(traces, 0, size*sizeof(Trace*));
+  next = NEW_ARENA_ARRAY(area, Block *, size);
+  memset(next, 0, size*sizeof(Block *));
+  prev = NEW_ARENA_ARRAY(area, Block *, size);
+  memset(prev, 0, size*sizeof(Block *));
+
+  // List of edges
+  edges = new GrowableArray<CFGEdge *>;
+
+  // Mapping: block index --> block_trace
+  uf = new UnionFind(size);
+  uf->reset(size);
+
+  // Find edges and create traces.
+  find_edges();
+
+  // Grow traces at their ends via most frequent edges.
+  grow_traces();
+
+  // Merge one trace into another, but only at fall-through points.
+  // This may make diamonds and other related shapes in a trace.
+  merge_traces(true);
+
+  // Run merge again, allowing two traces to be catenated, even if
+  // one does not fall through into the other.  This places loosely
+  // related traces near each other.
+  merge_traces(false);
+
+  // Re-order all the remaining traces by frequency
+  reorder_traces(size);
+
+  assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink");
+}
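The loop rotation that grow_traces() can trigger through Trace::backedge() (defined next) amounts to two pieces of list surgery: append the old head block to the tail, then cut the list after the last two-way block. A standalone sketch of that surgery with simplified stand-in types (prev links are omitted here; the real code maintains them as well):

    #include <cstdio>
    #include <vector>

    int main() {
      // Trace A(0) -> B(1) -> C(2); backedge C -> A; B ends in a 2-way branch.
      std::vector<int> next = {1, 2, -1};   // next[i] = block after i, -1 = none
      int first = 0, last = 2;

      // Part 1: append(first_block()) -- the old head A moves to the tail.
      next[last] = first;                   // C -> A
      last = first;                         // last = A

      // Part 2: break_loop_after(B) -- B becomes the new tail, C the new head.
      int b = 1;
      first = next[b];                      // first = C
      next[b] = -1;                         // cut the list after B
      last = b;

      for (int i = first; i != -1; i = next[i])
        printf("B%d ", i);                  // prints: B2 B0 B1  (i.e. C A B)
      printf("\n");
      return 0;
    }

The rotated trace now ends at the conditional block, so the loop's back branch can become a fall-through.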
+
+//------------------------------backedge----------------------------------------
+// Edge e completes a loop in a trace. If the target block is head of the
+// loop, rotate the loop block so that the loop ends in a conditional branch.
+bool Trace::backedge(CFGEdge *e) {
+  bool loop_rotated = false;
+  Block *src_block  = e->from();
+  Block *targ_block = e->to();
+
+  assert(last_block() == src_block, "loop discovery at back branch");
+  if (first_block() == targ_block) {
+    if (BlockLayoutRotateLoops && last_block()->num_fall_throughs() < 2) {
+      // Find the last block in the trace that has a conditional
+      // branch.
+      Block *b;
+      for (b = last_block(); b != NULL; b = prev(b)) {
+        if (b->num_fall_throughs() == 2) {
+          break;
+        }
+      }
+
+      if (b != last_block() && b != NULL) {
+        loop_rotated = true;
+
+        // Rotate the loop by doing two-part linked-list surgery.
+        append(first_block());
+        break_loop_after(b);
+      }
+    }
+
+    // Backbranch to the top of a trace
+    // Scroll forward through the trace from the targ_block. If we find
+    // a loop head before another loop top, use the loop head alignment.
+    for (Block *b = targ_block; b != NULL; b = next(b)) {
+      if (b->has_loop_alignment()) {
+        break;
+      }
+      if (b->head()->is_Loop()) {
+        targ_block = b;
+        break;
+      }
+    }
+
+    first_block()->set_loop_alignment(targ_block);
+
+  } else {
+    // Backbranch into the middle of a trace
+    targ_block->set_loop_alignment(targ_block);
+  }
+
+  return loop_rotated;
+}
+
+//------------------------------fixup_blocks------------------------------------
+// Push blocks onto the CFG list and ensure that blocks have the correct
+// two-way branch sense.
+void Trace::fixup_blocks(PhaseCFG &cfg) {
+  Block *last = last_block();
+  for (Block *b = first_block(); b != NULL; b = next(b)) {
+    cfg._blocks.push(b);
+    cfg._num_blocks++;
+    if (!b->is_connector()) {
+      int nfallthru = b->num_fall_throughs();
+      if (b != last) {
+        if (nfallthru == 2) {
+          // Ensure that the sense of the branch is correct
+          Block *bnext = next(b);
+          Block *bs0 = b->non_connector_successor(0);
+
+          MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach();
+          ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
+          ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
+
+          if (bnext == bs0) {
+            // Fall-thru case in succs[0], should be in succs[1]
+
+            // Flip targets in _succs map
+            Block *tbs0 = b->_succs[0];
+            Block *tbs1 = b->_succs[1];
+            b->_succs.map( 0, tbs1 );
+            b->_succs.map( 1, tbs0 );
+
+            // Flip projections to match targets
+            b->_nodes.map(b->_nodes.size()-2, proj1);
+            b->_nodes.map(b->_nodes.size()-1, proj0);
+          }
+        }
+      }
+    }
+  }
+}
diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/block.hpp
--- a/src/share/vm/opto/block.hpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/block.hpp	Thu Nov 06 14:59:10 2008 -0800
@@ -75,6 +75,7 @@
   void insert( uint i, Block *n );
   uint size() const { return _cnt; }
   void reset() { _cnt = 0; }
+  void print();
 };


@@ -129,7 +130,11 @@
   uint _rpo;                    // Number in reverse post order walk

   virtual bool is_block() { return true; }
-  float succ_prob(uint i); // return probability of i'th successor
+  float succ_prob(uint i);        // return probability of i'th successor
+  int num_fall_throughs();        // How many fall-through candidates this block has
+  void update_uncommon_branch(Block* un); // Lower branch prob to uncommon code
+  bool succ_fall_through(uint i); // Is successor "i" a fall-through candidate
+  Block* lone_fall_through();     // Return lone fall-through Block or null

   Block* dom_lca(Block* that);  // Compute LCA in dominator tree.
 #ifdef ASSERT
@@ -144,6 +149,7 @@

   // Report the alignment required by this block.  Must be a power of 2.
   // The previous block will insert nops to get this alignment.
   uint code_alignment();
+  uint compute_loop_alignment();

   // BLOCK_FREQUENCY is a sentinel to mark uses of constant block frequencies.
   // It is currently also used to scale such frequencies relative to
@@ -184,11 +190,12 @@
     int current_alignment = current_offset & max_pad;
     if( current_alignment != 0 ) {
       uint padding = (block_alignment-current_alignment) & max_pad;
-      if( !head()->is_Loop() ||
-          padding <= (uint)MaxLoopPad ||
-          first_inst_size() > padding ) {
-        return padding;
+      if( has_loop_alignment() &&
+          padding > (uint)MaxLoopPad &&
+          first_inst_size() <= padding ) {
+        return 0;
       }
+      return padding;
     }
   }
   return 0;
@@ -202,6 +209,21 @@
   void set_connector() { _connector = true; }
   bool is_connector() const { return _connector; };

+  // Loop_alignment will be set for blocks which are at the top of loops.
+  // The block layout pass may rotate loops such that the loop head may not
+  // be the sequentially first block of the loop encountered in the linear
+  // list of blocks.  If the layout pass is not run, loop alignment is set
+  // for each block which is the head of a loop.
+  uint _loop_alignment;
+  void set_loop_alignment(Block *loop_top) {
+    uint new_alignment = loop_top->compute_loop_alignment();
+    if (new_alignment > _loop_alignment) {
+      _loop_alignment = new_alignment;
+    }
+  }
+  uint loop_alignment() const { return _loop_alignment; }
+  bool has_loop_alignment() const { return loop_alignment() > 0; }
+
   // Create a new Block with given head Node.
   // Creates the (empty) predecessor arrays.
   Block( Arena *a, Node *headnode )
@@ -219,7 +241,8 @@
       _raise_LCA_mark(0),
       _raise_LCA_visited(0),
       _first_inst_size(999999),
-      _connector(false) {
+      _connector(false),
+      _loop_alignment(0) {
     _nodes.push(headnode);
   }

@@ -275,6 +298,16 @@
     return s;
   }

+  // Return true if b is a successor of this block
+  bool has_successor(Block* b) const {
+    for (uint i = 0; i < _num_succs; i++ ) {
+      if (non_connector_successor(i) == b) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   // Successor block, after forwarding through connectors
   Block* non_connector_successor(int i) const {
     return _succs[i]->non_connector();
@@ -319,7 +352,6 @@

   // I'll need a few machine-specific GotoNodes.  Clone from this one.
   MachNode *_goto;
-  void insert_goto_at(uint block_no, uint succ_no);

   Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
   void verify_anti_dependences(Block* LCA, Node* load) {
@@ -379,10 +411,15 @@
   // Compute the instruction global latency with a backwards walk
   void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);

+  // Set loop alignment
+  void set_loop_alignment();
+
   // Remove empty basic blocks
-  void RemoveEmpty();
-  bool MoveToNext(Block* bx, uint b_index);
-  void MoveToEnd(Block* bx, uint b_index);
+  void remove_empty();
+  void fixup_flow();
+  bool move_to_next(Block* bx, uint b_index);
+  void move_to_end(Block* bx, uint b_index);
+  void insert_goto_at(uint block_no, uint succ_no);

   // Check for NeverBranch at block end.  This needs to become a GOTO to the
   // true target.  NeverBranch are treated as a conditional branch that always
@@ -413,7 +450,7 @@
 };


-//------------------------------UnionFindInfo----------------------------------
+//------------------------------UnionFind--------------------------------------
 // Map Block indices to a block-index for a cfg-cover.
 // Array lookup in the optimized case.
 class UnionFind : public ResourceObj {
@@ -508,3 +545,166 @@
   void dump_tree() const;
 #endif
 };
+
+
+//----------------------------------CFGEdge------------------------------------
+// An edge between two basic blocks that will be embodied by a branch or a
+// fall-through.
+class CFGEdge : public ResourceObj {
+ private:
+  Block * _from;        // Source basic block
+  Block * _to;          // Destination basic block
+  float _freq;          // Execution frequency (estimate)
+  int   _state;
+  bool  _infrequent;
+  int   _from_pct;
+  int   _to_pct;
+
+  // Private accessors
+  int  from_pct() const { return _from_pct; }
+  int  to_pct()   const { return _to_pct;   }
+  int  from_infrequent() const { return from_pct() < BlockLayoutMinDiamondPercentage; }
+  int  to_infrequent()   const { return to_pct()   < BlockLayoutMinDiamondPercentage; }
+
+ public:
+  enum {
+    open,               // initial edge state; unprocessed
+    connected,          // edge used to connect two traces together
+    interior            // edge is interior to trace (could be backedge)
+  };
+
+  CFGEdge(Block *from, Block *to, float freq, int from_pct, int to_pct) :
+    _from(from), _to(to), _freq(freq),
+    _from_pct(from_pct), _to_pct(to_pct), _state(open) {
+    _infrequent = from_infrequent() || to_infrequent();
+  }
+
+  float  freq() const { return _freq; }
+  Block* from() const { return _from; }
+  Block* to  () const { return _to;   }
+  int  infrequent() const { return _infrequent; }
+  int  state() const { return _state; }
+
+  void set_state(int state) { _state = state; }
+
+#ifndef PRODUCT
+  void dump( ) const;
+#endif
+};
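For a CFGEdge, from_pct and to_pct are the edge's share of each endpoint's frequency, as computed in find_edges() in the block.cpp hunk above; an edge is considered infrequent when either share falls below BlockLayoutMinDiamondPercentage. A standalone arithmetic sketch with made-up frequencies:

    #include <cstdio>

    int main() {
      const int kMinDiamondPct = 20;        // BlockLayoutMinDiamondPercentage default
      float src_freq  = 10.0f;              // source block frequency (illustrative)
      float prob      = 0.15f;              // branch probability toward the target
      float targ_freq = 30.0f;              // target block frequency (illustrative)

      float edge_freq = src_freq * prob;                   // 1.5
      int from_pct = (int)((100 * edge_freq) / src_freq);  // 15% of the source
      int to_pct   = (int)((100 * edge_freq) / targ_freq); //  5% of the target

      bool infrequent = from_pct < kMinDiamondPct || to_pct < kMinDiamondPct;
      printf("from:%d%% to:%d%% infrequent:%d\n", from_pct, to_pct, infrequent);
      return 0;
    }

With these numbers both shares are under 20%, so merge_traces(true) would skip this edge when folding diamonds.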
+
+
+//-----------------------------------Trace--------------------------------------
+// An ordered list of basic blocks.
+class Trace : public ResourceObj {
+ private:
+  uint _id;             // Unique Trace id (derived from initial block)
+  Block ** _next_list;  // Array mapping index to next block
+  Block ** _prev_list;  // Array mapping index to previous block
+  Block * _first;       // First block in the trace
+  Block * _last;        // Last block in the trace
+
+  // Return the block that follows "b" in the trace.
+  Block * next(Block *b) const { return _next_list[b->_pre_order]; }
+  void set_next(Block *b, Block *n) const { _next_list[b->_pre_order] = n; }
+
+  // Return the block that precedes "b" in the trace.
+  Block * prev(Block *b) const { return _prev_list[b->_pre_order]; }
+  void set_prev(Block *b, Block *p) const { _prev_list[b->_pre_order] = p; }
+
+  // We've discovered a loop in this trace.  Reset last to be "b", and first
+  // to be the block following "b".
+  void break_loop_after(Block *b) {
+    _last = b;
+    _first = next(b);
+    set_prev(_first, NULL);
+    set_next(_last, NULL);
+  }
+
+ public:
+
+  Trace(Block *b, Block **next_list, Block **prev_list) :
+    _first(b),
+    _last(b),
+    _next_list(next_list),
+    _prev_list(prev_list),
+    _id(b->_pre_order) {
+    set_next(b, NULL);
+    set_prev(b, NULL);
+  };
+
+  // Return the id number
+  uint id() const { return _id; }
+  void set_id(uint id) { _id = id; }
+
+  // Return the first block in the trace
+  Block * first_block() const { return _first; }
+
+  // Return the last block in the trace
+  Block * last_block() const { return _last; }
+
+  // Insert a trace in the middle of this one after b
+  void insert_after(Block *b, Trace *tr) {
+    set_next(tr->last_block(), next(b));
+    if (next(b) != NULL) {
+      set_prev(next(b), tr->last_block());
+    }
+
+    set_next(b, tr->first_block());
+    set_prev(tr->first_block(), b);
+
+    if (b == _last) {
+      _last = tr->last_block();
+    }
+  }
+
+  void insert_before(Block *b, Trace *tr) {
+    Block *p = prev(b);
+    assert(p != NULL, "use append instead");
+    insert_after(p, tr);
+  }
+
+  // Append another trace to this one.
+  void append(Trace *tr) {
+    insert_after(_last, tr);
+  }
+
+  // Append a block at the end of this trace
+  void append(Block *b) {
+    set_next(_last, b);
+    set_prev(b, _last);
+    _last = b;
+  }
+
+  // Adjust the blocks in this trace
+  void fixup_blocks(PhaseCFG &cfg);
+  bool backedge(CFGEdge *e);
+
+#ifndef PRODUCT
+  void dump( ) const;
+#endif
+};
+
+//------------------------------PhaseBlockLayout--------------------------------
+// Rearrange blocks into some canonical order, based on edges and their frequencies
+class PhaseBlockLayout : public Phase {
+  PhaseCFG &_cfg;               // Control flow graph
+
+  GrowableArray<CFGEdge *> *edges;
+  Trace **traces;
+  Block **next;
+  Block **prev;
+  UnionFind *uf;
+
+  // Given a block, find its encompassing Trace
+  Trace * trace(Block *b) {
+    return traces[uf->Find_compress(b->_pre_order)];
+  }
+ public:
+  PhaseBlockLayout(PhaseCFG &cfg);
+
+  void find_edges();
+  void grow_traces();
+  void merge_traces(bool loose_connections);
+  void reorder_traces(int count);
+  void union_traces(Trace* from, Trace* to);
+};
diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/c2_globals.hpp
--- a/src/share/vm/opto/c2_globals.hpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Thu Nov 06 14:59:10 2008 -0800
@@ -396,5 +396,15 @@
                                                                             \
   diagnostic(intx, DominatorSearchLimit, 1000,                              \
           "Iterations limit in Node::dominates")                            \
+                                                                            \
+  product(bool, BlockLayoutByFrequency, true,                               \
+          "Use edge frequencies to drive block ordering")                   \
+                                                                            \
+  product(intx, BlockLayoutMinDiamondPercentage, 20,                        \
+          "Minimum %% of a successor (predecessor) for which block layout " \
+          "will allow a fork (join) in a single chain")                     \
+                                                                            \
+  product(bool, BlockLayoutRotateLoops, false,                              \
+          "Allow back branches to be fall throughs in the block layout")    \

 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
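Since all three flags are declared product(), they can be toggled directly on the java command line without unlocking diagnostic options. The invocations below are illustrative; the trailing dots stand for the usual class or jar arguments:

    java -XX:-BlockLayoutByFrequency ...             (fall back to the old uncommon-block layout)
    java -XX:BlockLayoutMinDiamondPercentage=30 ...  (require better-balanced forks/joins before merging)
    java -XX:+BlockLayoutRotateLoops ...             (allow back branches to become fall-throughs)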
diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/compile.cpp
--- a/src/share/vm/opto/compile.cpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/compile.cpp	Thu Nov 06 14:59:10 2008 -0800
@@ -822,6 +822,7 @@
   Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
   set_decompile_count(0);

+  set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
   // Compilation level related initialization
   if (env()->comp_level() == CompLevel_fast_compile) {
     set_num_loop_opts(Tier1LoopOptsCount);
@@ -1701,8 +1702,14 @@
   // are not adding any new instructions.  If any basic block is empty, we
   // can now safely remove it.
   {
-    NOT_PRODUCT( TracePhase t2("removeEmpty", &_t_removeEmptyBlocks, TimeCompiler); )
-    cfg.RemoveEmpty();
+    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
+    cfg.remove_empty();
+    if (do_freq_based_layout()) {
+      PhaseBlockLayout layout(cfg);
+    } else {
+      cfg.set_loop_alignment();
+    }
+    cfg.fixup_flow();
   }

   // Perform any platform dependent postallocation verifications.
diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/compile.hpp
--- a/src/share/vm/opto/compile.hpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/compile.hpp	Thu Nov 06 14:59:10 2008 -0800
@@ -154,6 +154,7 @@
   uint  _decompile_count;       // Cumulative decompilation counts.
   bool  _do_inlining;           // True if we intend to do inlining
   bool  _do_scheduling;         // True if we intend to do scheduling
+  bool  _do_freq_based_layout;  // True if we intend to do frequency based block layout
   bool  _do_count_invocations;  // True if we generate code to count invocations
   bool  _do_method_data_update; // True if we generate code to update methodDataOops
   int   _AliasLevel;            // Locally-adjusted version of AliasLevel flag.
@@ -307,6 +308,8 @@
   void set_do_inlining(bool z)  { _do_inlining = z; }
   bool do_scheduling() const    { return _do_scheduling; }
   void set_do_scheduling(bool z) { _do_scheduling = z; }
+  bool do_freq_based_layout() const { return _do_freq_based_layout; }
+  void set_do_freq_based_layout(bool z) { _do_freq_based_layout = z; }
   bool do_count_invocations() const { return _do_count_invocations; }
   void set_do_count_invocations(bool z) { _do_count_invocations = z; }
   bool do_method_data_update() const { return _do_method_data_update; }
diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/gcm.cpp
--- a/src/share/vm/opto/gcm.cpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/gcm.cpp	Thu Nov 06 14:59:10 2008 -0800
@@ -1319,11 +1319,33 @@
 //------------------------------Estimate_Block_Frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
 void PhaseCFG::Estimate_Block_Frequency() {
-  int cnts = C->method() ? C->method()->interpreter_invocation_count() : 1;
-  // Most of our algorithms will die horribly if frequency can become
-  // negative so make sure cnts is a sane value.
-  if( cnts <= 0 ) cnts = 1;
-  float f = (float)cnts/(float)FreqCountInvocations;
+
+  // Force conditional branches leading to uncommon traps to be unlikely,
+  // not because we get to the uncommon_trap with less relative frequency,
+  // but because an uncommon_trap typically causes a deopt, so we only get
+  // there once.
+  if (C->do_freq_based_layout()) {
+    Block_List worklist;
+    Block* root_blk = _blocks[0];
+    for (uint i = 1; i < root_blk->num_preds(); i++) {
+      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      if (pb->has_uncommon_code()) {
+        worklist.push(pb);
+      }
+    }
+    while (worklist.size() > 0) {
+      Block* uct = worklist.pop();
+      if (uct == _broot) continue;
+      for (uint i = 1; i < uct->num_preds(); i++) {
+        Block *pb = _bbs[uct->pred(i)->_idx];
+        if (pb->_num_succs == 1) {
+          worklist.push(pb);
+        } else if (pb->num_fall_throughs() == 2) {
+          pb->update_uncommon_branch(uct);
+        }
+      }
+    }
+  }
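update_uncommon_branch() (added to gcm.cpp further down) is what this worklist drives: it clamps the probability of whichever branch arm reaches the trap to at most PROB_MIN, inverting around the true-arm encoding as needed. A standalone sketch of that clamp; the PROB_MIN value here is illustrative, HotSpot defines its own:

    #include <cstdio>

    int main() {
      const float PROB_MIN = 1e-6f;    // illustrative stand-in
      float p = 0.3f;                  // stored probability of the true arm
      bool  trap_on_false = true;      // the uncommon trap sits on the false arm

      if (trap_on_false) p = 1.0f - p; // work on the trap arm's probability
      if (p > PROB_MIN)  p = PROB_MIN; // clamp it
      if (trap_on_false) p = 1.0f - p; // restore the true-arm encoding

      printf("new prob = %f\n", p);    // ~0.999999: the trap arm is now unlikely
      return 0;
    }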

   // Create the loop tree and calculate loop depth.
   _root_loop = create_loop_tree();
@@ -1333,25 +1355,27 @@
   _root_loop->compute_freq();

   // Adjust all frequencies to be relative to a single method entry
-  _root_loop->_freq = f * 1.0;
+  _root_loop->_freq = 1.0;
   _root_loop->scale_freq();

   // force paths ending at uncommon traps to be infrequent
-  Block_List worklist;
-  Block* root_blk = _blocks[0];
-  for (uint i = 0; i < root_blk->num_preds(); i++) {
-    Block *pb = _bbs[root_blk->pred(i)->_idx];
-    if (pb->has_uncommon_code()) {
-      worklist.push(pb);
+  if (!C->do_freq_based_layout()) {
+    Block_List worklist;
+    Block* root_blk = _blocks[0];
+    for (uint i = 1; i < root_blk->num_preds(); i++) {
+      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      if (pb->has_uncommon_code()) {
+        worklist.push(pb);
+      }
     }
-  }
-  while (worklist.size() > 0) {
-    Block* uct = worklist.pop();
-    uct->_freq = PROB_MIN;
-    for (uint i = 0; i < uct->num_preds(); i++) {
-      Block *pb = _bbs[uct->pred(i)->_idx];
-      if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
-        worklist.push(pb);
+    while (worklist.size() > 0) {
+      Block* uct = worklist.pop();
+      uct->_freq = PROB_MIN;
+      for (uint i = 1; i < uct->num_preds(); i++) {
+        Block *pb = _bbs[uct->pred(i)->_idx];
+        if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
+          worklist.push(pb);
+        }
       }
     }
   }
@@ -1556,22 +1580,6 @@
     }
   }

-#if 0
-  // Raise frequency of the loop backedge block, in an effort
-  // to keep it empty.  Skip the method level "loop".
-  if (_parent != NULL) {
-    CFGElement* s = _members.at(_members.length() - 1);
-    if (s->is_block()) {
-      Block* bk = s->as_Block();
-      if (bk->_num_succs == 1 && bk->_succs[0] == hd) {
-        // almost any value >= 1.0f works
-        // FIXME: raw constant
-        bk->_freq = 1.05f;
-      }
-    }
-  }
-#endif
-
   // For all loops other than the outer, "method" loop,
   // sum and normalize the exit probability. The "method" loop
   // should keep the initial exit probability of 1, so that
@@ -1589,12 +1597,15 @@
   // the probability of exit per loop entry.
   for (int i = 0; i < _exits.length(); i++) {
     Block* et = _exits.at(i).get_target();
-    float new_prob = _exits.at(i).get_prob() / exits_sum;
+    float new_prob = 0.0f;
+    if (_exits.at(i).get_prob() > 0.0f) {
+      new_prob = _exits.at(i).get_prob() / exits_sum;
+    }
     BlockProbPair bpp(et, new_prob);
     _exits.at_put(i, bpp);
   }

-  // Save the total, but guard against unreasoable probability,
+  // Save the total, but guard against unreasonable probability,
   // as the value is used to estimate the loop trip count.
   // An infinite trip count would blur relative block
   // frequencies.
@@ -1688,6 +1699,137 @@
   return 0.0f;
 }

+//------------------------------num_fall_throughs------------------------------
+// Return the number of fall-through candidates for a block
+int Block::num_fall_throughs() {
+  int eidx = end_idx();
+  Node *n = _nodes[eidx];    // Get ending Node
+
+  int op = n->Opcode();
+  if (n->is_Mach()) {
+    if (n->is_MachNullCheck()) {
+      // In theory, either side can fall-thru; for simplicity's sake,
+      // let's say only the false branch can now.
+      return 1;
+    }
+    op = n->as_Mach()->ideal_Opcode();
+  }
+
+  // Switch on branch type
+  switch( op ) {
+  case Op_CountedLoopEnd:
+  case Op_If:
+    return 2;
+
+  case Op_Root:
+  case Op_Goto:
+    return 1;
+
+  case Op_Catch: {
+    for (uint i = 0; i < _num_succs; i++) {
+      const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
+      if (ci->_con == CatchProjNode::fall_through_index) {
+        return 1;
+      }
+    }
+    return 0;
+  }
+
+  case Op_Jump:
+  case Op_NeverBranch:
+  case Op_TailCall:
+  case Op_TailJump:
+  case Op_Return:
+  case Op_Halt:
+  case Op_Rethrow:
+    return 0;
+
+  default:
+    ShouldNotReachHere();
+  }
+
+  return 0;
+}
+
+//------------------------------succ_fall_through------------------------------
+// Return true if a specific successor could be a fall-through target.
+bool Block::succ_fall_through(uint i) {
+  int eidx = end_idx();
+  Node *n = _nodes[eidx];    // Get ending Node
+
+  int op = n->Opcode();
+  if (n->is_Mach()) {
+    if (n->is_MachNullCheck()) {
+      // In theory, either side can fall-thru; for simplicity's sake,
+      // let's say only the false branch can now.
+      return _nodes[i + eidx + 1]->Opcode() == Op_IfFalse;
+    }
+    op = n->as_Mach()->ideal_Opcode();
+  }
+
+  // Switch on branch type
+  switch( op ) {
+  case Op_CountedLoopEnd:
+  case Op_If:
+  case Op_Root:
+  case Op_Goto:
+    return true;
+
+  case Op_Catch: {
+    const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
+    return ci->_con == CatchProjNode::fall_through_index;
+  }
+
+  case Op_Jump:
+  case Op_NeverBranch:
+  case Op_TailCall:
+  case Op_TailJump:
+  case Op_Return:
+  case Op_Halt:
+  case Op_Rethrow:
+    return false;
+
+  default:
+    ShouldNotReachHere();
+  }
+
+  return false;
+}
+
+//------------------------------update_uncommon_branch-------------------------
+// Update the probability of a two-branch to be uncommon
+void Block::update_uncommon_branch(Block* ub) {
+  int eidx = end_idx();
+  Node *n = _nodes[eidx];    // Get ending Node
+
+  int op = n->as_Mach()->ideal_Opcode();
+
+  assert(op == Op_CountedLoopEnd || op == Op_If, "must be an If");
+  assert(num_fall_throughs() == 2, "must be a two way branch block");
+
+  // Which successor is ub?
+  uint s;
+  for (s = 0; s < _num_succs; s++) {
+    if (_succs[s] == ub) break;
+  }
+  assert(s < 2, "uncommon successor must be found");
+
+  // If ub is the true path, make the probability small; else
+  // ub is the false path, and make the probability large
+  bool invert = (_nodes[s + eidx + 1]->Opcode() == Op_IfFalse);
+
+  // Get existing probability
+  float p = n->as_MachIf()->_prob;
+
+  if (invert) p = 1.0 - p;
+  if (p > PROB_MIN) {
+    p = PROB_MIN;
+  }
+  if (invert) p = 1.0 - p;
+
+  n->as_MachIf()->_prob = p;
+}
+
 //------------------------------update_succ_freq-------------------------------
 // Update the appropriate frequency associated with block 'b', a successor of
 // a block in this loop.
diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/output.cpp
--- a/src/share/vm/opto/output.cpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/output.cpp	Thu Nov 06 14:59:10 2008 -0800
@@ -263,7 +263,7 @@
 # endif // ENABLE_ZAP_DEAD_LOCALS

 //------------------------------compute_loop_first_inst_sizes------------------
-// Compute the size of first NumberOfLoopInstrToAlign instructions at head
+// Compute the size of first NumberOfLoopInstrToAlign instructions at the top
 // of a loop. When aligning a loop we need to provide enough instructions
 // in cpu's fetch buffer to feed decoders. The loop alignment could be
 // avoided if we have enough instructions in fetch buffer at the head of a loop.
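The alignment-padding decision that this sizing feeds (see Block::alignment_padding in the block.hpp hunk above) reduces to a small bit-mask computation. A standalone sketch with illustrative constants:

    #include <cstdio>

    int main() {
      const unsigned block_alignment = 16;   // e.g. OptoLoopAlignment
      const unsigned max_pad = block_alignment - 1;
      const unsigned MaxLoopPad = 11;        // illustrative platform limit

      unsigned current_offset = 0x46;        // code offset before the loop top
      unsigned current_alignment = current_offset & max_pad;              // 6
      unsigned padding = (block_alignment - current_alignment) & max_pad; // 10

      // Mirrors the new logic: skip padding a loop top when it would cost
      // more than MaxLoopPad and the loop's first instructions are short.
      unsigned first_inst_size = 4;
      bool skip = padding > MaxLoopPad && first_inst_size <= padding;
      printf("padding = %u, emitted = %u\n", padding, skip ? 0u : padding);
      return 0;
    }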
@@ -284,34 +284,23 @@
   for( uint i=1; i <= last_block; i++ ) {
     Block *b = _cfg->_blocks[i];
     // Check the first loop's block which requires an alignment.
-    if( b->head()->is_Loop() &&
-        b->code_alignment() > (uint)relocInfo::addr_unit() ) {
+    if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
       uint sum_size = 0;
       uint inst_cnt = NumberOfLoopInstrToAlign;
-      inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt,
-                                            _regalloc);
-      // Check the next fallthrough block if first loop's block does not have
-      // enough instructions.
-      if( inst_cnt > 0 && i < last_block ) {
-        // First, check if the first loop's block contains whole loop.
-        // LoopNode::LoopBackControl == 2.
-        Block *bx = _cfg->_bbs[b->pred(2)->_idx];
-        // Skip connector blocks (with limit in case of irreducible loops).
-        int search_limit = 16;
-        while( bx->is_connector() && search_limit-- > 0) {
-          bx = _cfg->_bbs[bx->pred(1)->_idx];
-        }
-        if( bx != b ) { // loop body is in several blocks.
-          Block *nb = NULL;
-          while( inst_cnt > 0 && i < last_block && nb != bx &&
-                 !_cfg->_blocks[i+1]->head()->is_Loop() ) {
-            i++;
-            nb = _cfg->_blocks[i];
-            inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt,
-                                                   _regalloc);
-          } // while( inst_cnt > 0 && i < last_block )
-        } // if( bx != b )
-      } // if( inst_cnt > 0 && i < last_block )
+      inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
+
+      // Check subsequent fallthrough blocks if the loop's first
+      // block(s) does not have enough instructions.
+      Block *nb = b;
+      while( inst_cnt > 0 &&
+             i < last_block &&
+             !_cfg->_blocks[i+1]->has_loop_alignment() &&
+             !nb->has_successor(b) ) {
+        i++;
+        nb = _cfg->_blocks[i];
+        inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
+      } // while( inst_cnt > 0 && i < last_block )
+
+      b->set_first_inst_size(sum_size);
     } // if( b->head()->is_Loop() )
   } // for( i <= last_block )
@@ -512,7 +501,7 @@
       // Get the size of the block
       uint blk_size = adr - blk_starts[i];

-      // When the next block starts a loop, we may insert pad NOP
+      // When the next block is the top of a loop, we may insert pad NOP
       // instructions.
       Block *nb = _cfg->_blocks[i+1];
       int current_offset = blk_starts[i] + blk_size;
@@ -1382,8 +1371,8 @@
     } // End for all instructions in block

-    // If the next block _starts_ a loop, pad this block out to align
-    // the loop start a little. Helps prevent pipe stalls at loop starts
+    // If the next block is the top of a loop, pad this block out to align
+    // the loop top a little. Helps prevent pipe stalls at loop back branches.
     int nop_size = (new (this) MachNopNode())->size(_regalloc);
     if( i<_cfg->_num_blocks-1 ) {
       Block *nb = _cfg->_blocks[i+1];
diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/phase.cpp
--- a/src/share/vm/opto/phase.cpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/phase.cpp	Thu Nov 06 14:59:10 2008 -0800
@@ -46,7 +46,7 @@
 #ifndef PRODUCT
 elapsedTimer Phase::_t_graphReshaping;
 elapsedTimer Phase::_t_scheduler;
-elapsedTimer Phase::_t_removeEmptyBlocks;
+elapsedTimer Phase::_t_blockOrdering;
 elapsedTimer Phase::_t_macroExpand;
 elapsedTimer Phase::_t_peephole;
 elapsedTimer Phase::_t_codeGeneration;
@@ -128,7 +128,7 @@
     tty->print_cr ("      subtotal     : %3.3f sec,  %3.2f %%", regalloc_subtotal, percent_of_regalloc);
   }
   tty->print_cr ("    macroExpand  : %3.3f sec", Phase::_t_macroExpand.seconds());
-  tty->print_cr ("    removeEmpty  : %3.3f sec", Phase::_t_removeEmptyBlocks.seconds());
+  tty->print_cr ("    blockOrdering: %3.3f sec", Phase::_t_blockOrdering.seconds());
   tty->print_cr ("    peephole     : %3.3f sec", Phase::_t_peephole.seconds());
   tty->print_cr ("    codeGen      : %3.3f sec", Phase::_t_codeGeneration.seconds());
   tty->print_cr ("    install_code : %3.3f sec", Phase::_t_registerMethod.seconds());
@@ -137,7 +137,7 @@
     (DoEscapeAnalysis ? Phase::_t_escapeAnalysis.seconds() : 0.0) +
     Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() +
     Phase::_t_matcher.seconds() + Phase::_t_scheduler.seconds() +
-    Phase::_t_registerAllocation.seconds() + Phase::_t_removeEmptyBlocks.seconds() +
+    Phase::_t_registerAllocation.seconds() + Phase::_t_blockOrdering.seconds() +
     Phase::_t_macroExpand.seconds() + Phase::_t_peephole.seconds() +
     Phase::_t_codeGeneration.seconds() + Phase::_t_registerMethod.seconds();
   double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0;
diff -r f4fe12e429a4 -r 72c5366e5d86 src/share/vm/opto/phase.hpp
--- a/src/share/vm/opto/phase.hpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/src/share/vm/opto/phase.hpp	Thu Nov 06 14:59:10 2008 -0800
@@ -40,16 +40,12 @@
     Optimistic,                 // Optimistic analysis phase
     GVN,                        // Pessimistic global value numbering phase
     Ins_Select,                 // Instruction selection phase
-    Copy_Elimination,           // Copy Elimination
-    Dead_Code_Elimination,      // DCE and compress Nodes
-    Conditional_Constant,       // Conditional Constant Propagation
     CFG,                        // Build a CFG
-    DefUse,                     // Build Def->Use chains
+    BlockLayout,                // Linear ordering of blocks
     Register_Allocation,        // Register allocation, duh
     LIVE,                       // Dragon-book LIVE range problem
     Interference_Graph,         // Building the IFG
     Coalesce,                   // Coalescing copies
-    Conditional_CProp,          // Conditional Constant Propagation
     Ideal_Loop,                 // Find idealized trip-counted loops
     Macro_Expand,               // Expand macro nodes
     Peephole,                   // Apply peephole optimizations
@@ -80,7 +76,7 @@
 #ifndef PRODUCT
   static elapsedTimer _t_graphReshaping;
   static elapsedTimer _t_scheduler;
-  static elapsedTimer _t_removeEmptyBlocks;
+  static elapsedTimer _t_blockOrdering;
   static elapsedTimer _t_macroExpand;
   static elapsedTimer _t_peephole;
   static elapsedTimer _t_codeGeneration;
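Taken together, the compile.cpp hunk above replaces the single RemoveEmpty() call with a three-step sequence. A condensed, self-contained restatement of that flow, with stub types standing in for the real PhaseCFG and PhaseBlockLayout:

    #include <cstdio>

    struct PhaseCFG {
      void remove_empty()       { puts("remove_empty: connector-ize empty blocks"); }
      void set_loop_alignment() { puts("set_loop_alignment: mark loop heads"); }
      void fixup_flow()         { puts("fixup_flow: finalize branch senses"); }
    };
    struct PhaseBlockLayout {   // runs find_edges/grow_traces/merge_traces(x2)/reorder_traces
      explicit PhaseBlockLayout(PhaseCFG&) { puts("PhaseBlockLayout: order by frequency"); }
    };

    int main() {
      PhaseCFG cfg;
      bool do_freq_based_layout = true;  // BlockLayoutByFrequency default

      cfg.remove_empty();
      if (do_freq_based_layout) {
        PhaseBlockLayout layout(cfg);
      } else {
        cfg.set_loop_alignment();        // old path: align loop heads only
      }
      cfg.fixup_flow();
      return 0;
    }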