diff src/share/vm/opto/block.hpp @ 14412:e2722a66aba7

Merge
author kvn
date Thu, 05 Sep 2013 11:04:39 -0700
parents adb9a7d94cb5
children 650868c062a9
line wrap: on
line diff
--- a/src/share/vm/opto/block.hpp	Thu Aug 22 09:39:54 2013 -0700
+++ b/src/share/vm/opto/block.hpp	Thu Sep 05 11:04:39 2013 -0700
@@ -48,13 +48,12 @@
   friend class VMStructs;
   uint _size;                   // allocated size, as opposed to formal limit
   debug_only(uint _limit;)      // limit to formal domain
+  Arena *_arena;                // Arena to allocate in
 protected:
   Block **_blocks;
   void grow( uint i );          // Grow array node to fit
 
 public:
-  Arena *_arena;                // Arena to allocate in
-
   Block_Array(Arena *a) : _arena(a), _size(OptoBlockListSize) {
     debug_only(_limit=0);
     _blocks = NEW_ARENA_ARRAY( a, Block *, OptoBlockListSize );
@@ -77,7 +76,7 @@
 public:
   uint _cnt;
   Block_List() : Block_Array(Thread::current()->resource_area()), _cnt(0) {}
-  void push( Block *b ) { map(_cnt++,b); }
+  void push( Block *b ) {  map(_cnt++,b); }
   Block *pop() { return _blocks[--_cnt]; }
   Block *rpop() { Block *b = _blocks[0]; _blocks[0]=_blocks[--_cnt]; return b;}
   void remove( uint i );
@@ -284,15 +283,15 @@
   // helper function that adds caller save registers to MachProjNode
   void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe);
   // Schedule a call next in the block
-  uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call);
+  uint sched_call(Matcher &matcher, PhaseCFG* cfg, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call);
 
   // Perform basic-block local scheduling
   Node *select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot);
-  void set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs );
-  void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs);
+  void set_next_call( Node *n, VectorSet &next_call, PhaseCFG* cfg);
+  void needed_for_next_call(Node *this_call, VectorSet &next_call, PhaseCFG* cfg);
   bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray<int> &ready_cnt, VectorSet &next_call);
   // Cleanup if any code lands between a Call and his Catch
-  void call_catch_cleanup(Block_Array &bbs, Compile *C);
+  void call_catch_cleanup(PhaseCFG* cfg, Compile *C);
   // Detect implicit-null-check opportunities.  Basically, find NULL checks
   // with suitable memory ops nearby.  Use the memory op to do the NULL check.
   // I can generate a memory op if there is not one nearby.
@@ -331,15 +330,15 @@
 
   // Use frequency calculations and code shape to predict if the block
   // is uncommon.
-  bool is_uncommon( Block_Array &bbs ) const;
+  bool is_uncommon(PhaseCFG* cfg) const;
 
 #ifndef PRODUCT
   // Debugging print of basic block
   void dump_bidx(const Block* orig, outputStream* st = tty) const;
-  void dump_pred(const Block_Array *bbs, Block* orig, outputStream* st = tty) const;
-  void dump_head( const Block_Array *bbs, outputStream* st = tty ) const;
+  void dump_pred(const PhaseCFG* cfg, Block* orig, outputStream* st = tty) const;
+  void dump_head(const PhaseCFG* cfg, outputStream* st = tty) const;
   void dump() const;
-  void dump( const Block_Array *bbs ) const;
+  void dump(const PhaseCFG* cfg) const;
 #endif
 };
 
@@ -349,14 +348,77 @@
 class PhaseCFG : public Phase {
   friend class VMStructs;
  private:
+
+  // Root of whole program
+  RootNode* _root;
+
+  // The block containing the root node
+  Block* _root_block;
+
+  // List of basic blocks that are created during CFG creation
+  Block_List _blocks;
+
+  // Count of basic blocks
+  uint _number_of_blocks;
+
+  // Arena for the blocks to be stored in
+  Arena* _block_arena;
+
+  // The matcher for this compilation
+  Matcher& _matcher;
+
+  // Map nodes to owning basic block
+  Block_Array _node_to_block_mapping;
+
+  // Loop from the root
+  CFGLoop* _root_loop;
+
+  // Outmost loop frequency
+  float _outer_loop_frequency;
+
+  // Per node latency estimation, valid only during GCM
+  GrowableArray<uint>* _node_latency;
+
   // Build a proper looking cfg.  Return count of basic blocks
   uint build_cfg();
 
-  // Perform DFS search.
+  // Build the dominator tree so that we know where we can move instructions
+  void build_dominator_tree();
+
+  // Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions
+  void estimate_block_frequency();
+
+  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
+  // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
+  // Move nodes to ensure correctness from GVN and also try to move nodes out of loops.
+  void global_code_motion();
+
+  // Schedule Nodes early in their basic blocks.
+  bool schedule_early(VectorSet &visited, Node_List &roots);
+
+  // For each node, find the latest block it can be scheduled into
+  // and then select the cheapest block between the latest and earliest
+  // block to place the node.
+  void schedule_late(VectorSet &visited, Node_List &stack);
+
+  // Compute the (backwards) latency of a node from a single use
+  int latency_from_use(Node *n, const Node *def, Node *use);
+
+  // Compute the (backwards) latency of a node from the uses of this instruction
+  void partial_latency_of_defs(Node *n);
+
+  // Compute the instruction global latency with a backwards walk
+  void compute_latencies_backwards(VectorSet &visited, Node_List &stack);
+
+  // Pick a block between early and late that is a cheaper alternative
+  // to late. Helper for schedule_late.
+  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
+
+  // Perform a Depth First Search (DFS).
   // Setup 'vertex' as DFS to vertex mapping.
   // Setup 'semi' as vertex to DFS mapping.
   // Set 'parent' to DFS parent.
-  uint DFS( Tarjan *tarjan );
+  uint do_DFS(Tarjan* tarjan, uint rpo_counter);
 
   // Helper function to insert a node into a block
   void schedule_node_into_block( Node *n, Block *b );
@@ -367,79 +429,18 @@
   void schedule_pinned_nodes( VectorSet &visited );
 
   // I'll need a few machine-specific GotoNodes.  Clone from this one.
-  MachNode *_goto;
+  // Used when building the CFG and creating end nodes for blocks.
+  MachNode* _goto;
 
   Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
   void verify_anti_dependences(Block* LCA, Node* load) {
-    assert(LCA == _bbs[load->_idx], "should already be scheduled");
+    assert(LCA == get_block_for_node(load), "should already be scheduled");
     insert_anti_dependences(LCA, load, true);
   }
 
- public:
-  PhaseCFG( Arena *a, RootNode *r, Matcher &m );
-
-  uint _num_blocks;             // Count of basic blocks
-  Block_List _blocks;           // List of basic blocks
-  RootNode *_root;              // Root of whole program
-  Block_Array _bbs;             // Map Nodes to owning Basic Block
-  Block *_broot;                // Basic block of root
-  uint _rpo_ctr;
-  CFGLoop* _root_loop;
-  float _outer_loop_freq;       // Outmost loop frequency
-
-  // Per node latency estimation, valid only during GCM
-  GrowableArray<uint> *_node_latency;
-
-#ifndef PRODUCT
-  bool _trace_opto_pipelining;  // tracing flag
-#endif
-
-#ifdef ASSERT
-  Unique_Node_List _raw_oops;
-#endif
-
-  // Build dominators
-  void Dominators();
-
-  // Estimate block frequencies based on IfNode probabilities
-  void Estimate_Block_Frequency();
-
-  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
-  // basic blocks; i.e. _bbs now maps _idx for all Nodes to some Block.
-  void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );
-
-  // Compute the (backwards) latency of a node from the uses
-  void latency_from_uses(Node *n);
-
-  // Compute the (backwards) latency of a node from a single use
-  int latency_from_use(Node *n, const Node *def, Node *use);
-
-  // Compute the (backwards) latency of a node from the uses of this instruction
-  void partial_latency_of_defs(Node *n);
-
-  // Schedule Nodes early in their basic blocks.
-  bool schedule_early(VectorSet &visited, Node_List &roots);
-
-  // For each node, find the latest block it can be scheduled into
-  // and then select the cheapest block between the latest and earliest
-  // block to place the node.
-  void schedule_late(VectorSet &visited, Node_List &stack);
-
-  // Pick a block between early and late that is a cheaper alternative
-  // to late. Helper for schedule_late.
-  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
-
-  // Compute the instruction global latency with a backwards walk
-  void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);
-
-  // Set loop alignment
-  void set_loop_alignment();
-
-  // Remove empty basic blocks
-  void remove_empty();
-  void fixup_flow();
   bool move_to_next(Block* bx, uint b_index);
   void move_to_end(Block* bx, uint b_index);
+
   void insert_goto_at(uint block_no, uint succ_no);
 
   // Check for NeverBranch at block end.  This needs to become a GOTO to the
@@ -451,10 +452,106 @@
 
   CFGLoop* create_loop_tree();
 
-  // Insert a node into a block, and update the _bbs
-  void insert( Block *b, uint idx, Node *n ) {
+  #ifndef PRODUCT
+  bool _trace_opto_pipelining;  // tracing flag
+  #endif
+
+ public:
+  PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher);
+
+  void set_latency_for_node(Node* node, int latency) {
+    _node_latency->at_put_grow(node->_idx, latency);
+  }
+
+  uint get_latency_for_node(Node* node) {
+    return _node_latency->at_grow(node->_idx);
+  }
+
+  // Get the outer most frequency
+  float get_outer_loop_frequency() const {
+    return _outer_loop_frequency;
+  }
+
+  // Get the root node of the CFG
+  RootNode* get_root_node() const {
+    return _root;
+  }
+
+  // Get the block of the root node
+  Block* get_root_block() const {
+    return _root_block;
+  }
+
+  // Add a block at a position and moves the later ones one step
+  void add_block_at(uint pos, Block* block) {
+    _blocks.insert(pos, block);
+    _number_of_blocks++;
+  }
+
+  // Adds a block to the top of the block list
+  void add_block(Block* block) {
+    _blocks.push(block);
+    _number_of_blocks++;
+  }
+
+  // Clear the list of blocks
+  void clear_blocks() {
+    _blocks.reset();
+    _number_of_blocks = 0;
+  }
+
+  // Get the block at position pos in _blocks
+  Block* get_block(uint pos) const {
+    return _blocks[pos];
+  }
+
+  // Number of blocks
+  uint number_of_blocks() const {
+    return _number_of_blocks;
+  }
+
+  // set which block this node should reside in
+  void map_node_to_block(const Node* node, Block* block) {
+    _node_to_block_mapping.map(node->_idx, block);
+  }
+
+  // removes the mapping from a node to a block
+  void unmap_node_from_block(const Node* node) {
+    _node_to_block_mapping.map(node->_idx, NULL);
+  }
+
+  // get the block in which this node resides
+  Block* get_block_for_node(const Node* node) const {
+    return _node_to_block_mapping[node->_idx];
+  }
+
+  // does this node reside in a block; return true
+  bool has_block(const Node* node) const {
+    return (_node_to_block_mapping.lookup(node->_idx) != NULL);
+  }
+
+#ifdef ASSERT
+  Unique_Node_List _raw_oops;
+#endif
+
+  // Do global code motion by first building dominator tree and estimate block frequency
+  // Returns true on success
+  bool do_global_code_motion();
+
+  // Compute the (backwards) latency of a node from the uses
+  void latency_from_uses(Node *n);
+
+  // Set loop alignment
+  void set_loop_alignment();
+
+  // Remove empty basic blocks
+  void remove_empty_blocks();
+  void fixup_flow();
+
+  // Insert a node into a block at index and map the node to the block
+  void insert(Block *b, uint idx, Node *n) {
     b->_nodes.insert( idx, n );
-    _bbs.map( n->_idx, b );
+    map_node_to_block(n, b);
   }
 
 #ifndef PRODUCT
@@ -543,7 +640,7 @@
     _child(NULL),
     _exit_prob(1.0f) {}
   CFGLoop* parent() { return _parent; }
-  void push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk);
+  void push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg);
   void add_member(CFGElement *s) { _members.push(s); }
   void add_nested_loop(CFGLoop* cl);
   Block* head() {