comparison src/share/vm/opto/loopnode.hpp @ 1172:b2b6a9bf6238

6894779: Loop Predication for Loop Optimizer in C2 Summary: Loop predication implementation Reviewed-by: never, kvn
author cfang
date Tue, 12 Jan 2010 14:37:35 -0800
parents 046932b72aa2
children c047da02984c
comparison
equal deleted inserted replaced
1160:f24201449cac 1172:b2b6a9bf6238
28 class IdealLoopTree; 28 class IdealLoopTree;
29 class LoopNode; 29 class LoopNode;
30 class Node; 30 class Node;
31 class PhaseIdealLoop; 31 class PhaseIdealLoop;
32 class VectorSet; 32 class VectorSet;
33 class Invariance;
33 struct small_cache; 34 struct small_cache;
34 35
35 // 36 //
36 // I D E A L I Z E D L O O P S 37 // I D E A L I Z E D L O O P S
37 // 38 //
323 // Split shared headers and insert loop landing pads. 324 // Split shared headers and insert loop landing pads.
324 // Insert a LoopNode to replace the RegionNode. 325 // Insert a LoopNode to replace the RegionNode.
325 // Returns TRUE if loop tree is structurally changed. 326 // Returns TRUE if loop tree is structurally changed.
326 bool beautify_loops( PhaseIdealLoop *phase ); 327 bool beautify_loops( PhaseIdealLoop *phase );
327 328
329 // Perform optimization to use the loop predicates for null checks and range checks.
330 // Applies to any loop level (not just the innermost one)
331 bool loop_predication( PhaseIdealLoop *phase);
332
328 // Perform iteration-splitting on inner loops. Split iterations to 333 // Perform iteration-splitting on inner loops. Split iterations to
329 // avoid range checks or one-shot null checks. Returns false if the 334 // avoid range checks or one-shot null checks. Returns false if the
330 // current round of loop opts should stop. 335 // current round of loop opts should stop.
331 bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); 336 bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
332 337
392 // Gather the expression that does the alignment. Note that only 397 // Gather the expression that does the alignment. Note that only
393 // one array base can be aligned in a loop (unless the VM guarantees 398 // one array base can be aligned in a loop (unless the VM guarantees
394 // mutual alignment). Note that if we vectorize short memory ops 399 // mutual alignment). Note that if we vectorize short memory ops
395 // into longer memory ops, we may want to increase alignment. 400 // into longer memory ops, we may want to increase alignment.
396 bool policy_align( PhaseIdealLoop *phase ) const; 401 bool policy_align( PhaseIdealLoop *phase ) const;
402
403 // Return TRUE if "iff" is a range check.
404 bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;
397 405
398 // Compute loop trip count from profile data 406 // Compute loop trip count from profile data
399 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); 407 void compute_profile_trip_cnt( PhaseIdealLoop *phase );
400 408
401 // Reassociate invariant expressions. 409 // Reassociate invariant expressions.
519 } 527 }
520 return find_non_split_ctrl(n); 528 return find_non_split_ctrl(n);
521 } 529 }
522 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); 530 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag );
523 531
524 // true if CFG node d dominates CFG node n
525 bool is_dominator(Node *d, Node *n);
526
527 // Helper function for directing control inputs away from CFG split 532 // Helper function for directing control inputs away from CFG split
528 // points. 533 // points.
529 Node *find_non_split_ctrl( Node *ctrl ) const { 534 Node *find_non_split_ctrl( Node *ctrl ) const {
530 if (ctrl != NULL) { 535 if (ctrl != NULL) {
531 if (ctrl->is_MultiBranch()) { 536 if (ctrl->is_MultiBranch()) {
570 _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) ); 575 _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) );
571 assert(has_node(i) && has_ctrl(i), ""); 576 assert(has_node(i) && has_ctrl(i), "");
572 assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); 577 assert(n == find_non_split_ctrl(n), "must return legal ctrl" );
573 return n; 578 return n;
574 } 579 }
580 // true if CFG node d dominates CFG node n
581 bool is_dominator(Node *d, Node *n);
582 // return get_ctrl for a data node and self(n) for a CFG node
583 Node* ctrl_or_self(Node* n) {
584 if (has_ctrl(n))
585 return get_ctrl(n);
586 else {
587 assert (n->is_CFG(), "must be a CFG node");
588 return n;
589 }
590 }
575 591
576 private: 592 private:
577 Node *get_ctrl_no_update( Node *i ) const { 593 Node *get_ctrl_no_update( Node *i ) const {
578 assert( has_ctrl(i), "" ); 594 assert( has_ctrl(i), "" );
579 Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1); 595 Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1);
598 _nodes.map(n->_idx, (Node*)loop); 614 _nodes.map(n->_idx, (Node*)loop);
599 } 615 }
600 // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace 616 // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace
601 // the 'old_node' with 'new_node'. Kill old-node. Add a reference 617 // the 'old_node' with 'new_node'. Kill old-node. Add a reference
602 // from old_node to new_node to support the lazy update. Reference 618 // from old_node to new_node to support the lazy update. Reference
603 // replaces loop reference, since that is not neede for dead node. 619 // replaces loop reference, since that is not needed for dead node.
604 public: 620 public:
605 void lazy_update( Node *old_node, Node *new_node ) { 621 void lazy_update( Node *old_node, Node *new_node ) {
606 assert( old_node != new_node, "no cycles please" ); 622 assert( old_node != new_node, "no cycles please" );
607 //old_node->set_req( 1, new_node /*NO DU INFO*/ ); 623 //old_node->set_req( 1, new_node /*NO DU INFO*/ );
608 // Nodes always have DU info now, so re-use the side array slot 624 // Nodes always have DU info now, so re-use the side array slot
677 PhaseTransform(Ideal_Loop), 693 PhaseTransform(Ideal_Loop),
678 _igvn(igvn), 694 _igvn(igvn),
679 _dom_lca_tags(C->comp_arena()), 695 _dom_lca_tags(C->comp_arena()),
680 _verify_me(NULL), 696 _verify_me(NULL),
681 _verify_only(true) { 697 _verify_only(true) {
682 build_and_optimize(false); 698 build_and_optimize(false, false);
683 } 699 }
684 700
685 // build the loop tree and perform any requested optimizations 701 // build the loop tree and perform any requested optimizations
686 void build_and_optimize(bool do_split_if); 702 void build_and_optimize(bool do_split_if, bool do_loop_pred);
687 703
688 public: 704 public:
689 // Dominators for the sea of nodes 705 // Dominators for the sea of nodes
690 void Dominators(); 706 void Dominators();
691 Node *dom_lca( Node *n1, Node *n2 ) const { 707 Node *dom_lca( Node *n1, Node *n2 ) const {
692 return find_non_split_ctrl(dom_lca_internal(n1, n2)); 708 return find_non_split_ctrl(dom_lca_internal(n1, n2));
693 } 709 }
694 Node *dom_lca_internal( Node *n1, Node *n2 ) const; 710 Node *dom_lca_internal( Node *n1, Node *n2 ) const;
695 711
696 // Compute the Ideal Node to Loop mapping 712 // Compute the Ideal Node to Loop mapping
697 PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) : 713 PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) :
698 PhaseTransform(Ideal_Loop), 714 PhaseTransform(Ideal_Loop),
699 _igvn(igvn), 715 _igvn(igvn),
700 _dom_lca_tags(C->comp_arena()), 716 _dom_lca_tags(C->comp_arena()),
701 _verify_me(NULL), 717 _verify_me(NULL),
702 _verify_only(false) { 718 _verify_only(false) {
703 build_and_optimize(do_split_ifs); 719 build_and_optimize(do_split_ifs, do_loop_pred);
704 } 720 }
705 721
706 // Verify that verify_me made the same decisions as a fresh run. 722 // Verify that verify_me made the same decisions as a fresh run.
707 PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) : 723 PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) :
708 PhaseTransform(Ideal_Loop), 724 PhaseTransform(Ideal_Loop),
709 _igvn(igvn), 725 _igvn(igvn),
710 _dom_lca_tags(C->comp_arena()), 726 _dom_lca_tags(C->comp_arena()),
711 _verify_me(verify_me), 727 _verify_me(verify_me),
712 _verify_only(false) { 728 _verify_only(false) {
713 build_and_optimize(false); 729 build_and_optimize(false, false);
714 } 730 }
715 731
716 // Build and verify the loop tree without modifying the graph. This 732 // Build and verify the loop tree without modifying the graph. This
717 // is useful to verify that all inputs properly dominate their uses. 733 // is useful to verify that all inputs properly dominate their uses.
718 static void verify(PhaseIterGVN& igvn) { 734 static void verify(PhaseIterGVN& igvn) {
787 // Return true if exp is a constant times an induction var 803 // Return true if exp is a constant times an induction var
788 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale); 804 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
789 805
790 // Return true if exp is a scaled induction var plus (or minus) constant 806 // Return true if exp is a scaled induction var plus (or minus) constant
791 bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); 807 bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0);
808
809 // Return true if proj is for "proj->[region->..]call_uct"
810 bool is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate = false);
811 // Return true for "if(test)-> proj -> ...
812 // |
813 // V
814 // other_proj->[region->..]call_uct"
815 bool is_uncommon_trap_if_pattern(ProjNode* proj, bool must_reason_predicate = false);
816 // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted
817 ProjNode* create_new_if_for_predicate(ProjNode* cont_proj);
818 // Find a good location to insert a predicate
819 ProjNode* find_predicate_insertion_point(Node* start_c);
820 // Construct a range check for a predicate if
821 BoolNode* rc_predicate(Node* ctrl,
822 int scale, Node* offset,
823 Node* init, Node* limit, Node* stride,
824 Node* range);
825
826 // Implementation of the loop predication to promote checks outside the loop
827 bool loop_predication_impl(IdealLoopTree *loop);
828
829 // Helper function to collect predicate for eliminating the useless ones
830 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
831 void eliminate_useless_predicates();
792 832
793 // Eliminate range-checks and other trip-counter vs loop-invariant tests. 833 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
794 void do_range_check( IdealLoopTree *loop, Node_List &old_new ); 834 void do_range_check( IdealLoopTree *loop, Node_List &old_new );
795 835
796 // Create a slow version of the loop by cloning the loop 836 // Create a slow version of the loop by cloning the loop
904 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); } 944 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); }
905 // Helpers for filtered type 945 // Helpers for filtered type
906 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); 946 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
907 947
908 // Helper functions 948 // Helper functions
909 void register_new_node( Node *n, Node *blk );
910 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); 949 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
911 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); 950 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
912 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); 951 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
913 bool split_up( Node *n, Node *blk1, Node *blk2 ); 952 bool split_up( Node *n, Node *blk1, Node *blk2 );
914 void sink_use( Node *use, Node *post_loop ); 953 void sink_use( Node *use, Node *post_loop );
916 955
917 bool _created_loop_node; 956 bool _created_loop_node;
918 public: 957 public:
919 void set_created_loop_node() { _created_loop_node = true; } 958 void set_created_loop_node() { _created_loop_node = true; }
920 bool created_loop_node() { return _created_loop_node; } 959 bool created_loop_node() { return _created_loop_node; }
960 void register_new_node( Node *n, Node *blk );
921 961
922 #ifndef PRODUCT 962 #ifndef PRODUCT
923 void dump( ) const; 963 void dump( ) const;
924 void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const; 964 void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
925 void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const; 965 void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;