Mercurial > hg > graal-jvmci-8
comparison src/share/vm/opto/loopnode.hpp @ 1172:b2b6a9bf6238
6894779: Loop Predication for Loop Optimizer in C2
Summary: Loop predication implementation
Reviewed-by: never, kvn
author | cfang |
---|---|
date | Tue, 12 Jan 2010 14:37:35 -0800 |
parents | 046932b72aa2 |
children | c047da02984c |
comparison
equal
deleted
inserted
replaced
1160:f24201449cac | 1172:b2b6a9bf6238 |
---|---|
28 class IdealLoopTree; | 28 class IdealLoopTree; |
29 class LoopNode; | 29 class LoopNode; |
30 class Node; | 30 class Node; |
31 class PhaseIdealLoop; | 31 class PhaseIdealLoop; |
32 class VectorSet; | 32 class VectorSet; |
33 class Invariance; | |
33 struct small_cache; | 34 struct small_cache; |
34 | 35 |
35 // | 36 // |
36 // I D E A L I Z E D L O O P S | 37 // I D E A L I Z E D L O O P S |
37 // | 38 // |
323 // Split shared headers and insert loop landing pads. | 324 // Split shared headers and insert loop landing pads. |
324 // Insert a LoopNode to replace the RegionNode. | 325 // Insert a LoopNode to replace the RegionNode. |
325 // Returns TRUE if loop tree is structurally changed. | 326 // Returns TRUE if loop tree is structurally changed. |
326 bool beautify_loops( PhaseIdealLoop *phase ); | 327 bool beautify_loops( PhaseIdealLoop *phase ); |
327 | 328 |
329 // Perform optimization to use the loop predicates for null checks and range checks. | |
330 // Applies to any loop level (not just the innermost one) | |
331 bool loop_predication( PhaseIdealLoop *phase); | |
332 | |
328 // Perform iteration-splitting on inner loops. Split iterations to | 333 // Perform iteration-splitting on inner loops. Split iterations to |
329 // avoid range checks or one-shot null checks. Returns false if the | 334 // avoid range checks or one-shot null checks. Returns false if the |
330 // current round of loop opts should stop. | 335 // current round of loop opts should stop. |
331 bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); | 336 bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); |
332 | 337 |
392 // Gather the expression that does the alignment. Note that only | 397 // Gather the expression that does the alignment. Note that only |
393 // one array base can be aligned in a loop (unless the VM guarantees | 398 // one array base can be aligned in a loop (unless the VM guarantees |
394 // mutual alignment). Note that if we vectorize short memory ops | 399 // mutual alignment). Note that if we vectorize short memory ops |
395 // into longer memory ops, we may want to increase alignment. | 400 // into longer memory ops, we may want to increase alignment. |
396 bool policy_align( PhaseIdealLoop *phase ) const; | 401 bool policy_align( PhaseIdealLoop *phase ) const; |
402 | |
403 // Return TRUE if "iff" is a range check. | |
404 bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const; | |
397 | 405 |
398 // Compute loop trip count from profile data | 406 // Compute loop trip count from profile data |
399 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); | 407 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); |
400 | 408 |
401 // Reassociate invariant expressions. | 409 // Reassociate invariant expressions. |
519 } | 527 } |
520 return find_non_split_ctrl(n); | 528 return find_non_split_ctrl(n); |
521 } | 529 } |
522 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); | 530 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); |
523 | 531 |
524 // true if CFG node d dominates CFG node n | |
525 bool is_dominator(Node *d, Node *n); | |
526 | |
527 // Helper function for directing control inputs away from CFG split | 532 // Helper function for directing control inputs away from CFG split |
528 // points. | 533 // points. |
529 Node *find_non_split_ctrl( Node *ctrl ) const { | 534 Node *find_non_split_ctrl( Node *ctrl ) const { |
530 if (ctrl != NULL) { | 535 if (ctrl != NULL) { |
531 if (ctrl->is_MultiBranch()) { | 536 if (ctrl->is_MultiBranch()) { |
570 _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) ); | 575 _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) ); |
571 assert(has_node(i) && has_ctrl(i), ""); | 576 assert(has_node(i) && has_ctrl(i), ""); |
572 assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); | 577 assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); |
573 return n; | 578 return n; |
574 } | 579 } |
580 // true if CFG node d dominates CFG node n | |
581 bool is_dominator(Node *d, Node *n); | |
582 // return get_ctrl for a data node and self(n) for a CFG node | |
583 Node* ctrl_or_self(Node* n) { | |
584 if (has_ctrl(n)) | |
585 return get_ctrl(n); | |
586 else { | |
587 assert (n->is_CFG(), "must be a CFG node"); | |
588 return n; | |
589 } | |
590 } | |
575 | 591 |
576 private: | 592 private: |
577 Node *get_ctrl_no_update( Node *i ) const { | 593 Node *get_ctrl_no_update( Node *i ) const { |
578 assert( has_ctrl(i), "" ); | 594 assert( has_ctrl(i), "" ); |
579 Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1); | 595 Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1); |
598 _nodes.map(n->_idx, (Node*)loop); | 614 _nodes.map(n->_idx, (Node*)loop); |
599 } | 615 } |
600 // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace | 616 // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace |
601 // the 'old_node' with 'new_node'. Kill old-node. Add a reference | 617 // the 'old_node' with 'new_node'. Kill old-node. Add a reference |
602 // from old_node to new_node to support the lazy update. Reference | 618 // from old_node to new_node to support the lazy update. Reference |
603 // replaces loop reference, since that is not neede for dead node. | 619 // replaces loop reference, since that is not needed for dead node. |
604 public: | 620 public: |
605 void lazy_update( Node *old_node, Node *new_node ) { | 621 void lazy_update( Node *old_node, Node *new_node ) { |
606 assert( old_node != new_node, "no cycles please" ); | 622 assert( old_node != new_node, "no cycles please" ); |
607 //old_node->set_req( 1, new_node /*NO DU INFO*/ ); | 623 //old_node->set_req( 1, new_node /*NO DU INFO*/ ); |
608 // Nodes always have DU info now, so re-use the side array slot | 624 // Nodes always have DU info now, so re-use the side array slot |
677 PhaseTransform(Ideal_Loop), | 693 PhaseTransform(Ideal_Loop), |
678 _igvn(igvn), | 694 _igvn(igvn), |
679 _dom_lca_tags(C->comp_arena()), | 695 _dom_lca_tags(C->comp_arena()), |
680 _verify_me(NULL), | 696 _verify_me(NULL), |
681 _verify_only(true) { | 697 _verify_only(true) { |
682 build_and_optimize(false); | 698 build_and_optimize(false, false); |
683 } | 699 } |
684 | 700 |
685 // build the loop tree and perform any requested optimizations | 701 // build the loop tree and perform any requested optimizations |
686 void build_and_optimize(bool do_split_if); | 702 void build_and_optimize(bool do_split_if, bool do_loop_pred); |
687 | 703 |
688 public: | 704 public: |
689 // Dominators for the sea of nodes | 705 // Dominators for the sea of nodes |
690 void Dominators(); | 706 void Dominators(); |
691 Node *dom_lca( Node *n1, Node *n2 ) const { | 707 Node *dom_lca( Node *n1, Node *n2 ) const { |
692 return find_non_split_ctrl(dom_lca_internal(n1, n2)); | 708 return find_non_split_ctrl(dom_lca_internal(n1, n2)); |
693 } | 709 } |
694 Node *dom_lca_internal( Node *n1, Node *n2 ) const; | 710 Node *dom_lca_internal( Node *n1, Node *n2 ) const; |
695 | 711 |
696 // Compute the Ideal Node to Loop mapping | 712 // Compute the Ideal Node to Loop mapping |
697 PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) : | 713 PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) : |
698 PhaseTransform(Ideal_Loop), | 714 PhaseTransform(Ideal_Loop), |
699 _igvn(igvn), | 715 _igvn(igvn), |
700 _dom_lca_tags(C->comp_arena()), | 716 _dom_lca_tags(C->comp_arena()), |
701 _verify_me(NULL), | 717 _verify_me(NULL), |
702 _verify_only(false) { | 718 _verify_only(false) { |
703 build_and_optimize(do_split_ifs); | 719 build_and_optimize(do_split_ifs, do_loop_pred); |
704 } | 720 } |
705 | 721 |
706 // Verify that verify_me made the same decisions as a fresh run. | 722 // Verify that verify_me made the same decisions as a fresh run. |
707 PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) : | 723 PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) : |
708 PhaseTransform(Ideal_Loop), | 724 PhaseTransform(Ideal_Loop), |
709 _igvn(igvn), | 725 _igvn(igvn), |
710 _dom_lca_tags(C->comp_arena()), | 726 _dom_lca_tags(C->comp_arena()), |
711 _verify_me(verify_me), | 727 _verify_me(verify_me), |
712 _verify_only(false) { | 728 _verify_only(false) { |
713 build_and_optimize(false); | 729 build_and_optimize(false, false); |
714 } | 730 } |
715 | 731 |
716 // Build and verify the loop tree without modifying the graph. This | 732 // Build and verify the loop tree without modifying the graph. This |
717 // is useful to verify that all inputs properly dominate their uses. | 733 // is useful to verify that all inputs properly dominate their uses. |
718 static void verify(PhaseIterGVN& igvn) { | 734 static void verify(PhaseIterGVN& igvn) { |
787 // Return true if exp is a constant times an induction var | 803 // Return true if exp is a constant times an induction var |
788 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale); | 804 bool is_scaled_iv(Node* exp, Node* iv, int* p_scale); |
789 | 805 |
790 // Return true if exp is a scaled induction var plus (or minus) constant | 806 // Return true if exp is a scaled induction var plus (or minus) constant |
791 bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); | 807 bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); |
808 | |
809 // Return true if proj is for "proj->[region->..]call_uct" | |
810 bool is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate = false); | |
811 // Return true for "if(test)-> proj -> ... | |
812 // | | |
813 // V | |
814 // other_proj->[region->..]call_uct" | |
815 bool is_uncommon_trap_if_pattern(ProjNode* proj, bool must_reason_predicate = false); | |
816 // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted | |
817 ProjNode* create_new_if_for_predicate(ProjNode* cont_proj); | |
818 // Find a good location to insert a predicate | |
819 ProjNode* find_predicate_insertion_point(Node* start_c); | |
820 // Construct a range check for a predicate if | |
821 BoolNode* rc_predicate(Node* ctrl, | |
822 int scale, Node* offset, | |
823 Node* init, Node* limit, Node* stride, | |
824 Node* range); | |
825 | |
826 // Implementation of the loop predication to promote checks outside the loop | |
827 bool loop_predication_impl(IdealLoopTree *loop); | |
828 | |
829 // Helper function to collect predicate for eliminating the useless ones | |
830 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); | |
831 void eliminate_useless_predicates(); | |
792 | 832 |
793 // Eliminate range-checks and other trip-counter vs loop-invariant tests. | 833 // Eliminate range-checks and other trip-counter vs loop-invariant tests. |
794 void do_range_check( IdealLoopTree *loop, Node_List &old_new ); | 834 void do_range_check( IdealLoopTree *loop, Node_List &old_new ); |
795 | 835 |
796 // Create a slow version of the loop by cloning the loop | 836 // Create a slow version of the loop by cloning the loop |
904 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); } | 944 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); } |
905 // Helpers for filtered type | 945 // Helpers for filtered type |
906 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); | 946 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); |
907 | 947 |
908 // Helper functions | 948 // Helper functions |
909 void register_new_node( Node *n, Node *blk ); | |
910 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); | 949 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); |
911 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); | 950 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); |
912 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); | 951 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); |
913 bool split_up( Node *n, Node *blk1, Node *blk2 ); | 952 bool split_up( Node *n, Node *blk1, Node *blk2 ); |
914 void sink_use( Node *use, Node *post_loop ); | 953 void sink_use( Node *use, Node *post_loop ); |
916 | 955 |
917 bool _created_loop_node; | 956 bool _created_loop_node; |
918 public: | 957 public: |
919 void set_created_loop_node() { _created_loop_node = true; } | 958 void set_created_loop_node() { _created_loop_node = true; } |
920 bool created_loop_node() { return _created_loop_node; } | 959 bool created_loop_node() { return _created_loop_node; } |
960 void register_new_node( Node *n, Node *blk ); | |
921 | 961 |
922 #ifndef PRODUCT | 962 #ifndef PRODUCT |
923 void dump( ) const; | 963 void dump( ) const; |
924 void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const; | 964 void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const; |
925 void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const; | 965 void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const; |