Mercurial > hg > truffle
comparison src/share/vm/opto/loopTransform.cpp @ 3788:e3cbc9ddd434
7044738: Loop unroll optimization causes incorrect result
Summary: take into account memory dependencies when clonning nodes in clone_up_backedge_goo().
Reviewed-by: never
author | kvn |
---|---|
date | Tue, 28 Jun 2011 15:24:29 -0700 |
parents | aacaff365100 |
children | c96c3eb1efae |
comparison
equal
deleted
inserted
replaced
3787:6ae7a1561b53 | 3788:e3cbc9ddd434 |
---|---|
822 } | 822 } |
823 | 823 |
824 //------------------------------clone_up_backedge_goo-------------------------- | 824 //------------------------------clone_up_backedge_goo-------------------------- |
825 // If Node n lives in the back_ctrl block and cannot float, we clone a private | 825 // If Node n lives in the back_ctrl block and cannot float, we clone a private |
826 // version of n in preheader_ctrl block and return that, otherwise return n. | 826 // version of n in preheader_ctrl block and return that, otherwise return n. |
827 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n ) { | 827 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ) { |
828 if( get_ctrl(n) != back_ctrl ) return n; | 828 if( get_ctrl(n) != back_ctrl ) return n; |
829 | |
830 // Only visit once | |
831 if (visited.test_set(n->_idx)) { | |
832 Node *x = clones.find(n->_idx); | |
833 if (x != NULL) | |
834 return x; | |
835 return n; | |
836 } | |
829 | 837 |
830 Node *x = NULL; // If required, a clone of 'n' | 838 Node *x = NULL; // If required, a clone of 'n' |
831 // Check for 'n' being pinned in the backedge. | 839 // Check for 'n' being pinned in the backedge. |
832 if( n->in(0) && n->in(0) == back_ctrl ) { | 840 if( n->in(0) && n->in(0) == back_ctrl ) { |
841 assert(clones.find(n->_idx) == NULL, "dead loop"); | |
833 x = n->clone(); // Clone a copy of 'n' to preheader | 842 x = n->clone(); // Clone a copy of 'n' to preheader |
843 clones.push(x, n->_idx); | |
834 x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader | 844 x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader |
835 } | 845 } |
836 | 846 |
837 // Recursive fixup any other input edges into x. | 847 // Recursive fixup any other input edges into x. |
838 // If there are no changes we can just return 'n', otherwise | 848 // If there are no changes we can just return 'n', otherwise |
839 // we need to clone a private copy and change it. | 849 // we need to clone a private copy and change it. |
840 for( uint i = 1; i < n->req(); i++ ) { | 850 for( uint i = 1; i < n->req(); i++ ) { |
841 Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i) ); | 851 Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i), visited, clones ); |
842 if( g != n->in(i) ) { | 852 if( g != n->in(i) ) { |
843 if( !x ) | 853 if( !x ) { |
854 assert(clones.find(n->_idx) == NULL, "dead loop"); | |
844 x = n->clone(); | 855 x = n->clone(); |
856 clones.push(x, n->_idx); | |
857 } | |
845 x->set_req(i, g); | 858 x->set_req(i, g); |
846 } | 859 } |
847 } | 860 } |
848 if( x ) { // x can legally float to pre-header location | 861 if( x ) { // x can legally float to pre-header location |
849 register_new_node( x, preheader_ctrl ); | 862 register_new_node( x, preheader_ctrl ); |
958 // Plug in the true path | 971 // Plug in the true path |
959 _igvn.hash_delete( post_head ); | 972 _igvn.hash_delete( post_head ); |
960 post_head->set_req(LoopNode::EntryControl, zer_taken); | 973 post_head->set_req(LoopNode::EntryControl, zer_taken); |
961 set_idom(post_head, zer_taken, dd_main_exit); | 974 set_idom(post_head, zer_taken, dd_main_exit); |
962 | 975 |
976 Arena *a = Thread::current()->resource_area(); | |
977 VectorSet visited(a); | |
978 Node_Stack clones(a, main_head->back_control()->outcnt()); | |
963 // Step A3: Make the fall-in values to the post-loop come from the | 979 // Step A3: Make the fall-in values to the post-loop come from the |
964 // fall-out values of the main-loop. | 980 // fall-out values of the main-loop. |
965 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { | 981 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { |
966 Node* main_phi = main_head->fast_out(i); | 982 Node* main_phi = main_head->fast_out(i); |
967 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) { | 983 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) { |
968 Node *post_phi = old_new[main_phi->_idx]; | 984 Node *post_phi = old_new[main_phi->_idx]; |
969 Node *fallmain = clone_up_backedge_goo(main_head->back_control(), | 985 Node *fallmain = clone_up_backedge_goo(main_head->back_control(), |
970 post_head->init_control(), | 986 post_head->init_control(), |
971 main_phi->in(LoopNode::LoopBackControl)); | 987 main_phi->in(LoopNode::LoopBackControl), |
988 visited, clones); | |
972 _igvn.hash_delete(post_phi); | 989 _igvn.hash_delete(post_phi); |
973 post_phi->set_req( LoopNode::EntryControl, fallmain ); | 990 post_phi->set_req( LoopNode::EntryControl, fallmain ); |
974 } | 991 } |
975 } | 992 } |
976 | 993 |
1030 // Plug in the true path | 1047 // Plug in the true path |
1031 _igvn.hash_delete( main_head ); | 1048 _igvn.hash_delete( main_head ); |
1032 main_head->set_req(LoopNode::EntryControl, min_taken); | 1049 main_head->set_req(LoopNode::EntryControl, min_taken); |
1033 set_idom(main_head, min_taken, dd_main_head); | 1050 set_idom(main_head, min_taken, dd_main_head); |
1034 | 1051 |
1052 visited.Clear(); | |
1053 clones.clear(); | |
1035 // Step B3: Make the fall-in values to the main-loop come from the | 1054 // Step B3: Make the fall-in values to the main-loop come from the |
1036 // fall-out values of the pre-loop. | 1055 // fall-out values of the pre-loop. |
1037 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) { | 1056 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) { |
1038 Node* main_phi = main_head->fast_out(i2); | 1057 Node* main_phi = main_head->fast_out(i2); |
1039 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) { | 1058 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) { |
1040 Node *pre_phi = old_new[main_phi->_idx]; | 1059 Node *pre_phi = old_new[main_phi->_idx]; |
1041 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(), | 1060 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(), |
1042 main_head->init_control(), | 1061 main_head->init_control(), |
1043 pre_phi->in(LoopNode::LoopBackControl)); | 1062 pre_phi->in(LoopNode::LoopBackControl), |
1063 visited, clones); | |
1044 _igvn.hash_delete(main_phi); | 1064 _igvn.hash_delete(main_phi); |
1045 main_phi->set_req( LoopNode::EntryControl, fallpre ); | 1065 main_phi->set_req( LoopNode::EntryControl, fallpre ); |
1046 } | 1066 } |
1047 } | 1067 } |
1048 | 1068 |