comparison src/share/vm/opto/loopTransform.cpp @ 3788:e3cbc9ddd434

7044738: Loop unroll optimization causes incorrect result Summary: take into account memory dependencies when clonning nodes in clone_up_backedge_goo(). Reviewed-by: never
author kvn
date Tue, 28 Jun 2011 15:24:29 -0700
parents aacaff365100
children c96c3eb1efae
comparison
equal deleted inserted replaced
3787:6ae7a1561b53 3788:e3cbc9ddd434
822 } 822 }
823 823
824 //------------------------------clone_up_backedge_goo-------------------------- 824 //------------------------------clone_up_backedge_goo--------------------------
825 // If Node n lives in the back_ctrl block and cannot float, we clone a private 825 // If Node n lives in the back_ctrl block and cannot float, we clone a private
826 // version of n in preheader_ctrl block and return that, otherwise return n. 826 // version of n in preheader_ctrl block and return that, otherwise return n.
827 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n ) { 827 Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ) {
828 if( get_ctrl(n) != back_ctrl ) return n; 828 if( get_ctrl(n) != back_ctrl ) return n;
829
830 // Only visit once
831 if (visited.test_set(n->_idx)) {
832 Node *x = clones.find(n->_idx);
833 if (x != NULL)
834 return x;
835 return n;
836 }
829 837
830 Node *x = NULL; // If required, a clone of 'n' 838 Node *x = NULL; // If required, a clone of 'n'
831 // Check for 'n' being pinned in the backedge. 839 // Check for 'n' being pinned in the backedge.
832 if( n->in(0) && n->in(0) == back_ctrl ) { 840 if( n->in(0) && n->in(0) == back_ctrl ) {
841 assert(clones.find(n->_idx) == NULL, "dead loop");
833 x = n->clone(); // Clone a copy of 'n' to preheader 842 x = n->clone(); // Clone a copy of 'n' to preheader
843 clones.push(x, n->_idx);
834 x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader 844 x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader
835 } 845 }
836 846
837 // Recursive fixup any other input edges into x. 847 // Recursive fixup any other input edges into x.
838 // If there are no changes we can just return 'n', otherwise 848 // If there are no changes we can just return 'n', otherwise
839 // we need to clone a private copy and change it. 849 // we need to clone a private copy and change it.
840 for( uint i = 1; i < n->req(); i++ ) { 850 for( uint i = 1; i < n->req(); i++ ) {
841 Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i) ); 851 Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i), visited, clones );
842 if( g != n->in(i) ) { 852 if( g != n->in(i) ) {
843 if( !x ) 853 if( !x ) {
854 assert(clones.find(n->_idx) == NULL, "dead loop");
844 x = n->clone(); 855 x = n->clone();
856 clones.push(x, n->_idx);
857 }
845 x->set_req(i, g); 858 x->set_req(i, g);
846 } 859 }
847 } 860 }
848 if( x ) { // x can legally float to pre-header location 861 if( x ) { // x can legally float to pre-header location
849 register_new_node( x, preheader_ctrl ); 862 register_new_node( x, preheader_ctrl );
958 // Plug in the true path 971 // Plug in the true path
959 _igvn.hash_delete( post_head ); 972 _igvn.hash_delete( post_head );
960 post_head->set_req(LoopNode::EntryControl, zer_taken); 973 post_head->set_req(LoopNode::EntryControl, zer_taken);
961 set_idom(post_head, zer_taken, dd_main_exit); 974 set_idom(post_head, zer_taken, dd_main_exit);
962 975
976 Arena *a = Thread::current()->resource_area();
977 VectorSet visited(a);
978 Node_Stack clones(a, main_head->back_control()->outcnt());
963 // Step A3: Make the fall-in values to the post-loop come from the 979 // Step A3: Make the fall-in values to the post-loop come from the
964 // fall-out values of the main-loop. 980 // fall-out values of the main-loop.
965 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) { 981 for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
966 Node* main_phi = main_head->fast_out(i); 982 Node* main_phi = main_head->fast_out(i);
967 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) { 983 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
968 Node *post_phi = old_new[main_phi->_idx]; 984 Node *post_phi = old_new[main_phi->_idx];
969 Node *fallmain = clone_up_backedge_goo(main_head->back_control(), 985 Node *fallmain = clone_up_backedge_goo(main_head->back_control(),
970 post_head->init_control(), 986 post_head->init_control(),
971 main_phi->in(LoopNode::LoopBackControl)); 987 main_phi->in(LoopNode::LoopBackControl),
988 visited, clones);
972 _igvn.hash_delete(post_phi); 989 _igvn.hash_delete(post_phi);
973 post_phi->set_req( LoopNode::EntryControl, fallmain ); 990 post_phi->set_req( LoopNode::EntryControl, fallmain );
974 } 991 }
975 } 992 }
976 993
1030 // Plug in the true path 1047 // Plug in the true path
1031 _igvn.hash_delete( main_head ); 1048 _igvn.hash_delete( main_head );
1032 main_head->set_req(LoopNode::EntryControl, min_taken); 1049 main_head->set_req(LoopNode::EntryControl, min_taken);
1033 set_idom(main_head, min_taken, dd_main_head); 1050 set_idom(main_head, min_taken, dd_main_head);
1034 1051
1052 visited.Clear();
1053 clones.clear();
1035 // Step B3: Make the fall-in values to the main-loop come from the 1054 // Step B3: Make the fall-in values to the main-loop come from the
1036 // fall-out values of the pre-loop. 1055 // fall-out values of the pre-loop.
1037 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) { 1056 for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
1038 Node* main_phi = main_head->fast_out(i2); 1057 Node* main_phi = main_head->fast_out(i2);
1039 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) { 1058 if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
1040 Node *pre_phi = old_new[main_phi->_idx]; 1059 Node *pre_phi = old_new[main_phi->_idx];
1041 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(), 1060 Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
1042 main_head->init_control(), 1061 main_head->init_control(),
1043 pre_phi->in(LoopNode::LoopBackControl)); 1062 pre_phi->in(LoopNode::LoopBackControl),
1063 visited, clones);
1044 _igvn.hash_delete(main_phi); 1064 _igvn.hash_delete(main_phi);
1045 main_phi->set_req( LoopNode::EntryControl, fallpre ); 1065 main_phi->set_req( LoopNode::EntryControl, fallpre );
1046 } 1066 }
1047 } 1067 }
1048 1068