comparison src/share/vm/opto/gcm.cpp @ 12071:adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
Summary: public methods that don't need to be public should be private.
Reviewed-by: kvn, twisti
author: adlertz
date: Fri, 16 Aug 2013 10:23:55 +0200
parents: d1034bd8cefc
children: 650868c062a9 e2722a66aba7
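
The cleanup below follows a single pattern: code that used to reach into PhaseCFG's public state (_num_blocks, _blocks[i], _broot, _node_latency, the Matcher reference, and so on) now goes through small accessors, so the underlying fields can become private. A minimal sketch of that shape; the accessor names are taken from the hunks below, while the field names and the rest of the class body are abridged and assumed:

    // Sketch only: illustrates the accessor pattern applied in this changeset.
    class PhaseCFG : public Phase {
     private:
      uint                 _number_of_blocks;   // formerly the public _num_blocks
      Block_List           _blocks;             // indexed storage for basic blocks
      Block*               _root_block;         // formerly the public _broot
      GrowableArray<uint>* _node_latency;       // per-node latency table

     public:
      uint   number_of_blocks()  { return _number_of_blocks; }
      Block* get_block(uint i)   { return _blocks[i]; }
      Block* get_root_block()    { return _root_block; }

      uint get_latency_for_node(Node* n) { return _node_latency->at_grow(n->_idx); }
      void set_latency_for_node(Node* n, uint latency) {
        _node_latency->at_put_grow(n->_idx, latency);
      }
    };
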
12070:afbe18ae0905 | 12071:adb9a7d94cb5 |
119 } | 119 } |
120 | 120 |
121 | 121 |
122 //------------------------------schedule_pinned_nodes-------------------------- | 122 //------------------------------schedule_pinned_nodes-------------------------- |
123 // Set the basic block for Nodes pinned into blocks | 123 // Set the basic block for Nodes pinned into blocks |
124 void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) { | 124 void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) { |
125 // Allocate node stack of size C->unique()+8 to avoid frequent realloc | 125 // Allocate node stack of size C->unique()+8 to avoid frequent realloc |
126 GrowableArray <Node *> spstack(C->unique()+8); | 126 GrowableArray <Node *> spstack(C->unique() + 8); |
127 spstack.push(_root); | 127 spstack.push(_root); |
128 while ( spstack.is_nonempty() ) { | 128 while (spstack.is_nonempty()) { |
129 Node *n = spstack.pop(); | 129 Node* node = spstack.pop(); |
130 if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited | 130 if (!visited.test_set(node->_idx)) { // Test node and flag it as visited |
131 if( n->pinned() && !has_block(n)) { // Pinned? Nail it down! | 131 if (node->pinned() && !has_block(node)) { // Pinned? Nail it down! |
132 assert( n->in(0), "pinned Node must have Control" ); | 132 assert(node->in(0), "pinned Node must have Control"); |
133 // Before setting block replace block_proj control edge | 133 // Before setting block replace block_proj control edge |
134 replace_block_proj_ctrl(n); | 134 replace_block_proj_ctrl(node); |
135 Node *input = n->in(0); | 135 Node* input = node->in(0); |
136 while (!input->is_block_start()) { | 136 while (!input->is_block_start()) { |
137 input = input->in(0); | 137 input = input->in(0); |
138 } | 138 } |
139 Block *b = get_block_for_node(input); // Basic block of controlling input | 139 Block* block = get_block_for_node(input); // Basic block of controlling input |
140 schedule_node_into_block(n, b); | 140 schedule_node_into_block(node, block); |
141 } | 141 } |
142 for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs | 142 |
143 if( n->in(i) != NULL ) | 143 // process all inputs that are non NULL |
144 spstack.push(n->in(i)); | 144 for (int i = node->req() - 1; i >= 0; --i) { |
145 if (node->in(i) != NULL) { | |
146 spstack.push(node->in(i)); | |
147 } | |
145 } | 148 } |
146 } | 149 } |
147 } | 150 } |
148 } | 151 } |
149 | 152 |
203 // Find the earliest Block any instruction can be placed in. Some instructions | 206 // Find the earliest Block any instruction can be placed in. Some instructions |
204 // are pinned into Blocks. Unpinned instructions can appear in last block in | 207 // are pinned into Blocks. Unpinned instructions can appear in last block in |
205 // which all their inputs occur. | 208 // which all their inputs occur. |
206 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) { | 209 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) { |
207 // Allocate stack with enough space to avoid frequent realloc | 210 // Allocate stack with enough space to avoid frequent realloc |
208 Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats | 211 Node_Stack nstack(roots.Size() + 8); |
209 // roots.push(_root); _root will be processed among C->top() inputs | 212 // _root will be processed among C->top() inputs |
210 roots.push(C->top()); | 213 roots.push(C->top()); |
211 visited.set(C->top()->_idx); | 214 visited.set(C->top()->_idx); |
212 | 215 |
213 while (roots.size() != 0) { | 216 while (roots.size() != 0) { |
214 // Use local variables nstack_top_n & nstack_top_i to cache values | 217 // Use local variables nstack_top_n & nstack_top_i to cache values |
215 // on stack's top. | 218 // on stack's top. |
216 Node *nstack_top_n = roots.pop(); | 219 Node* parent_node = roots.pop(); |
217 uint nstack_top_i = 0; | 220 uint input_index = 0; |
218 //while_nstack_nonempty: | 221 |
219 while (true) { | 222 while (true) { |
220 // Get parent node and next input's index from stack's top. | 223 if (input_index == 0) { |
221 Node *n = nstack_top_n; | |
222 uint i = nstack_top_i; | |
223 | |
224 if (i == 0) { | |
225 // Fixup some control. Constants without control get attached | 224 // Fixup some control. Constants without control get attached |
226 // to root and nodes that use is_block_proj() nodes should be attached | 225 // to root and nodes that use is_block_proj() nodes should be attached |
227 // to the region that starts their block. | 226 // to the region that starts their block. |
228 const Node *in0 = n->in(0); | 227 const Node* control_input = parent_node->in(0); |
229 if (in0 != NULL) { // Control-dependent? | 228 if (control_input != NULL) { |
230 replace_block_proj_ctrl(n); | 229 replace_block_proj_ctrl(parent_node); |
231 } else { // n->in(0) == NULL | 230 } else { |
232 if (n->req() == 1) { // This guy is a constant with NO inputs? | 231 // Is a constant with NO inputs? |
233 n->set_req(0, _root); | 232 if (parent_node->req() == 1) { |
233 parent_node->set_req(0, _root); | |
234 } | 234 } |
235 } | 235 } |
236 } | 236 } |
237 | 237 |
238 // First, visit all inputs and force them to get a block. If an | 238 // First, visit all inputs and force them to get a block. If an |
239 // input is already in a block we quit following inputs (to avoid | 239 // input is already in a block we quit following inputs (to avoid |
240 // cycles). Instead we put that Node on a worklist to be handled | 240 // cycles). Instead we put that Node on a worklist to be handled |
241 // later (since ITS inputs may not have a block yet). | 241 // later (since ITS inputs may not have a block yet). |
242 bool done = true; // Assume all n's inputs will be processed | 242 |
243 while (i < n->len()) { // For all inputs | 243 // Assume all n's inputs will be processed |
244 Node *in = n->in(i); // Get input | 244 bool done = true; |
245 ++i; | 245 |
246 if (in == NULL) continue; // Ignore NULL, missing inputs | 246 while (input_index < parent_node->len()) { |
247 Node* in = parent_node->in(input_index++); | |
248 if (in == NULL) { | |
249 continue; | |
250 } | |
251 | |
247 int is_visited = visited.test_set(in->_idx); | 252 int is_visited = visited.test_set(in->_idx); |
248 if (!has_block(in)) { // Missing block selection? | 253 if (!has_block(in)) { |
249 if (is_visited) { | 254 if (is_visited) { |
250 // assert( !visited.test(in->_idx), "did not schedule early" ); | |
251 return false; | 255 return false; |
252 } | 256 } |
253 nstack.push(n, i); // Save parent node and next input's index. | 257 // Save parent node and next input's index. |
254 nstack_top_n = in; // Process current input now. | 258 nstack.push(parent_node, input_index); |
255 nstack_top_i = 0; | 259 // Process current input now. |
256 done = false; // Not all n's inputs processed. | 260 parent_node = in; |
257 break; // continue while_nstack_nonempty; | 261 input_index = 0; |
258 } else if (!is_visited) { // Input not yet visited? | 262 // Not all n's inputs processed. |
259 roots.push(in); // Visit this guy later, using worklist | 263 done = false; |
264 break; | |
265 } else if (!is_visited) { | |
266 // Visit this guy later, using worklist | |
267 roots.push(in); | |
260 } | 268 } |
261 } | 269 } |
270 | |
262 if (done) { | 271 if (done) { |
263 // All of n's inputs have been processed, complete post-processing. | 272 // All of n's inputs have been processed, complete post-processing. |
264 | 273 |
265 // Some instructions are pinned into a block. These include Region, | 274 // Some instructions are pinned into a block. These include Region, |
266 // Phi, Start, Return, and other control-dependent instructions and | 275 // Phi, Start, Return, and other control-dependent instructions and |
267 // any projections which depend on them. | 276 // any projections which depend on them. |
268 if (!n->pinned()) { | 277 if (!parent_node->pinned()) { |
269 // Set earliest legal block. | 278 // Set earliest legal block. |
270 map_node_to_block(n, find_deepest_input(n, this)); | 279 Block* earliest_block = find_deepest_input(parent_node, this); |
280 map_node_to_block(parent_node, earliest_block); | |
271 } else { | 281 } else { |
272 assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge"); | 282 assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge"); |
273 } | 283 } |
274 | 284 |
275 if (nstack.is_empty()) { | 285 if (nstack.is_empty()) { |
276 // Finished all nodes on stack. | 286 // Finished all nodes on stack. |
277 // Process next node on the worklist 'roots'. | 287 // Process next node on the worklist 'roots'. |
278 break; | 288 break; |
279 } | 289 } |
280 // Get saved parent node and next input's index. | 290 // Get saved parent node and next input's index. |
281 nstack_top_n = nstack.node(); | 291 parent_node = nstack.node(); |
282 nstack_top_i = nstack.index(); | 292 input_index = nstack.index(); |
283 nstack.pop(); | 293 nstack.pop(); |
284 } // if (done) | 294 } |
285 } // while (true) | 295 } |
286 } // while (roots.size() != 0) | 296 } |
287 return true; | 297 return true; |
288 } | 298 } |
289 | 299 |
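
schedule_early() places each unpinned node via find_deepest_input(), which is not part of this excerpt: it returns the input block that lies deepest in the dominator tree, i.e. the earliest point at which every input is already available. A rough sketch of that idea, assuming the helper's shape rather than quoting it:

    // Sketch: deepest (most dominated) block among the blocks of n's inputs.
    static Block* find_deepest_input(Node* n, PhaseCFG* cfg) {
      Block* deepest = NULL;
      for (uint i = 0; i < n->len(); i++) {
        Node* in = n->in(i);
        if (in == NULL) {
          continue;                          // ignore missing inputs
        }
        Block* b = cfg->get_block_for_node(in);
        if (deepest == NULL || b->_dom_depth > deepest->_dom_depth) {
          deepest = b;                       // deeper in the dominator tree wins
        }
      }
      return deepest;
    }
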
290 //------------------------------dom_lca---------------------------------------- | 300 //------------------------------dom_lca---------------------------------------- |
291 // Find least common ancestor in dominator tree | 301 // Find least common ancestor in dominator tree |
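
The least-common-ancestor query named here is the standard dominator-tree walk: repeatedly raise whichever side is deeper until both sides meet. A generic sketch under that assumption, not the literal HotSpot routine:

    // Sketch: LCA of two blocks in the dominator tree.
    static Block* dom_lca_sketch(Block* a, Block* b) {
      while (a != b) {
        if (a->_dom_depth >= b->_dom_depth) {
          a = a->_idom;        // raise the deeper (or equally deep) side
        } else {
          b = b->_idom;
        }
      }
      return a;
    }
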
845 return self; | 855 return self; |
846 } | 856 } |
847 | 857 |
848 //------------------------------ComputeLatenciesBackwards---------------------- | 858 //------------------------------ComputeLatenciesBackwards---------------------- |
849 // Compute the latency of all the instructions. | 859 // Compute the latency of all the instructions. |
850 void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) { | 860 void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) { |
851 #ifndef PRODUCT | 861 #ifndef PRODUCT |
852 if (trace_opto_pipelining()) | 862 if (trace_opto_pipelining()) |
853 tty->print("\n#---- ComputeLatenciesBackwards ----\n"); | 863 tty->print("\n#---- ComputeLatenciesBackwards ----\n"); |
854 #endif | 864 #endif |
855 | 865 |
868 // a number that increases as we approach the beginning of the routine. | 878 // a number that increases as we approach the beginning of the routine. |
869 void PhaseCFG::partial_latency_of_defs(Node *n) { | 879 void PhaseCFG::partial_latency_of_defs(Node *n) { |
870 // Set the latency for this instruction | 880 // Set the latency for this instruction |
871 #ifndef PRODUCT | 881 #ifndef PRODUCT |
872 if (trace_opto_pipelining()) { | 882 if (trace_opto_pipelining()) { |
873 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", | 883 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); |
874 n->_idx, _node_latency->at_grow(n->_idx)); | |
875 dump(); | 884 dump(); |
876 } | 885 } |
877 #endif | 886 #endif |
878 | 887 |
879 if (n->is_Proj()) | 888 if (n->is_Proj()) { |
880 n = n->in(0); | 889 n = n->in(0); |
881 | 890 } |
882 if (n->is_Root()) | 891 |
892 if (n->is_Root()) { | |
883 return; | 893 return; |
894 } | |
884 | 895 |
885 uint nlen = n->len(); | 896 uint nlen = n->len(); |
886 uint use_latency = _node_latency->at_grow(n->_idx); | 897 uint use_latency = get_latency_for_node(n); |
887 uint use_pre_order = get_block_for_node(n)->_pre_order; | 898 uint use_pre_order = get_block_for_node(n)->_pre_order; |
888 | 899 |
889 for ( uint j=0; j<nlen; j++ ) { | 900 for (uint j = 0; j < nlen; j++) { |
890 Node *def = n->in(j); | 901 Node *def = n->in(j); |
891 | 902 |
892 if (!def || def == n) | 903 if (!def || def == n) { |
893 continue; | 904 continue; |
905 } | |
894 | 906 |
895 // Walk backwards thru projections | 907 // Walk backwards thru projections |
896 if (def->is_Proj()) | 908 if (def->is_Proj()) { |
897 def = def->in(0); | 909 def = def->in(0); |
910 } | |
898 | 911 |
899 #ifndef PRODUCT | 912 #ifndef PRODUCT |
900 if (trace_opto_pipelining()) { | 913 if (trace_opto_pipelining()) { |
901 tty->print("# in(%2d): ", j); | 914 tty->print("# in(%2d): ", j); |
902 def->dump(); | 915 def->dump(); |
905 | 918 |
906 // If the defining block is not known, assume it is ok | 919 // If the defining block is not known, assume it is ok |
907 Block *def_block = get_block_for_node(def); | 920 Block *def_block = get_block_for_node(def); |
908 uint def_pre_order = def_block ? def_block->_pre_order : 0; | 921 uint def_pre_order = def_block ? def_block->_pre_order : 0; |
909 | 922 |
910 if ( (use_pre_order < def_pre_order) || | 923 if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) { |
911 (use_pre_order == def_pre_order && n->is_Phi()) ) | |
912 continue; | 924 continue; |
925 } | |
913 | 926 |
914 uint delta_latency = n->latency(j); | 927 uint delta_latency = n->latency(j); |
915 uint current_latency = delta_latency + use_latency; | 928 uint current_latency = delta_latency + use_latency; |
916 | 929 |
917 if (_node_latency->at_grow(def->_idx) < current_latency) { | 930 if (get_latency_for_node(def) < current_latency) { |
918 _node_latency->at_put_grow(def->_idx, current_latency); | 931 set_latency_for_node(def, current_latency); |
919 } | 932 } |
920 | 933 |
921 #ifndef PRODUCT | 934 #ifndef PRODUCT |
922 if (trace_opto_pipelining()) { | 935 if (trace_opto_pipelining()) { |
923 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", | 936 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def)); |
924 use_latency, j, delta_latency, current_latency, def->_idx, | |
925 _node_latency->at_grow(def->_idx)); | |
926 } | 937 } |
927 #endif | 938 #endif |
928 } | 939 } |
929 } | 940 } |
930 | 941 |
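
Only the entry of compute_latencies_backwards() and the per-node helper appear in this excerpt; the driver in between visits every node after all of its uses and calls partial_latency_of_defs() to push latencies onto the inputs, so each definition ends up with the maximum of (use latency + edge latency) over all its uses. A condensed sketch of that flow; the backward iterator's construction is an assumption, only the helper names come from the diff:

    // Assumed driver shape: walk the graph from uses toward defs.
    Node_Backward_Iterator iter((Node*)_root, visited, stack, *this);
    Node* n;
    while ((n = iter.next())) {
      // latency(def) >= latency(use) + edge latency, maximised over all uses
      partial_latency_of_defs(n);
    }
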
955 | 966 |
956 if (use_pre_order == def_pre_order && use->is_Phi()) | 967 if (use_pre_order == def_pre_order && use->is_Phi()) |
957 return 0; | 968 return 0; |
958 | 969 |
959 uint nlen = use->len(); | 970 uint nlen = use->len(); |
960 uint nl = _node_latency->at_grow(use->_idx); | 971 uint nl = get_latency_for_node(use); |
961 | 972 |
962 for ( uint j=0; j<nlen; j++ ) { | 973 for ( uint j=0; j<nlen; j++ ) { |
963 if (use->in(j) == n) { | 974 if (use->in(j) == n) { |
964 // Change this if we want local latencies | 975 // Change this if we want local latencies |
965 uint ul = use->latency(j); | 976 uint ul = use->latency(j); |
990 // routine. | 1001 // routine. |
991 void PhaseCFG::latency_from_uses(Node *n) { | 1002 void PhaseCFG::latency_from_uses(Node *n) { |
992 // Set the latency for this instruction | 1003 // Set the latency for this instruction |
993 #ifndef PRODUCT | 1004 #ifndef PRODUCT |
994 if (trace_opto_pipelining()) { | 1005 if (trace_opto_pipelining()) { |
995 tty->print("# latency_from_outputs: node_latency[%d] = %d for node", | 1006 tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); |
996 n->_idx, _node_latency->at_grow(n->_idx)); | |
997 dump(); | 1007 dump(); |
998 } | 1008 } |
999 #endif | 1009 #endif |
1000 uint latency=0; | 1010 uint latency=0; |
1001 const Node *def = n->is_Proj() ? n->in(0): n; | 1011 const Node *def = n->is_Proj() ? n->in(0): n; |
1004 uint l = latency_from_use(n, def, n->fast_out(i)); | 1014 uint l = latency_from_use(n, def, n->fast_out(i)); |
1005 | 1015 |
1006 if (latency < l) latency = l; | 1016 if (latency < l) latency = l; |
1007 } | 1017 } |
1008 | 1018 |
1009 _node_latency->at_put_grow(n->_idx, latency); | 1019 set_latency_for_node(n, latency); |
1010 } | 1020 } |
1011 | 1021 |
1012 //------------------------------hoist_to_cheaper_block------------------------- | 1022 //------------------------------hoist_to_cheaper_block------------------------- |
1013 // Pick a block for node self, between early and LCA, that is a cheaper | 1023 // Pick a block for node self, between early and LCA, that is a cheaper |
1014 // alternative to LCA. | 1024 // alternative to LCA. |
1015 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { | 1025 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { |
1016 const double delta = 1+PROB_UNLIKELY_MAG(4); | 1026 const double delta = 1+PROB_UNLIKELY_MAG(4); |
1017 Block* least = LCA; | 1027 Block* least = LCA; |
1018 double least_freq = least->_freq; | 1028 double least_freq = least->_freq; |
1019 uint target = _node_latency->at_grow(self->_idx); | 1029 uint target = get_latency_for_node(self); |
1020 uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx); | 1030 uint start_latency = get_latency_for_node(LCA->_nodes[0]); |
1021 uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx); | 1031 uint end_latency = get_latency_for_node(LCA->_nodes[LCA->end_idx()]); |
1022 bool in_latency = (target <= start_latency); | 1032 bool in_latency = (target <= start_latency); |
1023 const Block* root_block = get_block_for_node(_root); | 1033 const Block* root_block = get_block_for_node(_root); |
1024 | 1034 |
1025 // Turn off latency scheduling if scheduling is just plain off | 1035 // Turn off latency scheduling if scheduling is just plain off |
1026 if (!C->do_scheduling()) | 1036 if (!C->do_scheduling()) |
1033 if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty()) | 1043 if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty()) |
1034 in_latency = true; | 1044 in_latency = true; |
1035 | 1045 |
1036 #ifndef PRODUCT | 1046 #ifndef PRODUCT |
1037 if (trace_opto_pipelining()) { | 1047 if (trace_opto_pipelining()) { |
1038 tty->print("# Find cheaper block for latency %d: ", | 1048 tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self)); |
1039 _node_latency->at_grow(self->_idx)); | |
1040 self->dump(); | 1049 self->dump(); |
1041 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | 1050 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", |
1042 LCA->_pre_order, | 1051 LCA->_pre_order, |
1043 LCA->_nodes[0]->_idx, | 1052 LCA->_nodes[0]->_idx, |
1044 start_latency, | 1053 start_latency, |
1063 | 1072 |
1064 // Don't hoist machine instructions to the root basic block | 1073 // Don't hoist machine instructions to the root basic block |
1065 if (mach && LCA == root_block) | 1074 if (mach && LCA == root_block) |
1066 break; | 1075 break; |
1067 | 1076 |
1068 uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx); | 1077 uint start_lat = get_latency_for_node(LCA->_nodes[0]); |
1069 uint end_idx = LCA->end_idx(); | 1078 uint end_idx = LCA->end_idx(); |
1070 uint end_lat = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx); | 1079 uint end_lat = get_latency_for_node(LCA->_nodes[end_idx]); |
1071 double LCA_freq = LCA->_freq; | 1080 double LCA_freq = LCA->_freq; |
1072 #ifndef PRODUCT | 1081 #ifndef PRODUCT |
1073 if (trace_opto_pipelining()) { | 1082 if (trace_opto_pipelining()) { |
1074 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | 1083 tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", |
1075 LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq); | 1084 LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq); |
1107 #ifndef PRODUCT | 1116 #ifndef PRODUCT |
1108 if (trace_opto_pipelining()) { | 1117 if (trace_opto_pipelining()) { |
1109 tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); | 1118 tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); |
1110 } | 1119 } |
1111 #endif | 1120 #endif |
1112 _node_latency->at_put_grow(self->_idx, end_latency); | 1121 set_latency_for_node(self, end_latency); |
1113 partial_latency_of_defs(self); | 1122 partial_latency_of_defs(self); |
1114 } | 1123 } |
1115 | 1124 |
1116 return least; | 1125 return least; |
1117 } | 1126 } |
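
Most of hoist_to_cheaper_block()'s search loop is elided from this excerpt. The idea is to walk from the LCA up the dominator tree toward the early block, remembering the lowest-frequency candidate, with the delta factor above biasing the comparison so a hoist is only taken when it is clearly cheaper (and, when latency scheduling is on, does not push the node past its latency target). A simplified sketch of that walk, assumed rather than quoted:

    // Sketch: scan the dominator chain LCA..early for the cheapest block.
    Block* least      = LCA;
    double least_freq = least->_freq;
    while (LCA != early && LCA != NULL) {
      LCA = LCA->_idom;                      // step one block up the dominator tree
      if (LCA == NULL) break;
      double LCA_freq = LCA->_freq;
      if (LCA_freq * delta < least_freq) {   // noticeably cheaper than the best so far
        least      = LCA;
        least_freq = LCA_freq;
      }
    }
    // If 'least' changes the node's latency window, the function updates the
    // latency and re-runs partial_latency_of_defs(self), as shown in the hunk above.
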
1253 } // Loop until all nodes have been visited | 1262 } // Loop until all nodes have been visited |
1254 | 1263 |
1255 } // end ScheduleLate | 1264 } // end ScheduleLate |
1256 | 1265 |
1257 //------------------------------GlobalCodeMotion------------------------------- | 1266 //------------------------------GlobalCodeMotion------------------------------- |
1258 void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) { | 1267 void PhaseCFG::global_code_motion() { |
1259 ResourceMark rm; | 1268 ResourceMark rm; |
1260 | 1269 |
1261 #ifndef PRODUCT | 1270 #ifndef PRODUCT |
1262 if (trace_opto_pipelining()) { | 1271 if (trace_opto_pipelining()) { |
1263 tty->print("\n---- Start GlobalCodeMotion ----\n"); | 1272 tty->print("\n---- Start GlobalCodeMotion ----\n"); |
1264 } | 1273 } |
1265 #endif | 1274 #endif |
1266 | 1275 |
1267 // Initialize the node to block mapping for things on the proj_list | 1276 // Initialize the node to block mapping for things on the proj_list |
1268 for (uint i = 0; i < proj_list.size(); i++) { | 1277 for (uint i = 0; i < _matcher.number_of_projections(); i++) { |
1269 unmap_node_from_block(proj_list[i]); | 1278 unmap_node_from_block(_matcher.get_projection(i)); |
1270 } | 1279 } |
1271 | 1280 |
1272 // Set the basic block for Nodes pinned into blocks | 1281 // Set the basic block for Nodes pinned into blocks |
1273 Arena *a = Thread::current()->resource_area(); | 1282 Arena* arena = Thread::current()->resource_area(); |
1274 VectorSet visited(a); | 1283 VectorSet visited(arena); |
1275 schedule_pinned_nodes( visited ); | 1284 schedule_pinned_nodes(visited); |
1276 | 1285 |
1277 // Find the earliest Block any instruction can be placed in. Some | 1286 // Find the earliest Block any instruction can be placed in. Some |
1278 // instructions are pinned into Blocks. Unpinned instructions can | 1287 // instructions are pinned into Blocks. Unpinned instructions can |
1279 // appear in last block in which all their inputs occur. | 1288 // appear in last block in which all their inputs occur. |
1280 visited.Clear(); | 1289 visited.Clear(); |
1281 Node_List stack(a); | 1290 Node_List stack(arena); |
1282 stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list | 1291 // Pre-grow the list |
1292 stack.map((C->unique() >> 1) + 16, NULL); | |
1283 if (!schedule_early(visited, stack)) { | 1293 if (!schedule_early(visited, stack)) { |
1284 // Bailout without retry | 1294 // Bailout without retry |
1285 C->record_method_not_compilable("early schedule failed"); | 1295 C->record_method_not_compilable("early schedule failed"); |
1286 return; | 1296 return; |
1287 } | 1297 } |
1288 | 1298 |
1289 // Build Def-Use edges. | 1299 // Build Def-Use edges. |
1290 proj_list.push(_root); // Add real root as another root | |
1291 proj_list.pop(); | |
1292 | |
1293 // Compute the latency information (via backwards walk) for all the | 1300 // Compute the latency information (via backwards walk) for all the |
1294 // instructions in the graph | 1301 // instructions in the graph |
1295 _node_latency = new GrowableArray<uint>(); // resource_area allocation | 1302 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
1296 | 1303 |
1297 if( C->do_scheduling() ) | 1304 if (C->do_scheduling()) { |
1298 ComputeLatenciesBackwards(visited, stack); | 1305 compute_latencies_backwards(visited, stack); |
1306 } | |
1299 | 1307 |
1300 // Now schedule all codes as LATE as possible. This is the LCA in the | 1308 // Now schedule all codes as LATE as possible. This is the LCA in the |
1301 // dominator tree of all USES of a value. Pick the block with the least | 1309 // dominator tree of all USES of a value. Pick the block with the least |
1302 // loop nesting depth that is lowest in the dominator tree. | 1310 // loop nesting depth that is lowest in the dominator tree. |
1303 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) | 1311 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) |
1304 schedule_late(visited, stack); | 1312 schedule_late(visited, stack); |
1305 if( C->failing() ) { | 1313 if (C->failing()) { |
1306 // schedule_late fails only when graph is incorrect. | 1314 // schedule_late fails only when graph is incorrect. |
1307 assert(!VerifyGraphEdges, "verification should have failed"); | 1315 assert(!VerifyGraphEdges, "verification should have failed"); |
1308 return; | 1316 return; |
1309 } | 1317 } |
1310 | |
1311 unique = C->unique(); | |
1312 | 1318 |
1313 #ifndef PRODUCT | 1319 #ifndef PRODUCT |
1314 if (trace_opto_pipelining()) { | 1320 if (trace_opto_pipelining()) { |
1315 tty->print("\n---- Detect implicit null checks ----\n"); | 1321 tty->print("\n---- Detect implicit null checks ----\n"); |
1316 } | 1322 } |
1330 allowed_reasons |= nth_bit(reason); | 1336 allowed_reasons |= nth_bit(reason); |
1331 } | 1337 } |
1332 // By reversing the loop direction we get a very minor gain on mpegaudio. | 1338 // By reversing the loop direction we get a very minor gain on mpegaudio. |
1333 // Feel free to revert to a forward loop for clarity. | 1339 // Feel free to revert to a forward loop for clarity. |
1334 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { | 1340 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { |
1335 for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) { | 1341 for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) { |
1336 Node *proj = matcher._null_check_tests[i ]; | 1342 Node* proj = _matcher._null_check_tests[i]; |
1337 Node *val = matcher._null_check_tests[i+1]; | 1343 Node* val = _matcher._null_check_tests[i + 1]; |
1338 get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons); | 1344 Block* block = get_block_for_node(proj); |
1345 block->implicit_null_check(this, proj, val, allowed_reasons); | |
1339 // The implicit_null_check will only perform the transformation | 1346 // The implicit_null_check will only perform the transformation |
1340 // if the null branch is truly uncommon, *and* it leads to an | 1347 // if the null branch is truly uncommon, *and* it leads to an |
1341 // uncommon trap. Combined with the too_many_traps guards | 1348 // uncommon trap. Combined with the too_many_traps guards |
1342 // above, this prevents SEGV storms reported in 6366351, | 1349 // above, this prevents SEGV storms reported in 6366351, |
1343 // by recompiling offending methods without this optimization. | 1350 // by recompiling offending methods without this optimization. |
1350 } | 1357 } |
1351 #endif | 1358 #endif |
1352 | 1359 |
1353 // Schedule locally. Right now a simple topological sort. | 1360 // Schedule locally. Right now a simple topological sort. |
1354 // Later, do a real latency aware scheduler. | 1361 // Later, do a real latency aware scheduler. |
1355 uint max_idx = C->unique(); | 1362 GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1); |
1356 GrowableArray<int> ready_cnt(max_idx, max_idx, -1); | |
1357 visited.Clear(); | 1363 visited.Clear(); |
1358 for (uint i = 0; i < _num_blocks; i++) { | 1364 for (uint i = 0; i < number_of_blocks(); i++) { |
1359 if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) { | 1365 Block* block = get_block(i); |
1366 if (!block->schedule_local(this, _matcher, ready_cnt, visited)) { | |
1360 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { | 1367 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { |
1361 C->record_method_not_compilable("local schedule failed"); | 1368 C->record_method_not_compilable("local schedule failed"); |
1362 } | 1369 } |
1363 return; | 1370 return; |
1364 } | 1371 } |
1365 } | 1372 } |
1366 | 1373 |
1367 // If we inserted any instructions between a Call and its CatchNode, | 1374 // If we inserted any instructions between a Call and its CatchNode, |
1368 // clone the instructions on all paths below the Catch. | 1375 // clone the instructions on all paths below the Catch. |
1369 for (uint i = 0; i < _num_blocks; i++) { | 1376 for (uint i = 0; i < number_of_blocks(); i++) { |
1370 _blocks[i]->call_catch_cleanup(this, C); | 1377 Block* block = get_block(i); |
1378 block->call_catch_cleanup(this, C); | |
1371 } | 1379 } |
1372 | 1380 |
1373 #ifndef PRODUCT | 1381 #ifndef PRODUCT |
1374 if (trace_opto_pipelining()) { | 1382 if (trace_opto_pipelining()) { |
1375 tty->print("\n---- After GlobalCodeMotion ----\n"); | 1383 tty->print("\n---- After GlobalCodeMotion ----\n"); |
1376 for (uint i = 0; i < _num_blocks; i++) { | 1384 for (uint i = 0; i < number_of_blocks(); i++) { |
1377 _blocks[i]->dump(); | 1385 Block* block = get_block(i); |
1386 block->dump(); | |
1378 } | 1387 } |
1379 } | 1388 } |
1380 #endif | 1389 #endif |
1381 // Dead. | 1390 // Dead. |
1382 _node_latency = (GrowableArray<uint> *)0xdeadbeef; | 1391 _node_latency = (GrowableArray<uint> *)0xdeadbeef; |
1383 } | 1392 } |
1384 | 1393 |
1394 bool PhaseCFG::do_global_code_motion() { | |
1395 | |
1396 build_dominator_tree(); | |
1397 if (C->failing()) { | |
1398 return false; | |
1399 } | |
1400 | |
1401 NOT_PRODUCT( C->verify_graph_edges(); ) | |
1402 | |
1403 estimate_block_frequency(); | |
1404 | |
1405 global_code_motion(); | |
1406 | |
1407 if (C->failing()) { | |
1408 return false; | |
1409 } | |
1410 | |
1411 return true; | |
1412 } | |
1385 | 1413 |
1386 //------------------------------Estimate_Block_Frequency----------------------- | 1414 //------------------------------Estimate_Block_Frequency----------------------- |
1387 // Estimate block frequencies based on IfNode probabilities. | 1415 // Estimate block frequencies based on IfNode probabilities. |
1388 void PhaseCFG::Estimate_Block_Frequency() { | 1416 void PhaseCFG::estimate_block_frequency() { |
1389 | 1417 |
1390 // Force conditional branches leading to uncommon traps to be unlikely, | 1418 // Force conditional branches leading to uncommon traps to be unlikely, |
1391 // not because we get to the uncommon_trap with less relative frequency, | 1419 // not because we get to the uncommon_trap with less relative frequency, |
1392 // but because an uncommon_trap typically causes a deopt, so we only get | 1420 // but because an uncommon_trap typically causes a deopt, so we only get |
1393 // there once. | 1421 // there once. |
1394 if (C->do_freq_based_layout()) { | 1422 if (C->do_freq_based_layout()) { |
1395 Block_List worklist; | 1423 Block_List worklist; |
1396 Block* root_blk = _blocks[0]; | 1424 Block* root_blk = get_block(0); |
1397 for (uint i = 1; i < root_blk->num_preds(); i++) { | 1425 for (uint i = 1; i < root_blk->num_preds(); i++) { |
1398 Block *pb = get_block_for_node(root_blk->pred(i)); | 1426 Block *pb = get_block_for_node(root_blk->pred(i)); |
1399 if (pb->has_uncommon_code()) { | 1427 if (pb->has_uncommon_code()) { |
1400 worklist.push(pb); | 1428 worklist.push(pb); |
1401 } | 1429 } |
1402 } | 1430 } |
1403 while (worklist.size() > 0) { | 1431 while (worklist.size() > 0) { |
1404 Block* uct = worklist.pop(); | 1432 Block* uct = worklist.pop(); |
1405 if (uct == _broot) continue; | 1433 if (uct == get_root_block()) { |
1434 continue; | |
1435 } | |
1406 for (uint i = 1; i < uct->num_preds(); i++) { | 1436 for (uint i = 1; i < uct->num_preds(); i++) { |
1407 Block *pb = get_block_for_node(uct->pred(i)); | 1437 Block *pb = get_block_for_node(uct->pred(i)); |
1408 if (pb->_num_succs == 1) { | 1438 if (pb->_num_succs == 1) { |
1409 worklist.push(pb); | 1439 worklist.push(pb); |
1410 } else if (pb->num_fall_throughs() == 2) { | 1440 } else if (pb->num_fall_throughs() == 2) { |
1424 // Adjust all frequencies to be relative to a single method entry | 1454 // Adjust all frequencies to be relative to a single method entry |
1425 _root_loop->_freq = 1.0; | 1455 _root_loop->_freq = 1.0; |
1426 _root_loop->scale_freq(); | 1456 _root_loop->scale_freq(); |
1427 | 1457 |
1428 // Save outmost loop frequency for LRG frequency threshold | 1458 // Save outmost loop frequency for LRG frequency threshold |
1429 _outer_loop_freq = _root_loop->outer_loop_freq(); | 1459 _outer_loop_frequency = _root_loop->outer_loop_freq(); |
1430 | 1460 |
1431 // force paths ending at uncommon traps to be infrequent | 1461 // force paths ending at uncommon traps to be infrequent |
1432 if (!C->do_freq_based_layout()) { | 1462 if (!C->do_freq_based_layout()) { |
1433 Block_List worklist; | 1463 Block_List worklist; |
1434 Block* root_blk = _blocks[0]; | 1464 Block* root_blk = get_block(0); |
1435 for (uint i = 1; i < root_blk->num_preds(); i++) { | 1465 for (uint i = 1; i < root_blk->num_preds(); i++) { |
1436 Block *pb = get_block_for_node(root_blk->pred(i)); | 1466 Block *pb = get_block_for_node(root_blk->pred(i)); |
1437 if (pb->has_uncommon_code()) { | 1467 if (pb->has_uncommon_code()) { |
1438 worklist.push(pb); | 1468 worklist.push(pb); |
1439 } | 1469 } |
1449 } | 1479 } |
1450 } | 1480 } |
1451 } | 1481 } |
1452 | 1482 |
1453 #ifdef ASSERT | 1483 #ifdef ASSERT |
1454 for (uint i = 0; i < _num_blocks; i++ ) { | 1484 for (uint i = 0; i < number_of_blocks(); i++) { |
1455 Block *b = _blocks[i]; | 1485 Block* b = get_block(i); |
1456 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); | 1486 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); |
1457 } | 1487 } |
1458 #endif | 1488 #endif |
1459 | 1489 |
1460 #ifndef PRODUCT | 1490 #ifndef PRODUCT |
1474 //----------------------------create_loop_tree-------------------------------- | 1504 //----------------------------create_loop_tree-------------------------------- |
1475 // Create a loop tree from the CFG | 1505 // Create a loop tree from the CFG |
1476 CFGLoop* PhaseCFG::create_loop_tree() { | 1506 CFGLoop* PhaseCFG::create_loop_tree() { |
1477 | 1507 |
1478 #ifdef ASSERT | 1508 #ifdef ASSERT |
1479 assert( _blocks[0] == _broot, "" ); | 1509 assert(get_block(0) == get_root_block(), "first block should be root block"); |
1480 for (uint i = 0; i < _num_blocks; i++ ) { | 1510 for (uint i = 0; i < number_of_blocks(); i++) { |
1481 Block *b = _blocks[i]; | 1511 Block* block = get_block(i); |
1482 // Check that _loop fields are clear...we could clear them if not. | 1512 // Check that _loop fields are clear...we could clear them if not. |
1483 assert(b->_loop == NULL, "clear _loop expected"); | 1513 assert(block->_loop == NULL, "clear _loop expected"); |
1484 // Sanity check that the RPO numbering is reflected in the _blocks array. | 1514 // Sanity check that the RPO numbering is reflected in the _blocks array. |
1485 // It doesn't have to be for the loop tree to be built, but if it is not, | 1515 // It doesn't have to be for the loop tree to be built, but if it is not, |
1486 // then the blocks have been reordered since dom graph building...which | 1516 // then the blocks have been reordered since dom graph building...which |
1487 // may question the RPO numbering | 1517 // may question the RPO numbering |
1488 assert(b->_rpo == i, "unexpected reverse post order number"); | 1518 assert(block->_rpo == i, "unexpected reverse post order number"); |
1489 } | 1519 } |
1490 #endif | 1520 #endif |
1491 | 1521 |
1492 int idct = 0; | 1522 int idct = 0; |
1493 CFGLoop* root_loop = new CFGLoop(idct++); | 1523 CFGLoop* root_loop = new CFGLoop(idct++); |
1494 | 1524 |
1495 Block_List worklist; | 1525 Block_List worklist; |
1496 | 1526 |
1497 // Assign blocks to loops | 1527 // Assign blocks to loops |
1498 for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block | 1528 for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block |
1499 Block *b = _blocks[i]; | 1529 Block* block = get_block(i); |
1500 | 1530 |
1501 if (b->head()->is_Loop()) { | 1531 if (block->head()->is_Loop()) { |
1502 Block* loop_head = b; | 1532 Block* loop_head = block; |
1503 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | 1533 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); |
1504 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); | 1534 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); |
1505 Block* tail = get_block_for_node(tail_n); | 1535 Block* tail = get_block_for_node(tail_n); |
1506 | 1536 |
1507 // Defensively filter out Loop nodes for non-single-entry loops. | 1537 // Defensively filter out Loop nodes for non-single-entry loops. |
1531 } | 1561 } |
1532 } | 1562 } |
1533 | 1563 |
1534 // Create a member list for each loop consisting | 1564 // Create a member list for each loop consisting |
1535 // of both blocks and (immediate child) loops. | 1565 // of both blocks and (immediate child) loops. |
1536 for (uint i = 0; i < _num_blocks; i++) { | 1566 for (uint i = 0; i < number_of_blocks(); i++) { |
1537 Block *b = _blocks[i]; | 1567 Block* block = get_block(i); |
1538 CFGLoop* lp = b->_loop; | 1568 CFGLoop* lp = block->_loop; |
1539 if (lp == NULL) { | 1569 if (lp == NULL) { |
1540 // Not assigned to a loop. Add it to the method's pseudo loop. | 1570 // Not assigned to a loop. Add it to the method's pseudo loop. |
1541 b->_loop = root_loop; | 1571 block->_loop = root_loop; |
1542 lp = root_loop; | 1572 lp = root_loop; |
1543 } | 1573 } |
1544 if (lp == root_loop || b != lp->head()) { // loop heads are already members | 1574 if (lp == root_loop || block != lp->head()) { // loop heads are already members |
1545 lp->add_member(b); | 1575 lp->add_member(block); |
1546 } | 1576 } |
1547 if (lp != root_loop) { | 1577 if (lp != root_loop) { |
1548 if (lp->parent() == NULL) { | 1578 if (lp->parent() == NULL) { |
1549 // Not a nested loop. Make it a child of the method's pseudo loop. | 1579 // Not a nested loop. Make it a child of the method's pseudo loop. |
1550 root_loop->add_nested_loop(lp); | 1580 root_loop->add_nested_loop(lp); |
1551 } | 1581 } |
1552 if (b == lp->head()) { | 1582 if (block == lp->head()) { |
1553 // Add nested loop to member list of parent loop. | 1583 // Add nested loop to member list of parent loop. |
1554 lp->parent()->add_member(lp); | 1584 lp->parent()->add_member(lp); |
1555 } | 1585 } |
1556 } | 1586 } |
1557 } | 1587 } |