comparison src/share/vm/opto/gcm.cpp @ 12071:adb9a7d94cb5

8023003: Cleanup the public interface to PhaseCFG
Summary: public methods that don't need to be public should be private.
Reviewed-by: kvn, twisti
author adlertz
date Fri, 16 Aug 2013 10:23:55 +0200
parents d1034bd8cefc
children 650868c062a9 e2722a66aba7
diff -r afbe18ae0905 -r adb9a7d94cb5 src/share/vm/opto/gcm.cpp
--- a/src/share/vm/opto/gcm.cpp
+++ b/src/share/vm/opto/gcm.cpp
@@ -119,31 +119,34 @@
 }
 
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
-void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
   // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique()+8);
+  GrowableArray <Node *> spstack(C->unique() + 8);
   spstack.push(_root);
-  while ( spstack.is_nonempty() ) {
-    Node *n = spstack.pop();
-    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !has_block(n)) {  // Pinned?  Nail it down!
-        assert( n->in(0), "pinned Node must have Control" );
+  while (spstack.is_nonempty()) {
+    Node* node = spstack.pop();
+    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
+      if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
+        assert(node->in(0), "pinned Node must have Control");
         // Before setting block replace block_proj control edge
-        replace_block_proj_ctrl(n);
-        Node *input = n->in(0);
+        replace_block_proj_ctrl(node);
+        Node* input = node->in(0);
         while (!input->is_block_start()) {
           input = input->in(0);
         }
-        Block *b = get_block_for_node(input); // Basic block of controlling input
-        schedule_node_into_block(n, b);
+        Block* block = get_block_for_node(input); // Basic block of controlling input
+        schedule_node_into_block(node, block);
       }
-      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
-        if( n->in(i) != NULL )
-          spstack.push(n->in(i));
+
+      // process all inputs that are non NULL
+      for (int i = node->req() - 1; i >= 0; --i) {
+        if (node->in(i) != NULL) {
+          spstack.push(node->in(i));
+        }
       }
     }
   }
 }
 
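Note: schedule_pinned_nodes walks the whole graph with an explicit stack rather than recursion, presumably because C2's ideal graphs can be deep enough to make native recursion risky. A minimal stand-alone sketch of the same pattern, with std::vector standing in for GrowableArray and VectorSet and all names illustrative rather than taken from the patch:

    #include <cstddef>
    #include <vector>

    struct GraphNode {
      int idx;                          // dense node index, like Node::_idx
      std::vector<GraphNode*> inputs;   // like Node::in(i)
    };

    template <typename Visit>
    void dfs_with_explicit_stack(GraphNode* root, int max_idx, Visit visit) {
      std::vector<bool> visited(max_idx, false);
      std::vector<GraphNode*> stack;    // plays the role of spstack
      stack.push_back(root);
      while (!stack.empty()) {
        GraphNode* node = stack.back();
        stack.pop_back();
        if (visited[node->idx]) {
          continue;                     // like visited.test_set(node->_idx)
        }
        visited[node->idx] = true;
        visit(node);                    // e.g. pin the node to its block
        // Push inputs in reverse, mirroring the req() - 1 ... 0 loop above,
        // so in(0) is popped (and therefore visited) first.
        for (int i = (int)node->inputs.size() - 1; i >= 0; --i) {
          if (node->inputs[i] != NULL) {
            stack.push_back(node->inputs[i]);
          }
        }
      }
    }
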
@@ -203,89 +206,96 @@ schedule_early
 // Find the earliest Block any instruction can be placed in.  Some instructions
 // are pinned into Blocks.  Unpinned instructions can appear in last block in
 // which all their inputs occur.
 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
   // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
-  // roots.push(_root); _root will be processed among C->top() inputs
+  Node_Stack nstack(roots.Size() + 8);
+  // _root will be processed among C->top() inputs
   roots.push(C->top());
   visited.set(C->top()->_idx);
 
   while (roots.size() != 0) {
     // Use local variables nstack_top_n & nstack_top_i to cache values
     // on stack's top.
-    Node *nstack_top_n = roots.pop();
-    uint  nstack_top_i = 0;
-//while_nstack_nonempty:
+    Node* parent_node = roots.pop();
+    uint  input_index = 0;
+
     while (true) {
-      // Get parent node and next input's index from stack's top.
-      Node *n = nstack_top_n;
-      uint  i = nstack_top_i;
-
-      if (i == 0) {
+      if (input_index == 0) {
         // Fixup some control.  Constants without control get attached
         // to root and nodes that use is_block_proj() nodes should be attached
         // to the region that starts their block.
-        const Node *in0 = n->in(0);
-        if (in0 != NULL) {              // Control-dependent?
-          replace_block_proj_ctrl(n);
-        } else {               // n->in(0) == NULL
-          if (n->req() == 1) { // This guy is a constant with NO inputs?
-            n->set_req(0, _root);
+        const Node* control_input = parent_node->in(0);
+        if (control_input != NULL) {
+          replace_block_proj_ctrl(parent_node);
+        } else {
+          // Is a constant with NO inputs?
+          if (parent_node->req() == 1) {
+            parent_node->set_req(0, _root);
           }
         }
       }
 
       // First, visit all inputs and force them to get a block.  If an
       // input is already in a block we quit following inputs (to avoid
       // cycles). Instead we put that Node on a worklist to be handled
       // later (since IT'S inputs may not have a block yet).
-      bool done = true;       // Assume all n's inputs will be processed
-      while (i < n->len()) {  // For all inputs
-        Node *in = n->in(i);  // Get input
-        ++i;
-        if (in == NULL) continue;  // Ignore NULL, missing inputs
+
+      // Assume all n's inputs will be processed
+      bool done = true;
+
+      while (input_index < parent_node->len()) {
+        Node* in = parent_node->in(input_index++);
+        if (in == NULL) {
+          continue;
+        }
+
         int is_visited = visited.test_set(in->_idx);
-        if (!has_block(in)) { // Missing block selection?
+        if (!has_block(in)) {
           if (is_visited) {
-            // assert( !visited.test(in->_idx), "did not schedule early" );
             return false;
           }
-          nstack.push(n, i); // Save parent node and next input's index.
-          nstack_top_n = in; // Process current input now.
-          nstack_top_i = 0;
-          done = false;      // Not all n's inputs processed.
-          break; // continue while_nstack_nonempty;
-        } else if (!is_visited) { // Input not yet visited?
-          roots.push(in);         // Visit this guy later, using worklist
+          // Save parent node and next input's index.
+          nstack.push(parent_node, input_index);
+          // Process current input now.
+          parent_node = in;
+          input_index = 0;
+          // Not all n's inputs processed.
+          done = false;
+          break;
+        } else if (!is_visited) {
+          // Visit this guy later, using worklist
+          roots.push(in);
         }
       }
+
       if (done) {
         // All of n's inputs have been processed, complete post-processing.
 
         // Some instructions are pinned into a block.  These include Region,
         // Phi, Start, Return, and other control-dependent instructions and
         // any projections which depend on them.
-        if (!n->pinned()) {
+        if (!parent_node->pinned()) {
           // Set earliest legal block.
-          map_node_to_block(n, find_deepest_input(n, this));
+          Block* earliest_block = find_deepest_input(parent_node, this);
+          map_node_to_block(parent_node, earliest_block);
         } else {
-          assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge");
+          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
         }
 
         if (nstack.is_empty()) {
           // Finished all nodes on stack.
           // Process next node on the worklist 'roots'.
           break;
         }
         // Get saved parent node and next input's index.
-        nstack_top_n = nstack.node();
-        nstack_top_i = nstack.index();
+        parent_node = nstack.node();
+        input_index = nstack.index();
         nstack.pop();
-      } // if (done)
-    } // while (true)
-  } // while (roots.size() != 0)
+      }
+    }
+  }
   return true;
 }
 
 //------------------------------dom_lca----------------------------------------
 // Find least common ancestor in dominator tree
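Note: schedule_early gets the same inputs-before-node ordering without recursion by saving a (parent, next-input-index) pair on nstack before descending into an unplaced input, then resuming the parent where it left off; the rename from nstack_top_n/nstack_top_i to parent_node/input_index makes that cached stack top easier to follow. A simplified sketch of the resume pattern, reusing the GraphNode type from the sketch above and assuming an acyclic graph (the real code instead returns false when it revisits a node that still has no block):

    #include <cstddef>
    #include <vector>

    struct Frame {
      GraphNode* node;   // saved parent
      size_t     index;  // next input to examine when the parent resumes
    };

    template <typename Post>
    void visit_inputs_first(GraphNode* start, std::vector<bool>& visited,
                            Post post_process) {
      std::vector<Frame> nstack;
      GraphNode* parent_node = start;
      size_t input_index = 0;
      while (true) {
        bool done = true;   // assume every input of parent_node is handled
        while (input_index < parent_node->inputs.size()) {
          GraphNode* in = parent_node->inputs[input_index++];
          if (in == NULL || visited[in->idx]) {
            continue;
          }
          visited[in->idx] = true;
          nstack.push_back(Frame{parent_node, input_index}); // remember resume point
          parent_node = in;                                  // descend into the input
          input_index = 0;
          done = false;
          break;
        }
        if (done) {
          post_process(parent_node);  // all inputs handled; e.g. pick its block
          if (nstack.empty()) {
            break;
          }
          parent_node = nstack.back().node;   // pop saved frame and resume
          input_index = nstack.back().index;
          nstack.pop_back();
        }
      }
    }
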
@@ -845,11 +855,11 @@
   return self;
 }
 
 //------------------------------ComputeLatenciesBackwards----------------------
 // Compute the latency of all the instructions.
-void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
 #ifndef PRODUCT
   if (trace_opto_pipelining())
     tty->print("\n#---- ComputeLatenciesBackwards ----\n");
 #endif
 
@@ -868,35 +878,38 @@ partial_latency_of_defs
 // a number that increases as we approach the beginning of the routine.
 void PhaseCFG::partial_latency_of_defs(Node *n) {
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
 
-  if (n->is_Proj())
+  if (n->is_Proj()) {
     n = n->in(0);
+  }
 
-  if (n->is_Root())
+  if (n->is_Root()) {
     return;
+  }
 
   uint nlen = n->len();
-  uint use_latency = _node_latency->at_grow(n->_idx);
+  uint use_latency = get_latency_for_node(n);
   uint use_pre_order = get_block_for_node(n)->_pre_order;
 
-  for ( uint j=0; j<nlen; j++ ) {
+  for (uint j = 0; j < nlen; j++) {
     Node *def = n->in(j);
 
-    if (!def || def == n)
+    if (!def || def == n) {
      continue;
+    }
 
     // Walk backwards thru projections
-    if (def->is_Proj())
+    if (def->is_Proj()) {
       def = def->in(0);
+    }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
       tty->print("#    in(%2d): ", j);
       def->dump();
@@ -905,26 +918,24 @@ partial_latency_of_defs
 
     // If the defining block is not known, assume it is ok
     Block *def_block = get_block_for_node(def);
     uint def_pre_order = def_block ? def_block->_pre_order : 0;
 
-    if ( (use_pre_order < def_pre_order) ||
-         (use_pre_order == def_pre_order && n->is_Phi()) )
+    if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
       continue;
+    }
 
     uint delta_latency = n->latency(j);
     uint current_latency = delta_latency + use_latency;
 
-    if (_node_latency->at_grow(def->_idx) < current_latency) {
-      _node_latency->at_put_grow(def->_idx, current_latency);
+    if (get_latency_for_node(def) < current_latency) {
+      set_latency_for_node(def, current_latency);
     }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
-      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
-                    use_latency, j, delta_latency, current_latency, def->_idx,
-                    _node_latency->at_grow(def->_idx));
+      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
     }
 #endif
   }
 }
 
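Note: the change running through partial_latency_of_defs (and the remaining latency hunks below) is mechanical: raw _node_latency->at_grow(idx) reads and at_put_grow(idx, v) writes become get_latency_for_node / set_latency_for_node calls. The accessors themselves are declared on PhaseCFG outside this file's diff; judging purely from the call sites they would look roughly like the following (a guess at the shape, not the patch's text):

    // Hypothetical reconstruction from the call sites above; the real
    // declarations live in the PhaseCFG class (block.hpp), not in this hunk.
    uint PhaseCFG::get_latency_for_node(Node* node) {
      assert(_node_latency != NULL, "only valid during global code motion");
      return _node_latency->at_grow(node->_idx);
    }

    void PhaseCFG::set_latency_for_node(Node* node, uint latency) {
      assert(_node_latency != NULL, "only valid during global code motion");
      _node_latency->at_put_grow(node->_idx, latency);
    }
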
@@ -955,11 +966,11 @@ latency_from_use
 
   if (use_pre_order == def_pre_order && use->is_Phi())
     return 0;
 
   uint nlen = use->len();
-  uint nl = _node_latency->at_grow(use->_idx);
+  uint nl = get_latency_for_node(use);
 
   for ( uint j=0; j<nlen; j++ ) {
     if (use->in(j) == n) {
       // Change this if we want local latencies
       uint ul = use->latency(j);
@@ -990,12 +1001,11 @@ latency_from_uses
 // routine.
 void PhaseCFG::latency_from_uses(Node *n) {
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
   uint latency=0;
   const Node *def = n->is_Proj() ? n->in(0): n;
@@ -1004,8 +1014,8 @@ latency_from_uses
     uint l = latency_from_use(n, def, n->fast_out(i));
 
     if (latency < l) latency = l;
   }
 
-  _node_latency->at_put_grow(n->_idx, latency);
+  set_latency_for_node(n, latency);
 }
 
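Note: partial_latency_of_defs and latency_from_uses apply one rule from two directions: walking backwards from uses to defs, a def's latency must cover its slowest use plus the latency of the connecting edge. Boiled down to the arithmetic, with plain arrays standing in for the node graph (an illustrative signature, not the real API):

    #include <algorithm>
    #include <cstdint>

    // latency(def) = max over all uses of (latency(use) + edge_latency(use->def)).
    uint32_t latency_for_def(const uint32_t* use_latency,
                             const uint32_t* edge_latency,
                             int num_uses) {
      uint32_t latency = 0;
      for (int i = 0; i < num_uses; i++) {
        latency = std::max(latency, use_latency[i] + edge_latency[i]);
      }
      return latency;
    }
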
@@ -1012,15 +1022,15 @@
 //------------------------------hoist_to_cheaper_block-------------------------
 // Pick a block for node self, between early and LCA, that is a cheaper
 // alternative to LCA.
 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
   const double delta = 1+PROB_UNLIKELY_MAG(4);
   Block* least       = LCA;
   double least_freq  = least->_freq;
-  uint target        = _node_latency->at_grow(self->_idx);
-  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
-  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+  uint target        = get_latency_for_node(self);
+  uint start_latency = get_latency_for_node(LCA->_nodes[0]);
+  uint end_latency   = get_latency_for_node(LCA->_nodes[LCA->end_idx()]);
   bool in_latency    = (target <= start_latency);
   const Block* root_block = get_block_for_node(_root);
 
   // Turn off latency scheduling if scheduling is just plain off
   if (!C->do_scheduling())
@@ -1033,12 +1043,11 @@ hoist_to_cheaper_block
   if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
     in_latency = true;
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# Find cheaper block for latency %d: ",
-               _node_latency->at_grow(self->_idx));
+    tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
     self->dump();
     tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
                   LCA->_pre_order,
                   LCA->_nodes[0]->_idx,
                   start_latency,
@@ -1063,13 +1072,13 @@ hoist_to_cheaper_block
 
     // Don't hoist machine instructions to the root basic block
     if (mach && LCA == root_block)
       break;
 
-    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
+    uint start_lat = get_latency_for_node(LCA->_nodes[0]);
     uint end_idx   = LCA->end_idx();
-    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
+    uint end_lat   = get_latency_for_node(LCA->_nodes[end_idx]);
     double LCA_freq = LCA->_freq;
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
       tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
                     LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
@@ -1107,11 +1116,11 @@ hoist_to_cheaper_block
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
       tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
     }
 #endif
-    _node_latency->at_put_grow(self->_idx, end_latency);
+    set_latency_for_node(self, end_latency);
     partial_latency_of_defs(self);
   }
 
   return least;
 }
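Note: hoist_to_cheaper_block walks from the LCA up the dominator tree toward the earliest legal block, trading schedule latency against block frequency. Stripped down to just the frequency comparison (the real loop also weighs latency slack, register-bound outputs, and refuses to hoist machine nodes into the root block), an illustrative core:

    // Illustrative only: path[0] is the LCA and later entries walk up the
    // dominator tree toward 'early'; FreqBlock stands in for Block.
    struct FreqBlock {
      double freq;   // like Block::_freq
    };

    FreqBlock* pick_cheaper_block(FreqBlock* const* path, int path_len) {
      FreqBlock* least = path[0];
      for (int i = 1; i < path_len; i++) {
        if (path[i]->freq < least->freq) {  // strictly lower execution frequency
          least = path[i];
        }
      }
      // The real code additionally tolerates blocks no worse than
      // least_freq * delta when the move buys latency headroom.
      return least;
    }
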
@@ -1253,64 +1262,61 @@
   } // Loop until all nodes have been visited
 
 } // end ScheduleLate
 
 //------------------------------GlobalCodeMotion-------------------------------
-void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+void PhaseCFG::global_code_motion() {
   ResourceMark rm;
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- Start GlobalCodeMotion ----\n");
   }
 #endif
 
   // Initialize the node to block mapping for things on the proj_list
-  for (uint i = 0; i < proj_list.size(); i++) {
-    unmap_node_from_block(proj_list[i]);
+  for (uint i = 0; i < _matcher.number_of_projections(); i++) {
+    unmap_node_from_block(_matcher.get_projection(i));
   }
 
   // Set the basic block for Nodes pinned into blocks
-  Arena *a = Thread::current()->resource_area();
-  VectorSet visited(a);
-  schedule_pinned_nodes( visited );
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  schedule_pinned_nodes(visited);
 
   // Find the earliest Block any instruction can be placed in.  Some
   // instructions are pinned into Blocks.  Unpinned instructions can
   // appear in last block in which all their inputs occur.
   visited.Clear();
-  Node_List stack(a);
-  stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+  Node_List stack(arena);
+  // Pre-grow the list
+  stack.map((C->unique() >> 1) + 16, NULL);
   if (!schedule_early(visited, stack)) {
     // Bailout without retry
     C->record_method_not_compilable("early schedule failed");
     return;
   }
 
   // Build Def-Use edges.
-  proj_list.push(_root);        // Add real root as another root
-  proj_list.pop();
-
   // Compute the latency information (via backwards walk) for all the
   // instructions in the graph
   _node_latency = new GrowableArray<uint>(); // resource_area allocation
 
-  if( C->do_scheduling() )
-    ComputeLatenciesBackwards(visited, stack);
+  if (C->do_scheduling()) {
+    compute_latencies_backwards(visited, stack);
+  }
 
   // Now schedule all codes as LATE as possible.  This is the LCA in the
   // dominator tree of all USES of a value.  Pick the block with the least
   // loop nesting depth that is lowest in the dominator tree.
   // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
   schedule_late(visited, stack);
-  if( C->failing() ) {
+  if (C->failing()) {
     // schedule_late fails only when graph is incorrect.
     assert(!VerifyGraphEdges, "verification should have failed");
     return;
   }
-
-  unique = C->unique();
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- Detect implicit null checks ----\n");
   }
@@ -1330,14 +1336,15 @@ global_code_motion
     allowed_reasons |= nth_bit(reason);
   }
   // By reversing the loop direction we get a very minor gain on mpegaudio.
   // Feel free to revert to a forward loop for clarity.
   // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
-  for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
-    Node *proj = matcher._null_check_tests[i  ];
-    Node *val  = matcher._null_check_tests[i+1];
-    get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons);
+  for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
+    Node* proj = _matcher._null_check_tests[i];
+    Node* val = _matcher._null_check_tests[i + 1];
+    Block* block = get_block_for_node(proj);
+    block->implicit_null_check(this, proj, val, allowed_reasons);
     // The implicit_null_check will only perform the transformation
     // if the null branch is truly uncommon, *and* it leads to an
     // uncommon trap.  Combined with the too_many_traps guards
     // above, this prevents SEGV storms reported in 6366351,
     // by recompiling offending methods without this optimization.
@@ -1350,61 +1357,84 @@
   }
 #endif
 
   // Schedule locally.  Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
-  uint max_idx = C->unique();
-  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
+  GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
   visited.Clear();
-  for (uint i = 0; i < _num_blocks; i++) {
-    if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (!block->schedule_local(this, _matcher, ready_cnt, visited)) {
       if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
         C->record_method_not_compilable("local schedule failed");
       }
       return;
     }
   }
 
   // If we inserted any instructions between a Call and his CatchNode,
   // clone the instructions on all paths below the Catch.
-  for (uint i = 0; i < _num_blocks; i++) {
-    _blocks[i]->call_catch_cleanup(this, C);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->call_catch_cleanup(this, C);
   }
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- After GlobalCodeMotion ----\n");
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump();
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      Block* block = get_block(i);
+      block->dump();
     }
   }
 #endif
   // Dead.
   _node_latency = (GrowableArray<uint> *)0xdeadbeef;
 }
 
+bool PhaseCFG::do_global_code_motion() {
+
+  build_dominator_tree();
+  if (C->failing()) {
+    return false;
+  }
+
+  NOT_PRODUCT( C->verify_graph_edges(); )
+
+  estimate_block_frequency();
+
+  global_code_motion();
+
+  if (C->failing()) {
+    return false;
+  }
+
+  return true;
+}
 
 //------------------------------Estimate_Block_Frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
-void PhaseCFG::Estimate_Block_Frequency() {
+void PhaseCFG::estimate_block_frequency() {
 
   // Force conditional branches leading to uncommon traps to be unlikely,
   // not because we get to the uncommon_trap with less relative frequency,
   // but because an uncommon_trap typically causes a deopt, so we only get
   // there once.
   if (C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
       Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
         worklist.push(pb);
       }
     }
     while (worklist.size() > 0) {
       Block* uct = worklist.pop();
-      if (uct == _broot) continue;
+      if (uct == get_root_block()) {
+        continue;
+      }
       for (uint i = 1; i < uct->num_preds(); i++) {
         Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1) {
           worklist.push(pb);
         } else if (pb->num_fall_throughs() == 2) {
@@ -1424,16 +1454,16 @@ estimate_block_frequency
   // Adjust all frequencies to be relative to a single method entry
   _root_loop->_freq = 1.0;
   _root_loop->scale_freq();
 
   // Save outmost loop frequency for LRG frequency threshold
-  _outer_loop_freq = _root_loop->outer_loop_freq();
+  _outer_loop_frequency = _root_loop->outer_loop_freq();
 
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
       Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
         worklist.push(pb);
       }
@@ -1449,12 +1479,12 @@ estimate_block_frequency
       }
     }
   }
 
 #ifdef ASSERT
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* b = get_block(i);
     assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
   }
 #endif
 
 #ifndef PRODUCT
@@ -1474,34 +1504,34 @@
 //----------------------------create_loop_tree--------------------------------
 // Create a loop tree from the CFG
 CFGLoop* PhaseCFG::create_loop_tree() {
 
 #ifdef ASSERT
-  assert( _blocks[0] == _broot, "" );
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  assert(get_block(0) == get_root_block(), "first block should be root block");
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
     // Check that _loop field are clear...we could clear them if not.
-    assert(b->_loop == NULL, "clear _loop expected");
+    assert(block->_loop == NULL, "clear _loop expected");
     // Sanity check that the RPO numbering is reflected in the _blocks array.
     // It doesn't have to be for the loop tree to be built, but if it is not,
     // then the blocks have been reordered since dom graph building...which
     // may question the RPO numbering
-    assert(b->_rpo == i, "unexpected reverse post order number");
+    assert(block->_rpo == i, "unexpected reverse post order number");
   }
 #endif
 
   int idct = 0;
   CFGLoop* root_loop = new CFGLoop(idct++);
 
   Block_List worklist;
 
   // Assign blocks to loops
-  for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
-    Block *b = _blocks[i];
+  for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block
+    Block* block = get_block(i);
 
-    if (b->head()->is_Loop()) {
-      Block* loop_head = b;
+    if (block->head()->is_Loop()) {
+      Block* loop_head = block;
       assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
       Block* tail = get_block_for_node(tail_n);
 
       // Defensively filter out Loop nodes for non-single-entry loops.
@@ -1531,27 +1561,27 @@ create_loop_tree
     }
   }
 
   // Create a member list for each loop consisting
   // of both blocks and (immediate child) loops.
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    CFGLoop* lp = b->_loop;
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    CFGLoop* lp = block->_loop;
     if (lp == NULL) {
       // Not assigned to a loop. Add it to the method's pseudo loop.
-      b->_loop = root_loop;
+      block->_loop = root_loop;
       lp = root_loop;
     }
-    if (lp == root_loop || b != lp->head()) { // loop heads are already members
-      lp->add_member(b);
+    if (lp == root_loop || block != lp->head()) { // loop heads are already members
+      lp->add_member(block);
    }
     if (lp != root_loop) {
       if (lp->parent() == NULL) {
         // Not a nested loop. Make it a child of the method's pseudo loop.
         root_loop->add_nested_loop(lp);
       }
-      if (b == lp->head()) {
+      if (block == lp->head()) {
         // Add nested loop to member list of parent loop.
         lp->parent()->add_member(lp);
       }
     }
   }