Mercurial > hg > graal-compiler
comparison src/share/vm/opto/block.cpp @ 14428:044b28168e20
8003854: PPC64 (part 115): Introduce PostallocExpand that expands nodes after register allocation
Summary: added ability in C2 to expand mach nodes to several mach nodes after register allocation
Reviewed-by: kvn
author | goetz |
---|---|
date | Thu, 14 Nov 2013 19:24:59 -0800 |
parents | 4b078f877b56 |
children | 41b780b43b74 |
comparison
equal
deleted
inserted
replaced
14427:eb178e97560c | 14428:044b28168e20 |
---|---|
140 } | 140 } |
141 | 141 |
142 // Find and remove n from block list | 142 // Find and remove n from block list |
143 void Block::find_remove( const Node *n ) { | 143 void Block::find_remove( const Node *n ) { |
144 remove_node(find_node(n)); | 144 remove_node(find_node(n)); |
145 } | |
146 | |
147 bool Block::contains(const Node *n) const { | |
148 return _nodes.contains(n); | |
145 } | 149 } |
146 | 150 |
147 // Return empty status of a block. Empty blocks contain only the head, other | 151 // Return empty status of a block. Empty blocks contain only the head, other |
148 // ideal nodes, and an optional trailing goto. | 152 // ideal nodes, and an optional trailing goto. |
149 int Block::is_Empty() const { | 153 int Block::is_Empty() const { |
697 } | 701 } |
698 | 702 |
699 // Fix up the final control flow for basic blocks. | 703 // Fix up the final control flow for basic blocks. |
700 void PhaseCFG::fixup_flow() { | 704 void PhaseCFG::fixup_flow() { |
701 // Fixup final control flow for the blocks. Remove jump-to-next | 705 // Fixup final control flow for the blocks. Remove jump-to-next |
702 // block. If neither arm of a IF follows the conditional branch, we | 706 // block. If neither arm of an IF follows the conditional branch, we |
703 // have to add a second jump after the conditional. We place the | 707 // have to add a second jump after the conditional. We place the |
704 // TRUE branch target in succs[0] for both GOTOs and IFs. | 708 // TRUE branch target in succs[0] for both GOTOs and IFs. |
705 for (uint i = 0; i < number_of_blocks(); i++) { | 709 for (uint i = 0; i < number_of_blocks(); i++) { |
706 Block* block = get_block(i); | 710 Block* block = get_block(i); |
707 block->_pre_order = i; // turn pre-order into block-index | 711 block->_pre_order = i; // turn pre-order into block-index |
842 } | 846 } |
843 } // End of for all blocks | 847 } // End of for all blocks |
844 } | 848 } |
845 | 849 |
846 | 850 |
851 // postalloc_expand: Expand nodes after register allocation. | |
852 // | |
853 // postalloc_expand has to be called after register allocation, just | |
854 // before output (i.e. scheduling). It only gets called if | |
855 // Matcher::require_postalloc_expand is true. | |
856 // | |
857 // Background: | |
858 // | |
859 // Nodes that are expandend (one compound node requiring several | |
860 // assembler instructions to be implemented split into two or more | |
861 // non-compound nodes) after register allocation are not as nice as | |
862 // the ones expanded before register allocation - they don't | |
863 // participate in optimizations as global code motion. But after | |
864 // register allocation we can expand nodes that use registers which | |
865 // are not spillable or registers that are not allocated, because the | |
866 // old compound node is simply replaced (in its location in the basic | |
867 // block) by a new subgraph which does not contain compound nodes any | |
868 // more. The scheduler called during output can later on process these | |
869 // non-compound nodes. | |
870 // | |
871 // Implementation: | |
872 // | |
873 // Nodes requiring postalloc expand are specified in the ad file by using | |
874 // a postalloc_expand statement instead of ins_encode. A postalloc_expand | |
875 // contains a single call to an encoding, as does an ins_encode | |
876 // statement. Instead of an emit() function a postalloc_expand() function | |
877 // is generated that doesn't emit assembler but creates a new | |
878 // subgraph. The code below calls this postalloc_expand function for each | |
879 // node with the appropriate attribute. This function returns the new | |
880 // nodes generated in an array passed in the call. The old node, | |
881 // potential MachTemps before and potential Projs after it then get | |
882 // disconnected and replaced by the new nodes. The instruction | |
883 // generating the result has to be the last one in the array. In | |
884 // general it is assumed that Projs after the node expanded are | |
885 // kills. These kills are not required any more after expanding as | |
886 // there are now explicitly visible def-use chains and the Projs are | |
887 // removed. This does not hold for calls: They do not only have | |
888 // kill-Projs but also Projs defining values. Therefore Projs after | |
889 // the node expanded are removed for all but for calls. If a node is | |
890 // to be reused, it must be added to the nodes list returned, and it | |
891 // will be added again. | |
892 // | |
893 // Implementing the postalloc_expand function for a node in an enc_class | |
894 // is rather tedious. It requires knowledge about many node details, as | |
895 // the nodes and the subgraph must be hand crafted. To simplify this, | |
896 // adlc generates some utility variables into the postalloc_expand function, | |
897 // e.g., holding the operands as specified by the postalloc_expand encoding | |
898 // specification, e.g.: | |
899 // * unsigned idx_<par_name> holding the index of the node in the ins | |
900 // * Node *n_<par_name> holding the node loaded from the ins | |
901 // * MachOpnd *op_<par_name> holding the corresponding operand | |
902 // | |
903 // The ordering of operands can not be determined by looking at a | |
904 // rule. Especially if a match rule matches several different trees, | |
905 // several nodes are generated from one instruct specification with | |
906 // different operand orderings. In this case the adlc generated | |
907 // variables are the only way to access the ins and operands | |
908 // deterministically. | |
909 // | |
910 // If assigning a register to a node that contains an oop, don't | |
911 // forget to call ra_->set_oop() for the node. | |
912 void PhaseCFG::postalloc_expand(PhaseRegAlloc* _ra) { | |
913 GrowableArray <Node *> new_nodes(32); // Array with new nodes filled by postalloc_expand function of node. | |
914 GrowableArray <Node *> remove(32); | |
915 GrowableArray <Node *> succs(32); | |
916 unsigned int max_idx = C->unique(); // Remember to distinguish new from old nodes. | |
917 DEBUG_ONLY(bool foundNode = false); | |
918 | |
919 // for all blocks | |
920 for (uint i = 0; i < number_of_blocks(); i++) { | |
921 Block *b = _blocks[i]; | |
922 // For all instructions in the current block. | |
923 for (uint j = 0; j < b->number_of_nodes(); j++) { | |
924 Node *n = b->get_node(j); | |
925 if (n->is_Mach() && n->as_Mach()->requires_postalloc_expand()) { | |
926 #ifdef ASSERT | |
927 if (TracePostallocExpand) { | |
928 if (!foundNode) { | |
929 foundNode = true; | |
930 tty->print("POSTALLOC EXPANDING %d %s\n", C->compile_id(), | |
931 C->method() ? C->method()->name()->as_utf8() : C->stub_name()); | |
932 } | |
933 tty->print(" postalloc expanding "); n->dump(); | |
934 if (Verbose) { | |
935 tty->print(" with ins:\n"); | |
936 for (uint k = 0; k < n->len(); ++k) { | |
937 if (n->in(k)) { tty->print(" "); n->in(k)->dump(); } | |
938 } | |
939 } | |
940 } | |
941 #endif | |
942 new_nodes.clear(); | |
943 // Collect nodes that have to be removed from the block later on. | |
944 uint req = n->req(); | |
945 remove.clear(); | |
946 for (uint k = 0; k < req; ++k) { | |
947 if (n->in(k) && n->in(k)->is_MachTemp()) { | |
948 remove.push(n->in(k)); // MachTemps which are inputs to the old node have to be removed. | |
949 n->in(k)->del_req(0); | |
950 j--; | |
951 } | |
952 } | |
953 | |
954 // Check whether we can allocate enough nodes. We set a fix limit for | |
955 // the size of postalloc expands with this. | |
956 uint unique_limit = C->unique() + 40; | |
957 if (unique_limit >= _ra->node_regs_max_index()) { | |
958 Compile::current()->record_failure("out of nodes in postalloc expand"); | |
959 return; | |
960 } | |
961 | |
962 // Emit (i.e. generate new nodes). | |
963 n->as_Mach()->postalloc_expand(&new_nodes, _ra); | |
964 | |
965 assert(C->unique() < unique_limit, "You allocated too many nodes in your postalloc expand."); | |
966 | |
967 // Disconnect the inputs of the old node. | |
968 // | |
969 // We reuse MachSpillCopy nodes. If we need to expand them, there | |
970 // are many, so reusing pays off. If reused, the node already | |
971 // has the new ins. n must be the last node on new_nodes list. | |
972 if (!n->is_MachSpillCopy()) { | |
973 for (int k = req - 1; k >= 0; --k) { | |
974 n->del_req(k); | |
975 } | |
976 } | |
977 | |
978 #ifdef ASSERT | |
979 // Check that all nodes have proper operands. | |
980 for (int k = 0; k < new_nodes.length(); ++k) { | |
981 if (new_nodes.at(k)->_idx < max_idx || !new_nodes.at(k)->is_Mach()) continue; // old node, Proj ... | |
982 MachNode *m = new_nodes.at(k)->as_Mach(); | |
983 for (unsigned int l = 0; l < m->num_opnds(); ++l) { | |
984 if (MachOper::notAnOper(m->_opnds[l])) { | |
985 outputStream *os = tty; | |
986 os->print("Node %s ", m->Name()); | |
987 os->print("has invalid opnd %d: %p\n", l, m->_opnds[l]); | |
988 assert(0, "Invalid operands, see inline trace in hs_err_pid file."); | |
989 } | |
990 } | |
991 } | |
992 #endif | |
993 | |
994 // Collect succs of old node in remove (for projections) and in succs (for | |
995 // all other nodes) do _not_ collect projections in remove (but in succs) | |
996 // in case the node is a call. We need the projections for calls as they are | |
997 // associated with registes (i.e. they are defs). | |
998 succs.clear(); | |
999 for (DUIterator k = n->outs(); n->has_out(k); k++) { | |
1000 if (n->out(k)->is_Proj() && !n->is_MachCall() && !n->is_MachBranch()) { | |
1001 remove.push(n->out(k)); | |
1002 } else { | |
1003 succs.push(n->out(k)); | |
1004 } | |
1005 } | |
1006 // Replace old node n as input of its succs by last of the new nodes. | |
1007 for (int k = 0; k < succs.length(); ++k) { | |
1008 Node *succ = succs.at(k); | |
1009 for (uint l = 0; l < succ->req(); ++l) { | |
1010 if (succ->in(l) == n) { | |
1011 succ->set_req(l, new_nodes.at(new_nodes.length() - 1)); | |
1012 } | |
1013 } | |
1014 for (uint l = succ->req(); l < succ->len(); ++l) { | |
1015 if (succ->in(l) == n) { | |
1016 succ->set_prec(l, new_nodes.at(new_nodes.length() - 1)); | |
1017 } | |
1018 } | |
1019 } | |
1020 | |
1021 // Index of old node in block. | |
1022 uint index = b->find_node(n); | |
1023 // Insert new nodes into block and map them in nodes->blocks array | |
1024 // and remember last node in n2. | |
1025 Node *n2 = NULL; | |
1026 for (int k = 0; k < new_nodes.length(); ++k) { | |
1027 n2 = new_nodes.at(k); | |
1028 b->insert_node(n2, ++index); | |
1029 map_node_to_block(n2, b); | |
1030 } | |
1031 | |
1032 // Add old node n to remove and remove them all from block. | |
1033 remove.push(n); | |
1034 j--; | |
1035 #ifdef ASSERT | |
1036 if (TracePostallocExpand && Verbose) { | |
1037 tty->print(" removing:\n"); | |
1038 for (int k = 0; k < remove.length(); ++k) { | |
1039 tty->print(" "); remove.at(k)->dump(); | |
1040 } | |
1041 tty->print(" inserting:\n"); | |
1042 for (int k = 0; k < new_nodes.length(); ++k) { | |
1043 tty->print(" "); new_nodes.at(k)->dump(); | |
1044 } | |
1045 } | |
1046 #endif | |
1047 for (int k = 0; k < remove.length(); ++k) { | |
1048 if (b->contains(remove.at(k))) { | |
1049 b->find_remove(remove.at(k)); | |
1050 } else { | |
1051 assert(remove.at(k)->is_Proj() && (remove.at(k)->in(0)->is_MachBranch()), ""); | |
1052 } | |
1053 } | |
1054 // If anything has been inserted (n2 != NULL), continue after last node inserted. | |
1055 // This does not always work. Some postalloc expands don't insert any nodes, if they | |
1056 // do optimizations (e.g., max(x,x)). In this case we decrement j accordingly. | |
1057 j = n2 ? b->find_node(n2) : j; | |
1058 } | |
1059 } | |
1060 } | |
1061 | |
1062 #ifdef ASSERT | |
1063 if (foundNode) { | |
1064 tty->print("FINISHED %d %s\n", C->compile_id(), | |
1065 C->method() ? C->method()->name()->as_utf8() : C->stub_name()); | |
1066 tty->flush(); | |
1067 } | |
1068 #endif | |
1069 } | |
1070 | |
1071 | |
1072 //------------------------------dump------------------------------------------- | |
847 #ifndef PRODUCT | 1073 #ifndef PRODUCT |
848 void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const { | 1074 void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const { |
849 const Node *x = end->is_block_proj(); | 1075 const Node *x = end->is_block_proj(); |
850 assert( x, "not a CFG" ); | 1076 assert( x, "not a CFG" ); |
851 | 1077 |