comparison src/share/vm/opto/block.cpp @ 14428:044b28168e20

8003854: PPC64 (part 115): Introduce PostallocExpand that expands nodes after register allocation Summary: added ability in C2 to expand mach nodes to several mach nodes after register allocation Reviewed-by: kvn
author goetz
date Thu, 14 Nov 2013 19:24:59 -0800
parents 4b078f877b56
children 41b780b43b74
comparison
equal deleted inserted replaced
14427:eb178e97560c 14428:044b28168e20
140 } 140 }
141 141
142 // Find and remove n from block list 142 // Find and remove n from block list
143 void Block::find_remove( const Node *n ) { 143 void Block::find_remove( const Node *n ) {
144 remove_node(find_node(n)); 144 remove_node(find_node(n));
145 }
146
147 bool Block::contains(const Node *n) const {
148 return _nodes.contains(n);
145 } 149 }
146 150
147 // Return empty status of a block. Empty blocks contain only the head, other 151 // Return empty status of a block. Empty blocks contain only the head, other
148 // ideal nodes, and an optional trailing goto. 152 // ideal nodes, and an optional trailing goto.
149 int Block::is_Empty() const { 153 int Block::is_Empty() const {
697 } 701 }
698 702
699 // Fix up the final control flow for basic blocks. 703 // Fix up the final control flow for basic blocks.
700 void PhaseCFG::fixup_flow() { 704 void PhaseCFG::fixup_flow() {
701 // Fixup final control flow for the blocks. Remove jump-to-next 705 // Fixup final control flow for the blocks. Remove jump-to-next
702 // block. If neither arm of a IF follows the conditional branch, we 706 // block. If neither arm of an IF follows the conditional branch, we
703 // have to add a second jump after the conditional. We place the 707 // have to add a second jump after the conditional. We place the
704 // TRUE branch target in succs[0] for both GOTOs and IFs. 708 // TRUE branch target in succs[0] for both GOTOs and IFs.
705 for (uint i = 0; i < number_of_blocks(); i++) { 709 for (uint i = 0; i < number_of_blocks(); i++) {
706 Block* block = get_block(i); 710 Block* block = get_block(i);
707 block->_pre_order = i; // turn pre-order into block-index 711 block->_pre_order = i; // turn pre-order into block-index
842 } 846 }
843 } // End of for all blocks 847 } // End of for all blocks
844 } 848 }
845 849
846 850
851 // postalloc_expand: Expand nodes after register allocation.
852 //
853 // postalloc_expand has to be called after register allocation, just
854 // before output (i.e. scheduling). It only gets called if
855 // Matcher::require_postalloc_expand is true.
856 //
857 // Background:
858 //
859 // Nodes that are expandend (one compound node requiring several
860 // assembler instructions to be implemented split into two or more
861 // non-compound nodes) after register allocation are not as nice as
862 // the ones expanded before register allocation - they don't
863 // participate in optimizations as global code motion. But after
864 // register allocation we can expand nodes that use registers which
865 // are not spillable or registers that are not allocated, because the
866 // old compound node is simply replaced (in its location in the basic
867 // block) by a new subgraph which does not contain compound nodes any
868 // more. The scheduler called during output can later on process these
869 // non-compound nodes.
870 //
871 // Implementation:
872 //
873 // Nodes requiring postalloc expand are specified in the ad file by using
874 // a postalloc_expand statement instead of ins_encode. A postalloc_expand
875 // contains a single call to an encoding, as does an ins_encode
876 // statement. Instead of an emit() function a postalloc_expand() function
877 // is generated that doesn't emit assembler but creates a new
878 // subgraph. The code below calls this postalloc_expand function for each
879 // node with the appropriate attribute. This function returns the new
880 // nodes generated in an array passed in the call. The old node,
881 // potential MachTemps before and potential Projs after it then get
882 // disconnected and replaced by the new nodes. The instruction
883 // generating the result has to be the last one in the array. In
884 // general it is assumed that Projs after the node expanded are
885 // kills. These kills are not required any more after expanding as
886 // there are now explicitly visible def-use chains and the Projs are
887 // removed. This does not hold for calls: They do not only have
888 // kill-Projs but also Projs defining values. Therefore Projs after
889 // the node expanded are removed for all but for calls. If a node is
890 // to be reused, it must be added to the nodes list returned, and it
891 // will be added again.
892 //
893 // Implementing the postalloc_expand function for a node in an enc_class
894 // is rather tedious. It requires knowledge about many node details, as
895 // the nodes and the subgraph must be hand crafted. To simplify this,
896 // adlc generates some utility variables into the postalloc_expand function,
897 // e.g., holding the operands as specified by the postalloc_expand encoding
898 // specification, e.g.:
899 // * unsigned idx_<par_name> holding the index of the node in the ins
900 // * Node *n_<par_name> holding the node loaded from the ins
901 // * MachOpnd *op_<par_name> holding the corresponding operand
902 //
903 // The ordering of operands can not be determined by looking at a
904 // rule. Especially if a match rule matches several different trees,
905 // several nodes are generated from one instruct specification with
906 // different operand orderings. In this case the adlc generated
907 // variables are the only way to access the ins and operands
908 // deterministically.
909 //
910 // If assigning a register to a node that contains an oop, don't
911 // forget to call ra_->set_oop() for the node.
912 void PhaseCFG::postalloc_expand(PhaseRegAlloc* _ra) {
913 GrowableArray <Node *> new_nodes(32); // Array with new nodes filled by postalloc_expand function of node.
914 GrowableArray <Node *> remove(32);
915 GrowableArray <Node *> succs(32);
916 unsigned int max_idx = C->unique(); // Remember to distinguish new from old nodes.
917 DEBUG_ONLY(bool foundNode = false);
918
919 // for all blocks
920 for (uint i = 0; i < number_of_blocks(); i++) {
921 Block *b = _blocks[i];
922 // For all instructions in the current block.
923 for (uint j = 0; j < b->number_of_nodes(); j++) {
924 Node *n = b->get_node(j);
925 if (n->is_Mach() && n->as_Mach()->requires_postalloc_expand()) {
926 #ifdef ASSERT
927 if (TracePostallocExpand) {
928 if (!foundNode) {
929 foundNode = true;
930 tty->print("POSTALLOC EXPANDING %d %s\n", C->compile_id(),
931 C->method() ? C->method()->name()->as_utf8() : C->stub_name());
932 }
933 tty->print(" postalloc expanding "); n->dump();
934 if (Verbose) {
935 tty->print(" with ins:\n");
936 for (uint k = 0; k < n->len(); ++k) {
937 if (n->in(k)) { tty->print(" "); n->in(k)->dump(); }
938 }
939 }
940 }
941 #endif
942 new_nodes.clear();
943 // Collect nodes that have to be removed from the block later on.
944 uint req = n->req();
945 remove.clear();
946 for (uint k = 0; k < req; ++k) {
947 if (n->in(k) && n->in(k)->is_MachTemp()) {
948 remove.push(n->in(k)); // MachTemps which are inputs to the old node have to be removed.
949 n->in(k)->del_req(0);
950 j--;
951 }
952 }
953
954 // Check whether we can allocate enough nodes. We set a fix limit for
955 // the size of postalloc expands with this.
956 uint unique_limit = C->unique() + 40;
957 if (unique_limit >= _ra->node_regs_max_index()) {
958 Compile::current()->record_failure("out of nodes in postalloc expand");
959 return;
960 }
961
962 // Emit (i.e. generate new nodes).
963 n->as_Mach()->postalloc_expand(&new_nodes, _ra);
964
965 assert(C->unique() < unique_limit, "You allocated too many nodes in your postalloc expand.");
966
967 // Disconnect the inputs of the old node.
968 //
969 // We reuse MachSpillCopy nodes. If we need to expand them, there
970 // are many, so reusing pays off. If reused, the node already
971 // has the new ins. n must be the last node on new_nodes list.
972 if (!n->is_MachSpillCopy()) {
973 for (int k = req - 1; k >= 0; --k) {
974 n->del_req(k);
975 }
976 }
977
978 #ifdef ASSERT
979 // Check that all nodes have proper operands.
980 for (int k = 0; k < new_nodes.length(); ++k) {
981 if (new_nodes.at(k)->_idx < max_idx || !new_nodes.at(k)->is_Mach()) continue; // old node, Proj ...
982 MachNode *m = new_nodes.at(k)->as_Mach();
983 for (unsigned int l = 0; l < m->num_opnds(); ++l) {
984 if (MachOper::notAnOper(m->_opnds[l])) {
985 outputStream *os = tty;
986 os->print("Node %s ", m->Name());
987 os->print("has invalid opnd %d: %p\n", l, m->_opnds[l]);
988 assert(0, "Invalid operands, see inline trace in hs_err_pid file.");
989 }
990 }
991 }
992 #endif
993
994 // Collect succs of old node in remove (for projections) and in succs (for
995 // all other nodes) do _not_ collect projections in remove (but in succs)
996 // in case the node is a call. We need the projections for calls as they are
997 // associated with registes (i.e. they are defs).
998 succs.clear();
999 for (DUIterator k = n->outs(); n->has_out(k); k++) {
1000 if (n->out(k)->is_Proj() && !n->is_MachCall() && !n->is_MachBranch()) {
1001 remove.push(n->out(k));
1002 } else {
1003 succs.push(n->out(k));
1004 }
1005 }
1006 // Replace old node n as input of its succs by last of the new nodes.
1007 for (int k = 0; k < succs.length(); ++k) {
1008 Node *succ = succs.at(k);
1009 for (uint l = 0; l < succ->req(); ++l) {
1010 if (succ->in(l) == n) {
1011 succ->set_req(l, new_nodes.at(new_nodes.length() - 1));
1012 }
1013 }
1014 for (uint l = succ->req(); l < succ->len(); ++l) {
1015 if (succ->in(l) == n) {
1016 succ->set_prec(l, new_nodes.at(new_nodes.length() - 1));
1017 }
1018 }
1019 }
1020
1021 // Index of old node in block.
1022 uint index = b->find_node(n);
1023 // Insert new nodes into block and map them in nodes->blocks array
1024 // and remember last node in n2.
1025 Node *n2 = NULL;
1026 for (int k = 0; k < new_nodes.length(); ++k) {
1027 n2 = new_nodes.at(k);
1028 b->insert_node(n2, ++index);
1029 map_node_to_block(n2, b);
1030 }
1031
1032 // Add old node n to remove and remove them all from block.
1033 remove.push(n);
1034 j--;
1035 #ifdef ASSERT
1036 if (TracePostallocExpand && Verbose) {
1037 tty->print(" removing:\n");
1038 for (int k = 0; k < remove.length(); ++k) {
1039 tty->print(" "); remove.at(k)->dump();
1040 }
1041 tty->print(" inserting:\n");
1042 for (int k = 0; k < new_nodes.length(); ++k) {
1043 tty->print(" "); new_nodes.at(k)->dump();
1044 }
1045 }
1046 #endif
1047 for (int k = 0; k < remove.length(); ++k) {
1048 if (b->contains(remove.at(k))) {
1049 b->find_remove(remove.at(k));
1050 } else {
1051 assert(remove.at(k)->is_Proj() && (remove.at(k)->in(0)->is_MachBranch()), "");
1052 }
1053 }
1054 // If anything has been inserted (n2 != NULL), continue after last node inserted.
1055 // This does not always work. Some postalloc expands don't insert any nodes, if they
1056 // do optimizations (e.g., max(x,x)). In this case we decrement j accordingly.
1057 j = n2 ? b->find_node(n2) : j;
1058 }
1059 }
1060 }
1061
1062 #ifdef ASSERT
1063 if (foundNode) {
1064 tty->print("FINISHED %d %s\n", C->compile_id(),
1065 C->method() ? C->method()->name()->as_utf8() : C->stub_name());
1066 tty->flush();
1067 }
1068 #endif
1069 }
1070
1071
1072 //------------------------------dump-------------------------------------------
847 #ifndef PRODUCT 1073 #ifndef PRODUCT
848 void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const { 1074 void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const {
849 const Node *x = end->is_block_proj(); 1075 const Node *x = end->is_block_proj();
850 assert( x, "not a CFG" ); 1076 assert( x, "not a CFG" );
851 1077