OLD | NEW |
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 784 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
795 } | 795 } |
796 } while (FoundNewAssignment); | 796 } while (FoundNewAssignment); |
797 } | 797 } |
798 | 798 |
799 void Cfg::doAddressOpt() { | 799 void Cfg::doAddressOpt() { |
800 TimerMarker T(TimerStack::TT_doAddressOpt, this); | 800 TimerMarker T(TimerStack::TT_doAddressOpt, this); |
801 for (CfgNode *Node : Nodes) | 801 for (CfgNode *Node : Nodes) |
802 Node->doAddressOpt(); | 802 Node->doAddressOpt(); |
803 } | 803 } |
804 | 804 |
| 805 namespace { |
| 806 // ShuffleVectorUtils implements helper functions for rematerializing |
| 807 // shufflevector instructions from a sequence of extractelement/insertelement |
| 808 // instructions. It looks for the following pattern: |
| 809 // |
| 810 // %t0 = extractelement A, %n0 |
| 811 // %t1 = extractelement B, %n1 |
| 812 // %t2 = extractelement C, %n2 |
| 813 // ... |
| 814 // %tN = extractelement N, %nN |
| 815 // %d0 = insertelement undef, %t0, 0 |
| 816 // %d1 = insertelement %d0, %t1, 1 |
| 817 // %d2 = insertelement %d1, %t2, 2 |
| 818 // ... |
| 819 // %dest = insertelement %d_N-1, %tN, N |
| 820 // |
| 821 // where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two |
| 822 // distinct variables. |
| 823 namespace ShuffleVectorUtils { |
| 824 // findAllInserts is used when searching for all the insertelements that are |
| 825 // used in a shufflevector operation. This function works recursively, when |
| 826 // invoked with I = i, the function assumes Insts[i] is the last found |
| 827 // insertelement in the chain. The next insertelement insertruction is saved in |
| 828 // Insts[i+1]. |
| 829 bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, |
| 830 CfgVector<const Inst *> *Insts, SizeT I = 0) { |
| 831 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); |
| 832 |
| 833 if (I > Insts->size()) { |
| 834 if (Verbose) { |
| 835 Ctx->getStrDump() << "\tToo many inserts.\n"; |
| 836 } |
| 837 return false; |
| 838 } |
| 839 |
| 840 const auto *LastInsert = Insts->at(I); |
| 841 assert(llvm::isa<InstInsertElement>(LastInsert)); |
| 842 |
| 843 if (I == Insts->size() - 1) { |
| 844 // Matching against undef is not really needed because the value in Src(0) |
| 845 // will be totally overwritten. We still enforce it anyways because the |
| 846 // PNaCl toolchain generates the bitcode with it. |
| 847 if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) { |
| 848 if (Verbose) { |
| 849 Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " " |
| 850 << Insts->size(); |
| 851 LastInsert->dump(Func); |
| 852 Ctx->getStrDump() << "\n"; |
| 853 } |
| 854 return false; |
| 855 } |
| 856 |
| 857 // The following loop ensures that the insertelements are sorted. In theory, |
| 858 // we could relax this restriction and allow any order. As long as each |
| 859 // index appears exactly once, this chain is still a candidate for becoming |
| 860 // a shufflevector. The Insts vector is traversed backwards because the |
| 861 // instructions are "enqueued" in reverse order. |
| 862 int32_t ExpectedElement = 0; |
| 863 for (const auto *I : reverse_range(*Insts)) { |
| 864 if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() != |
| 865 ExpectedElement) { |
| 866 return false; |
| 867 } |
| 868 ++ExpectedElement; |
| 869 } |
| 870 return true; |
| 871 } |
| 872 |
| 873 const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0)); |
| 874 const auto *Def = VM->getSingleDefinition(Src0V); |
| 875 |
| 876 // Only optimize if the first operand in |
| 877 // |
| 878 // Dest = insertelement A, B, 10 |
| 879 // |
| 880 // is singly-def'ed. |
| 881 if (Def == nullptr) { |
| 882 if (Verbose) { |
| 883 Ctx->getStrDump() << "\tmulti-def: "; |
| 884 (*Insts)[I]->dump(Func); |
| 885 Ctx->getStrDump() << "\n"; |
| 886 } |
| 887 return false; |
| 888 } |
| 889 |
| 890 // We also require the (single) definition to come from an insertelement |
| 891 // instruction. |
| 892 if (!llvm::isa<InstInsertElement>(Def)) { |
| 893 if (Verbose) { |
| 894 Ctx->getStrDump() << "\tnot insert element: "; |
| 895 Def->dump(Func); |
| 896 Ctx->getStrDump() << "\n"; |
| 897 } |
| 898 return false; |
| 899 } |
| 900 |
| 901 // Everything seems fine, so we save Def in Insts, and delegate the decision |
| 902 // to findAllInserts. |
| 903 (*Insts)[I + 1] = Def; |
| 904 |
| 905 return findAllInserts(Func, Ctx, VM, Insts, I + 1); |
| 906 } |
| 907 |
| 908 // insertsLastElement returns true if Insert is inserting an element in the last |
| 909 // position of a vector. |
| 910 bool insertsLastElement(const Inst &Insert) { |
| 911 const Type DestTy = Insert.getDest()->getType(); |
| 912 assert(isVectorType(DestTy)); |
| 913 const SizeT Elem = |
| 914 llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue(); |
| 915 return Elem == typeNumElements(DestTy) - 1; |
| 916 } |
| 917 |
| 918 // findAllExtracts goes over all the insertelement instructions that are |
| 919 // candidates to be replaced by a shufflevector, and searches for all the |
| 920 // definitions of the elements being inserted. If all of the elements are the |
| 921 // result of an extractelement instruction, and all of the extractelements |
| 922 // operate on at most two different sources, than the instructions can be |
| 923 // replaced by a shufflevector. |
| 924 bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, |
| 925 const CfgVector<const Inst *> &Insts, Variable **Src0, |
| 926 Variable **Src1, CfgVector<const Inst *> *Extracts) { |
| 927 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); |
| 928 |
| 929 *Src0 = nullptr; |
| 930 *Src1 = nullptr; |
| 931 assert(Insts.size() > 0); |
| 932 for (SizeT I = 0; I < Insts.size(); ++I) { |
| 933 const auto *Insert = Insts.at(I); |
| 934 const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1)); |
| 935 if (Src1V == nullptr) { |
| 936 if (Verbose) { |
| 937 Ctx->getStrDump() << "src(1) is not a variable: "; |
| 938 Insert->dump(Func); |
| 939 Ctx->getStrDump() << "\n"; |
| 940 } |
| 941 return false; |
| 942 } |
| 943 |
| 944 const auto *Def = VM->getSingleDefinition(Src1V); |
| 945 if (Def == nullptr) { |
| 946 if (Verbose) { |
| 947 Ctx->getStrDump() << "multi-def src(1): "; |
| 948 Insert->dump(Func); |
| 949 Ctx->getStrDump() << "\n"; |
| 950 } |
| 951 return false; |
| 952 } |
| 953 |
| 954 if (!llvm::isa<InstExtractElement>(Def)) { |
| 955 if (Verbose) { |
| 956 Ctx->getStrDump() << "not extractelement: "; |
| 957 Def->dump(Func); |
| 958 Ctx->getStrDump() << "\n"; |
| 959 } |
| 960 return false; |
| 961 } |
| 962 |
| 963 auto *Src = llvm::cast<Variable>(Def->getSrc(0)); |
| 964 if (*Src0 == nullptr) { |
| 965 // No sources yet. Save Src to Src0. |
| 966 *Src0 = Src; |
| 967 } else if (*Src1 == nullptr) { |
| 968 // We already have a source, so we might save Src in Src1 -- but only if |
| 969 // Src0 is not Src. |
| 970 if (*Src0 != Src) { |
| 971 *Src1 = Src; |
| 972 } |
| 973 } else if (Src != *Src0 && Src != *Src1) { |
| 974 // More than two sources, so we can't rematerialize the shufflevector |
| 975 // instruction. |
| 976 if (Verbose) { |
| 977 Ctx->getStrDump() << "Can't shuffle more than two sources.\n"; |
| 978 } |
| 979 return false; |
| 980 } |
| 981 |
| 982 (*Extracts)[I] = Def; |
| 983 } |
| 984 |
| 985 // We should have seen at least one source operand. |
| 986 assert(*Src0 != nullptr); |
| 987 |
| 988 // If a second source was not seen, then we just make Src1 = Src0 to simplify |
| 989 // things down stream. This should not matter, as all of the indexes in the |
| 990 // shufflevector instruction will point to Src0. |
| 991 if (*Src1 == nullptr) { |
| 992 *Src1 = *Src0; |
| 993 } |
| 994 |
| 995 return true; |
| 996 } |
| 997 |
| 998 } // end of namespace ShuffleVectorUtils |
| 999 } // end of anonymous namespace |
| 1000 |
| 1001 void Cfg::materializeVectorShuffles() { |
| 1002 const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat); |
| 1003 |
| 1004 std::unique_ptr<OstreamLocker> L; |
| 1005 if (Verbose) { |
| 1006 L.reset(new OstreamLocker(getContext())); |
| 1007 getContext()->getStrDump() << "\nShuffle materialization:\n"; |
| 1008 } |
| 1009 |
| 1010 // MaxVectorElements is the maximum number of elements in the vector types |
| 1011 // handled by Subzero. We use it to create the Inserts and Extracts vectors |
| 1012 // with the appropriate size, thus avoiding resize() calls. |
| 1013 const SizeT MaxVectorElements = typeNumElements(IceType_v16i8); |
| 1014 CfgVector<const Inst *> Inserts(MaxVectorElements); |
| 1015 CfgVector<const Inst *> Extracts(MaxVectorElements); |
| 1016 |
| 1017 TimerMarker T(TimerStack::TT_materializeVectorShuffles, this); |
| 1018 for (CfgNode *Node : Nodes) { |
| 1019 for (auto &Instr : Node->getInsts()) { |
| 1020 if (!llvm::isa<InstInsertElement>(Instr)) { |
| 1021 continue; |
| 1022 } |
| 1023 if (!ShuffleVectorUtils::insertsLastElement(Instr)) { |
| 1024 // To avoid wasting time, we only start the pattern match at the last |
| 1025 // insertelement instruction -- and go backwards from there. |
| 1026 continue; |
| 1027 } |
| 1028 if (Verbose) { |
| 1029 getContext()->getStrDump() << "\tCandidate: "; |
| 1030 Instr.dump(this); |
| 1031 getContext()->getStrDump() << "\n"; |
| 1032 } |
| 1033 Inserts.resize(typeNumElements(Instr.getDest()->getType())); |
| 1034 Inserts[0] = &Instr; |
| 1035 if (!ShuffleVectorUtils::findAllInserts(this, getContext(), |
| 1036 VMetadata.get(), &Inserts)) { |
| 1037 // If we fail to find a sequence of insertelements, we stop the |
| 1038 // optimization. |
| 1039 if (Verbose) { |
| 1040 getContext()->getStrDump() << "\tFalse alarm.\n"; |
| 1041 } |
| 1042 continue; |
| 1043 } |
| 1044 if (Verbose) { |
| 1045 getContext()->getStrDump() << "\tFound the following insertelement: \n"; |
| 1046 for (auto *I : reverse_range(Inserts)) { |
| 1047 getContext()->getStrDump() << "\t\t"; |
| 1048 I->dump(this); |
| 1049 getContext()->getStrDump() << "\n"; |
| 1050 } |
| 1051 } |
| 1052 Extracts.resize(Inserts.size()); |
| 1053 Variable *Src0; |
| 1054 Variable *Src1; |
| 1055 if (!ShuffleVectorUtils::findAllExtracts(this, getContext(), |
| 1056 VMetadata.get(), Inserts, &Src0, |
| 1057 &Src1, &Extracts)) { |
| 1058 // If we fail to match the definitions of the insertelements' sources |
| 1059 // with extractelement instructions -- or if those instructions operate |
| 1060 // on more than two different variables -- we stop the optimization. |
| 1061 if (Verbose) { |
| 1062 getContext()->getStrDump() << "\tFailed to match extractelements.\n"; |
| 1063 } |
| 1064 continue; |
| 1065 } |
| 1066 if (Verbose) { |
| 1067 getContext()->getStrDump() |
| 1068 << "\tFound the following insert/extract element pairs: \n"; |
| 1069 for (SizeT I = 0; I < Inserts.size(); ++I) { |
| 1070 const SizeT Pos = Inserts.size() - I - 1; |
| 1071 getContext()->getStrDump() << "\t\tInsert : "; |
| 1072 Inserts[Pos]->dump(this); |
| 1073 getContext()->getStrDump() << "\n\t\tExtract: "; |
| 1074 Extracts[Pos]->dump(this); |
| 1075 getContext()->getStrDump() << "\n"; |
| 1076 } |
| 1077 } |
| 1078 |
| 1079 assert(Src0 != nullptr); |
| 1080 assert(Src1 != nullptr); |
| 1081 |
| 1082 auto *ShuffleVector = |
| 1083 InstShuffleVector::create(this, Instr.getDest(), Src0, Src1); |
| 1084 assert(ShuffleVector->getSrc(0) == Src0); |
| 1085 assert(ShuffleVector->getSrc(1) == Src1); |
| 1086 for (SizeT I = 0; I < Extracts.size(); ++I) { |
| 1087 const SizeT Pos = Extracts.size() - I - 1; |
| 1088 auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1)); |
| 1089 if (Src0 == Extracts[Pos]->getSrc(0)) { |
| 1090 ShuffleVector->addIndex(Index); |
| 1091 } else { |
| 1092 ShuffleVector->addIndex(llvm::cast<ConstantInteger32>( |
| 1093 Ctx->getConstantInt32(Index->getValue() + Extracts.size()))); |
| 1094 } |
| 1095 } |
| 1096 |
| 1097 if (Verbose) { |
| 1098 getContext()->getStrDump() << "Created: "; |
| 1099 ShuffleVector->dump(this); |
| 1100 getContext()->getStrDump() << "\n"; |
| 1101 } |
| 1102 |
| 1103 Instr.setDeleted(); |
| 1104 auto &LoweringContext = getTarget()->getContext(); |
| 1105 LoweringContext.setInsertPoint(Instr); |
| 1106 LoweringContext.insert(ShuffleVector); |
| 1107 } |
| 1108 } |
| 1109 } |
| 1110 |
805 void Cfg::doNopInsertion() { | 1111 void Cfg::doNopInsertion() { |
806 if (!getFlags().getShouldDoNopInsertion()) | 1112 if (!getFlags().getShouldDoNopInsertion()) |
807 return; | 1113 return; |
808 TimerMarker T(TimerStack::TT_doNopInsertion, this); | 1114 TimerMarker T(TimerStack::TT_doNopInsertion, this); |
809 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, | 1115 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, |
810 SequenceNumber); | 1116 SequenceNumber); |
811 for (CfgNode *Node : Nodes) | 1117 for (CfgNode *Node : Nodes) |
812 Node->doNopInsertion(RNG); | 1118 Node->doNopInsertion(RNG); |
813 } | 1119 } |
814 | 1120 |
(...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1172 } | 1478 } |
1173 } | 1479 } |
1174 // Print each basic block | 1480 // Print each basic block |
1175 for (CfgNode *Node : Nodes) | 1481 for (CfgNode *Node : Nodes) |
1176 Node->dump(this); | 1482 Node->dump(this); |
1177 if (isVerbose(IceV_Instructions)) | 1483 if (isVerbose(IceV_Instructions)) |
1178 Str << "}\n"; | 1484 Str << "}\n"; |
1179 } | 1485 } |
1180 | 1486 |
1181 } // end of namespace Ice | 1487 } // end of namespace Ice |
OLD | NEW |