OLD | NEW |
---|---|
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 784 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
795 } | 795 } |
796 } while (FoundNewAssignment); | 796 } while (FoundNewAssignment); |
797 } | 797 } |
798 | 798 |
799 void Cfg::doAddressOpt() { | 799 void Cfg::doAddressOpt() { |
800 TimerMarker T(TimerStack::TT_doAddressOpt, this); | 800 TimerMarker T(TimerStack::TT_doAddressOpt, this); |
801 for (CfgNode *Node : Nodes) | 801 for (CfgNode *Node : Nodes) |
802 Node->doAddressOpt(); | 802 Node->doAddressOpt(); |
803 } | 803 } |
804 | 804 |
805 namespace { | |
806 // ShuffleVectorUtils implements helper functions for rematerializing | |
807 // shufflevector instructions from a sequence of extractelement/insertelement | |
808 // instructions. It looks for the following pattern: | |
809 // | |
810 // %t0 = extractelement A, %n0 | |
811 // %t1 = extractelement B, %n1 | |
812 // %t2 = extractelement C, %n2 | |
813 // ... | |
814 // %tN = extractelement N, %nN | |
815 // %d0 = insertelement undef, %t0, 0 | |
816 // %d1 = insertelement %d0, %t1, 1 | |
817 // %d2 = insertelement %d1, %t2, 2 | |
818 // ... | |
819 // %dest = insertelement %d_N-1, %tN, N | |
820 // | |
821 // where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two | |
822 // distinct variables. | |
823 namespace ShuffleVectorUtils { | |
824 // findAllInserts is used when searching for all the insertelements that are | |
825 // used in a shufflevector operation. This function works recursively, when | |
826 // invoked with I = i, the function assumes Insts[i] is the last found | |
827 // insertelement in the chain. The next insertelement insertruction is saved in | |
828 // Insts[i+1]. | |
829 bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, | |
830 CfgVector<const Inst *> *Insts, SizeT I = 0) { | |
831 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); | |
832 | |
833 if (I > Insts->size()) { | |
834 if (Verbose) { | |
835 Ctx->getStrDump() << "\tToo many inserts.\n"; | |
836 } | |
837 return false; | |
838 } | |
839 | |
840 const auto *LastInsert = Insts->at(I); | |
841 assert(llvm::isa<InstInsertElement>(LastInsert)); | |
842 | |
843 if (I == Insts->size() - 1) { | |
844 // Matching agains undef is not really needed because the value in Src(0) | |
Jim Stichnoth
2016/04/20 23:43:11
against
John
2016/04/21 00:02:17
Done.
| |
845 // will be totally overwritten. We still enforece it anyways because the | |
Jim Stichnoth
2016/04/20 23:43:11
enforce
John
2016/04/21 00:02:17
Done.
| |
846 // pnacl toolchain generates the bitcode with it. | |
Jim Stichnoth
2016/04/20 23:43:10
PNaCl
John
2016/04/21 00:02:18
Done.
| |
847 if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) { | |
848 if (Verbose) { | |
849 OstreamLocker _(Ctx); | |
Jim Stichnoth
2016/04/20 23:43:11
Instead of using OstreamLocker everywhere, you sho
John
2016/04/21 00:02:17
see if the solution works.
| |
850 Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " " | |
851 << Insts->size(); | |
852 LastInsert->dump(Func); | |
853 Ctx->getStrDump() << "\n"; | |
854 } | |
855 return false; | |
856 } | |
857 | |
858 // The following loop ensures that the insertelements are sorted. In theory, | |
859 // we could relax this restriction and allow any order. As long as each | |
860 // index appear excatly once, this chain is still a candidate for becoming a | |
Jim Stichnoth
2016/04/20 23:43:10
appears exactly
John
2016/04/21 00:02:18
Done.
| |
861 // shufflevector. The Insts vector is traversed backwards because the | |
862 // instructions are "enqueued" in reverse order. | |
863 int32_t ExpectedElement = 0; | |
864 for (const auto *I : reverse_range(*Insts)) { | |
865 if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() != | |
866 ExpectedElement) { | |
867 return false; | |
868 } | |
869 ++ExpectedElement; | |
870 } | |
871 return true; | |
872 } | |
873 | |
874 const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0)); | |
875 const auto *Def = VM->getSingleDefinition(Src0V); | |
876 | |
877 // Only optimize if the first operand in | |
878 // | |
879 // Dest = insertelement A, B, 10 | |
880 // | |
881 // is singly-def'ed. | |
882 if (Def == nullptr) { | |
883 if (Verbose) { | |
884 OstreamLocker _(Ctx); | |
885 Ctx->getStrDump() << "\tmulti-def: "; | |
886 (*Insts)[I]->dump(Func); | |
887 Ctx->getStrDump() << "\n"; | |
888 } | |
889 return false; | |
890 } | |
891 | |
892 // We also require the (single) definition to come from an insertelement | |
893 // instruction. | |
894 if (!llvm::isa<InstInsertElement>(Def)) { | |
895 if (Verbose) { | |
896 OstreamLocker _(Ctx); | |
897 Ctx->getStrDump() << "\tnot insert element: "; | |
898 Def->dump(Func); | |
899 Ctx->getStrDump() << "\n"; | |
900 } | |
901 return false; | |
902 } | |
903 | |
904 // Everything seems fine, so we save Def in Insts, and delegate the decision | |
905 // to findAllInserts. | |
906 (*Insts)[I + 1] = Def; | |
907 | |
908 return findAllInserts(Func, Ctx, VM, Insts, I + 1); | |
909 } | |
910 | |
911 // insertsLastElement returns true if Insert is inserting an element in the last | |
912 // position of a vector. | |
913 bool insertsLastElement(const Inst &Insert) { | |
914 const Type DestTy = Insert.getDest()->getType(); | |
915 assert(isVectorType(DestTy)); | |
916 const SizeT Elem = | |
917 llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue(); | |
918 return Elem == typeNumElements(DestTy) - 1; | |
919 } | |
920 | |
921 // findAllExtracts goes over all the insertelement instructions that are | |
922 // candidates to be replaced by a shufflevector, and searches for all the | |
923 // definitions of the elements being inserted. If all of the elements are the | |
924 // result of an extractelement instruction, and all of the extractelements | |
925 // operates on at most two different sources, than the instructions can be | |
Jim Stichnoth
2016/04/20 23:43:11
operate
John
2016/04/21 00:02:17
Done.
| |
926 // replaced by a shufflevector. | |
927 bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, | |
928 const CfgVector<const Inst *> &Insts, Variable **Src0, | |
929 Variable **Src1, CfgVector<const Inst *> *Extracts) { | |
930 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); | |
931 | |
932 *Src0 = nullptr; | |
933 *Src1 = nullptr; | |
934 assert(Insts.size() > 0); | |
935 for (SizeT I = 0; I < Insts.size(); ++I) { | |
936 const auto *Insert = Insts.at(I); | |
937 const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1)); | |
938 if (Src1V == nullptr) { | |
939 if (Verbose) { | |
940 OstreamLocker _(Ctx); | |
941 Ctx->getStrDump() << "src(1) is not a variable: "; | |
942 Insert->dump(Func); | |
943 Ctx->getStrDump() << "\n"; | |
944 } | |
945 return false; | |
946 } | |
947 | |
948 const auto *Def = VM->getSingleDefinition(Src1V); | |
949 if (Def == nullptr) { | |
950 if (Verbose) { | |
951 OstreamLocker _(Ctx); | |
952 Ctx->getStrDump() << "multi-def src(1): "; | |
953 Insert->dump(Func); | |
954 Ctx->getStrDump() << "\n"; | |
955 } | |
956 return false; | |
957 } | |
958 | |
959 if (!llvm::isa<InstExtractElement>(Def)) { | |
960 if (Verbose) { | |
961 OstreamLocker _(Ctx); | |
962 Ctx->getStrDump() << "not extractelement: "; | |
963 Def->dump(Func); | |
964 Ctx->getStrDump() << "\n"; | |
965 } | |
966 return false; | |
967 } | |
968 | |
969 auto *Src = llvm::cast<Variable>(Def->getSrc(0)); | |
970 if (*Src0 == nullptr) { | |
971 // No sources yet. Save Src to Src0. | |
972 *Src0 = Src; | |
973 } else if (*Src1 == nullptr) { | |
974 // We already have a source, so we might save Src in Src1 -- but only if | |
975 // Src0 is not Src. | |
976 if (*Src0 != Src) { | |
977 *Src1 = Src; | |
978 } | |
979 } else if (Src != *Src0 && Src != *Src1) { | |
980 // More than two sources, so we can't rematerialize the shufflevector | |
981 // instruction. | |
982 if (Verbose) { | |
983 Ctx->getStrDump() << "Can't shuffle more than two sources.\n"; | |
984 } | |
985 return false; | |
986 } | |
987 | |
988 (*Extracts)[I] = Def; | |
989 } | |
990 | |
991 // We should have seen at least one source operand. | |
992 assert(*Src0 != nullptr); | |
993 | |
994 // If a second source was not seeing, then we just make Src1 = Src0 to | |
995 // simplify things down stream. This should not matter, as all of the indexes | |
996 // in the shufflevector instruction will point to Src0. | |
997 if (*Src1 == nullptr) { | |
998 *Src1 = *Src0; | |
999 } | |
1000 | |
1001 return true; | |
1002 } | |
1003 | |
1004 } // end of namespace ShuffleVectorUtils | |
1005 } // end of anonymous namespace | |
1006 | |
1007 void Cfg::materializeVectorShuffles() { | |
1008 const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat); | |
1009 | |
1010 if (Verbose) { | |
1011 getContext()->getStrDump() << "\nShuffle materialization:\n"; | |
1012 } | |
1013 | |
1014 // MaxVectorElements is the maximum number of elements in the vector types | |
1015 // handled by Subzero. We use it to create the Inserts and Extracts vectors | |
1016 // with the appropriate size, thus avoinding resize() calls. | |
Jim Stichnoth
2016/04/20 23:43:11
avoiding
John
2016/04/21 00:02:17
Done.
| |
1017 const SizeT MaxVectorElements = typeNumElements(IceType_v16i8); | |
1018 CfgVector<const Inst *> Inserts(MaxVectorElements); | |
1019 CfgVector<const Inst *> Extracts(MaxVectorElements); | |
1020 | |
1021 TimerMarker T(TimerStack::TT_materializeVectorShuffles, this); | |
1022 for (CfgNode *Node : Nodes) { | |
1023 for (auto &Instr : Node->getInsts()) { | |
1024 if (!llvm::isa<InstInsertElement>(Instr)) { | |
1025 continue; | |
1026 } | |
1027 if (!ShuffleVectorUtils::insertsLastElement(Instr)) { | |
1028 // To avoid waisting time, we only start the pattern match at the last | |
1029 // insertelement instruction -- and go backwards from there. | |
1030 continue; | |
1031 } | |
1032 if (Verbose) { | |
1033 OstreamLocker _(getContext()); | |
1034 getContext()->getStrDump() << "\tCandidate: "; | |
1035 Instr.dump(this); | |
1036 getContext()->getStrDump() << "\n"; | |
1037 } | |
1038 Inserts.resize(typeNumElements(Instr.getDest()->getType())); | |
1039 Inserts[0] = &Instr; | |
1040 if (!ShuffleVectorUtils::findAllInserts(this, getContext(), | |
1041 VMetadata.get(), &Inserts)) { | |
1042 // If we fail to find a sequence of insertelements, we stop the | |
1043 // optimization. | |
1044 if (Verbose) { | |
1045 getContext()->getStrDump() << "\tFalse alarm.\n"; | |
1046 } | |
1047 continue; | |
1048 } | |
1049 if (Verbose) { | |
1050 OstreamLocker _(getContext()); | |
1051 getContext()->getStrDump() << "\tFound the following insertelement: \n"; | |
1052 for (auto *I : reverse_range(Inserts)) { | |
1053 getContext()->getStrDump() << "\t\t"; | |
1054 I->dump(this); | |
1055 getContext()->getStrDump() << "\n"; | |
1056 } | |
1057 } | |
1058 Extracts.resize(Inserts.size()); | |
1059 Variable *Src0; | |
1060 Variable *Src1; | |
1061 if (!ShuffleVectorUtils::findAllExtracts(this, getContext(), | |
1062 VMetadata.get(), Inserts, &Src0, | |
1063 &Src1, &Extracts)) { | |
1064 // If we fail to match the definitions of the insertelements' sources | |
1065 // with extractelement instructions -- or if those instructions operate | |
1066 // on more than two different variables --, we stop the optimization. | |
Jim Stichnoth
2016/04/20 23:43:11
no comma
John
2016/04/21 00:02:17
Done.
| |
1067 if (Verbose) { | |
1068 getContext()->getStrDump() << "\tFailed to match extractelements.\n"; | |
1069 } | |
1070 continue; | |
1071 } | |
1072 if (Verbose) { | |
1073 OstreamLocker _(getContext()); | |
1074 getContext()->getStrDump() | |
1075 << "\tFound the following insert/extract element pairs: \n"; | |
1076 for (SizeT I = 0; I < Inserts.size(); ++I) { | |
1077 const SizeT Pos = Inserts.size() - I - 1; | |
1078 getContext()->getStrDump() << "\t\tInsert : "; | |
1079 Inserts[Pos]->dump(this); | |
1080 getContext()->getStrDump() << "\n\t\tExtract: "; | |
1081 Extracts[Pos]->dump(this); | |
1082 getContext()->getStrDump() << "\n"; | |
1083 } | |
1084 } | |
1085 | |
1086 assert(Src0 != nullptr); | |
1087 assert(Src1 != nullptr); | |
1088 | |
1089 auto *ShuffleVector = | |
1090 InstShuffleVector::create(this, Instr.getDest(), Src0, Src1); | |
1091 assert(ShuffleVector->getSrc(0) == Src0); | |
1092 assert(ShuffleVector->getSrc(1) == Src1); | |
1093 for (SizeT I = 0; I < Extracts.size(); ++I) { | |
1094 const SizeT Pos = Extracts.size() - I - 1; | |
1095 auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1)); | |
1096 if (Src0 == Extracts[Pos]->getSrc(0)) { | |
1097 ShuffleVector->addIndex(Index); | |
1098 } else { | |
1099 ShuffleVector->addIndex(llvm::cast<ConstantInteger32>( | |
1100 Ctx->getConstantInt32(Index->getValue() + Extracts.size()))); | |
1101 } | |
1102 } | |
1103 | |
1104 if (Verbose) { | |
1105 OstreamLocker _(getContext()); | |
1106 getContext()->getStrDump() << "Created: "; | |
1107 ShuffleVector->dump(this); | |
1108 getContext()->getStrDump() << "\n"; | |
1109 } | |
1110 | |
1111 Instr.setDeleted(); | |
1112 auto &LoweringContext = getTarget()->getContext(); | |
1113 LoweringContext.setInsertPoint(Instr); | |
1114 LoweringContext.insert(ShuffleVector); | |
1115 } | |
1116 } | |
1117 } | |
1118 | |
805 void Cfg::doNopInsertion() { | 1119 void Cfg::doNopInsertion() { |
806 if (!getFlags().getShouldDoNopInsertion()) | 1120 if (!getFlags().getShouldDoNopInsertion()) |
807 return; | 1121 return; |
808 TimerMarker T(TimerStack::TT_doNopInsertion, this); | 1122 TimerMarker T(TimerStack::TT_doNopInsertion, this); |
809 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, | 1123 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, |
810 SequenceNumber); | 1124 SequenceNumber); |
811 for (CfgNode *Node : Nodes) | 1125 for (CfgNode *Node : Nodes) |
812 Node->doNopInsertion(RNG); | 1126 Node->doNopInsertion(RNG); |
813 } | 1127 } |
814 | 1128 |
(...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1172 } | 1486 } |
1173 } | 1487 } |
1174 // Print each basic block | 1488 // Print each basic block |
1175 for (CfgNode *Node : Nodes) | 1489 for (CfgNode *Node : Nodes) |
1176 Node->dump(this); | 1490 Node->dump(this); |
1177 if (isVerbose(IceV_Instructions)) | 1491 if (isVerbose(IceV_Instructions)) |
1178 Str << "}\n"; | 1492 Str << "}\n"; |
1179 } | 1493 } |
1180 | 1494 |
1181 } // end of namespace Ice | 1495 } // end of namespace Ice |
OLD | NEW |