OLD | NEW |
---|---|
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 784 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
795 } | 795 } |
796 } while (FoundNewAssignment); | 796 } while (FoundNewAssignment); |
797 } | 797 } |
798 | 798 |
799 void Cfg::doAddressOpt() { | 799 void Cfg::doAddressOpt() { |
800 TimerMarker T(TimerStack::TT_doAddressOpt, this); | 800 TimerMarker T(TimerStack::TT_doAddressOpt, this); |
801 for (CfgNode *Node : Nodes) | 801 for (CfgNode *Node : Nodes) |
802 Node->doAddressOpt(); | 802 Node->doAddressOpt(); |
803 } | 803 } |
804 | 804 |
805 namespace { | |
806 // ShuffleVectorUtils implements helper functions for rematerializing | |
807 // shufflevector instructions from a sequence of extractelement/insertelement | |
808 // instructions. It looks for the following pattern: | |
809 // | |
810 // %t0 = extractelement A, %n0 | |
811 // %t1 = extractelement B, %n1 | |
812 // %t2 = extractelement C, %n2 | |
813 // ... | |
814 // %tN = extractelement N, %nN | |
815 // %d0 = insertelement undef, %t0, 0 | |
816 // %d1 = insertelement %d0, %t1, 1 | |
817 // %d2 = insertelement %d1, %t2, 2 | |
818 // ... | |
819 // %dest = insertelement %d_N-1, %tN, N | |
820 // | |
821 // where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two | |
822 // distinct variables. | |
823 namespace ShuffleVectorUtils { | |
824 // findAllInserts is used when searching for all the insertelements that are | |
825 // used in a shufflevector operation. This function works recursively, when | |
826 // invoked with I = i, the function assumes Insts[i] is the last found | |
827 // insertelement in the chain. The next insertelement insertruction is saved in | |
828 // Insts[i+1]. | |
829 bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, | |
830 CfgVector<const Inst *> *Insts, SizeT I = 0) { | |
831 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); | |
832 | |
833 if (I > Insts->size()) { | |
834 if (Verbose) { | |
835 Ctx->getStrDump() << "\tToo many inserts.\n"; | |
836 } | |
837 return false; | |
838 } | |
839 | |
840 const auto *LastInsert = Insts->at(I); | |
841 assert(llvm::isa<InstInsertElement>(LastInsert)); | |
842 | |
843 if (I == Insts->size() - 1) { | |
844 // Matching against undef is not really needed because the value in Src(0) | |
845 // will be totally overwritten. We still enforce it anyways because the | |
846 // PNaCl toolchain generates the bitcode with it. | |
847 if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) { | |
848 if (Verbose) { | |
849 Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " " | |
850 << Insts->size(); | |
851 LastInsert->dump(Func); | |
852 Ctx->getStrDump() << "\n"; | |
853 } | |
854 return false; | |
855 } | |
856 | |
857 // The following loop ensures that the insertelements are sorted. In theory, | |
858 // we could relax this restriction and allow any order. As long as each | |
859 // index appears exactly once, this chain is still a candidate for becoming | |
860 // a shufflevector. The Insts vector is traversed backwards because the | |
861 // instructions are "enqueued" in reverse order. | |
862 int32_t ExpectedElement = 0; | |
863 for (const auto *I : reverse_range(*Insts)) { | |
864 if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() != | |
865 ExpectedElement) { | |
866 return false; | |
867 } | |
868 ++ExpectedElement; | |
869 } | |
870 return true; | |
871 } | |
872 | |
873 const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0)); | |
874 const auto *Def = VM->getSingleDefinition(Src0V); | |
875 | |
876 // Only optimize if the first operand in | |
877 // | |
878 // Dest = insertelement A, B, 10 | |
879 // | |
880 // is singly-def'ed. | |
881 if (Def == nullptr) { | |
882 if (Verbose) { | |
883 Ctx->getStrDump() << "\tmulti-def: "; | |
884 (*Insts)[I]->dump(Func); | |
885 Ctx->getStrDump() << "\n"; | |
886 } | |
887 return false; | |
888 } | |
889 | |
890 // We also require the (single) definition to come from an insertelement | |
891 // instruction. | |
892 if (!llvm::isa<InstInsertElement>(Def)) { | |
893 if (Verbose) { | |
894 Ctx->getStrDump() << "\tnot insert element: "; | |
895 Def->dump(Func); | |
896 Ctx->getStrDump() << "\n"; | |
897 } | |
898 return false; | |
899 } | |
900 | |
901 // Everything seems fine, so we save Def in Insts, and delegate the decision | |
902 // to findAllInserts. | |
903 (*Insts)[I + 1] = Def; | |
904 | |
905 return findAllInserts(Func, Ctx, VM, Insts, I + 1); | |
906 } | |
907 | |
908 // insertsLastElement returns true if Insert is inserting an element in the last | |
909 // position of a vector. | |
910 bool insertsLastElement(const Inst &Insert) { | |
911 const Type DestTy = Insert.getDest()->getType(); | |
912 assert(isVectorType(DestTy)); | |
913 const SizeT Elem = | |
914 llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue(); | |
915 return Elem == typeNumElements(DestTy) - 1; | |
916 } | |
917 | |
918 // findAllExtracts goes over all the insertelement instructions that are | |
919 // candidates to be replaced by a shufflevector, and searches for all the | |
920 // definitions of the elements being inserted. If all of the elements are the | |
921 // result of an extractelement instruction, and all of the extractelements | |
922 // operate on at most two different sources, than the instructions can be | |
923 // replaced by a shufflevector. | |
924 bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, | |
925 const CfgVector<const Inst *> &Insts, Variable **Src0, | |
926 Variable **Src1, CfgVector<const Inst *> *Extracts) { | |
927 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); | |
928 | |
929 *Src0 = nullptr; | |
930 *Src1 = nullptr; | |
931 assert(Insts.size() > 0); | |
932 for (SizeT I = 0; I < Insts.size(); ++I) { | |
933 const auto *Insert = Insts.at(I); | |
934 const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1)); | |
935 if (Src1V == nullptr) { | |
936 if (Verbose) { | |
937 Ctx->getStrDump() << "src(1) is not a variable: "; | |
938 Insert->dump(Func); | |
939 Ctx->getStrDump() << "\n"; | |
940 } | |
941 return false; | |
942 } | |
943 | |
944 const auto *Def = VM->getSingleDefinition(Src1V); | |
945 if (Def == nullptr) { | |
946 if (Verbose) { | |
947 Ctx->getStrDump() << "multi-def src(1): "; | |
948 Insert->dump(Func); | |
949 Ctx->getStrDump() << "\n"; | |
950 } | |
951 return false; | |
952 } | |
953 | |
954 if (!llvm::isa<InstExtractElement>(Def)) { | |
955 if (Verbose) { | |
956 Ctx->getStrDump() << "not extractelement: "; | |
957 Def->dump(Func); | |
958 Ctx->getStrDump() << "\n"; | |
959 } | |
960 return false; | |
961 } | |
962 | |
963 auto *Src = llvm::cast<Variable>(Def->getSrc(0)); | |
964 if (*Src0 == nullptr) { | |
965 // No sources yet. Save Src to Src0. | |
966 *Src0 = Src; | |
967 } else if (*Src1 == nullptr) { | |
968 // We already have a source, so we might save Src in Src1 -- but only if | |
969 // Src0 is not Src. | |
970 if (*Src0 != Src) { | |
971 *Src1 = Src; | |
972 } | |
973 } else if (Src != *Src0 && Src != *Src1) { | |
974 // More than two sources, so we can't rematerialize the shufflevector | |
975 // instruction. | |
976 if (Verbose) { | |
977 Ctx->getStrDump() << "Can't shuffle more than two sources.\n"; | |
978 } | |
979 return false; | |
980 } | |
981 | |
982 (*Extracts)[I] = Def; | |
983 } | |
984 | |
985 // We should have seen at least one source operand. | |
986 assert(*Src0 != nullptr); | |
987 | |
988 // If a second source was not seeing, then we just make Src1 = Src0 to | |
Jim Stichnoth
2016/04/21 00:29:25
seen
John
2016/04/21 00:40:50
Done.
| |
989 // simplify things down stream. This should not matter, as all of the indexes | |
990 // in the shufflevector instruction will point to Src0. | |
991 if (*Src1 == nullptr) { | |
992 *Src1 = *Src0; | |
993 } | |
994 | |
995 return true; | |
996 } | |
997 | |
998 } // end of namespace ShuffleVectorUtils | |
999 } // end of anonymous namespace | |
1000 | |
1001 void Cfg::materializeVectorShuffles() { | |
1002 const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat); | |
1003 | |
1004 std::unique_ptr<OstreamLocker> L; | |
1005 if (Verbose) { | |
1006 L.reset(new OstreamLocker(getContext())); | |
1007 getContext()->getStrDump() << "\nShuffle materialization:\n"; | |
1008 } | |
1009 | |
1010 // MaxVectorElements is the maximum number of elements in the vector types | |
1011 // handled by Subzero. We use it to create the Inserts and Extracts vectors | |
1012 // with the appropriate size, thus avoiding resize() calls. | |
1013 const SizeT MaxVectorElements = typeNumElements(IceType_v16i8); | |
1014 CfgVector<const Inst *> Inserts(MaxVectorElements); | |
1015 CfgVector<const Inst *> Extracts(MaxVectorElements); | |
1016 | |
1017 TimerMarker T(TimerStack::TT_materializeVectorShuffles, this); | |
1018 for (CfgNode *Node : Nodes) { | |
1019 for (auto &Instr : Node->getInsts()) { | |
1020 if (!llvm::isa<InstInsertElement>(Instr)) { | |
1021 continue; | |
1022 } | |
1023 if (!ShuffleVectorUtils::insertsLastElement(Instr)) { | |
1024 // To avoid waisting time, we only start the pattern match at the last | |
Jim Stichnoth
2016/04/21 00:29:25
wasting
John
2016/04/21 00:40:50
Done.
| |
1025 // insertelement instruction -- and go backwards from there. | |
1026 continue; | |
1027 } | |
1028 if (Verbose) { | |
1029 getContext()->getStrDump() << "\tCandidate: "; | |
1030 Instr.dump(this); | |
1031 getContext()->getStrDump() << "\n"; | |
1032 } | |
1033 Inserts.resize(typeNumElements(Instr.getDest()->getType())); | |
1034 Inserts[0] = &Instr; | |
1035 if (!ShuffleVectorUtils::findAllInserts(this, getContext(), | |
1036 VMetadata.get(), &Inserts)) { | |
1037 // If we fail to find a sequence of insertelements, we stop the | |
1038 // optimization. | |
1039 if (Verbose) { | |
1040 getContext()->getStrDump() << "\tFalse alarm.\n"; | |
1041 } | |
1042 continue; | |
1043 } | |
1044 if (Verbose) { | |
1045 getContext()->getStrDump() << "\tFound the following insertelement: \n"; | |
1046 for (auto *I : reverse_range(Inserts)) { | |
1047 getContext()->getStrDump() << "\t\t"; | |
1048 I->dump(this); | |
1049 getContext()->getStrDump() << "\n"; | |
1050 } | |
1051 } | |
1052 Extracts.resize(Inserts.size()); | |
1053 Variable *Src0; | |
1054 Variable *Src1; | |
1055 if (!ShuffleVectorUtils::findAllExtracts(this, getContext(), | |
1056 VMetadata.get(), Inserts, &Src0, | |
1057 &Src1, &Extracts)) { | |
1058 // If we fail to match the definitions of the insertelements' sources | |
1059 // with extractelement instructions -- or if those instructions operate | |
1060 // on more than two different variables -- we stop the optimization. | |
1061 if (Verbose) { | |
1062 getContext()->getStrDump() << "\tFailed to match extractelements.\n"; | |
1063 } | |
1064 continue; | |
1065 } | |
1066 if (Verbose) { | |
1067 getContext()->getStrDump() | |
1068 << "\tFound the following insert/extract element pairs: \n"; | |
1069 for (SizeT I = 0; I < Inserts.size(); ++I) { | |
1070 const SizeT Pos = Inserts.size() - I - 1; | |
1071 getContext()->getStrDump() << "\t\tInsert : "; | |
1072 Inserts[Pos]->dump(this); | |
1073 getContext()->getStrDump() << "\n\t\tExtract: "; | |
1074 Extracts[Pos]->dump(this); | |
1075 getContext()->getStrDump() << "\n"; | |
1076 } | |
1077 } | |
1078 | |
1079 assert(Src0 != nullptr); | |
1080 assert(Src1 != nullptr); | |
1081 | |
1082 auto *ShuffleVector = | |
1083 InstShuffleVector::create(this, Instr.getDest(), Src0, Src1); | |
1084 assert(ShuffleVector->getSrc(0) == Src0); | |
1085 assert(ShuffleVector->getSrc(1) == Src1); | |
1086 for (SizeT I = 0; I < Extracts.size(); ++I) { | |
1087 const SizeT Pos = Extracts.size() - I - 1; | |
1088 auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1)); | |
1089 if (Src0 == Extracts[Pos]->getSrc(0)) { | |
1090 ShuffleVector->addIndex(Index); | |
1091 } else { | |
1092 ShuffleVector->addIndex(llvm::cast<ConstantInteger32>( | |
1093 Ctx->getConstantInt32(Index->getValue() + Extracts.size()))); | |
1094 } | |
1095 } | |
1096 | |
1097 if (Verbose) { | |
1098 getContext()->getStrDump() << "Created: "; | |
1099 ShuffleVector->dump(this); | |
1100 getContext()->getStrDump() << "\n"; | |
1101 } | |
1102 | |
1103 Instr.setDeleted(); | |
1104 auto &LoweringContext = getTarget()->getContext(); | |
1105 LoweringContext.setInsertPoint(Instr); | |
1106 LoweringContext.insert(ShuffleVector); | |
1107 } | |
1108 } | |
1109 } | |
1110 | |
805 void Cfg::doNopInsertion() { | 1111 void Cfg::doNopInsertion() { |
806 if (!getFlags().getShouldDoNopInsertion()) | 1112 if (!getFlags().getShouldDoNopInsertion()) |
807 return; | 1113 return; |
808 TimerMarker T(TimerStack::TT_doNopInsertion, this); | 1114 TimerMarker T(TimerStack::TT_doNopInsertion, this); |
809 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, | 1115 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, |
810 SequenceNumber); | 1116 SequenceNumber); |
811 for (CfgNode *Node : Nodes) | 1117 for (CfgNode *Node : Nodes) |
812 Node->doNopInsertion(RNG); | 1118 Node->doNopInsertion(RNG); |
813 } | 1119 } |
814 | 1120 |
(...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1172 } | 1478 } |
1173 } | 1479 } |
1174 // Print each basic block | 1480 // Print each basic block |
1175 for (CfgNode *Node : Nodes) | 1481 for (CfgNode *Node : Nodes) |
1176 Node->dump(this); | 1482 Node->dump(this); |
1177 if (isVerbose(IceV_Instructions)) | 1483 if (isVerbose(IceV_Instructions)) |
1178 Str << "}\n"; | 1484 Str << "}\n"; |
1179 } | 1485 } |
1180 | 1486 |
1181 } // end of namespace Ice | 1487 } // end of namespace Ice |
OLD | NEW |