Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(33)

Side by Side Diff: src/IceCfg.cpp

Issue 1897243002: Subzero. Rematerializes shufflevector instructions. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Adds some comments to the pattern matching implementation. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 784 matching lines...) Expand 10 before | Expand all | Expand 10 after
795 } 795 }
796 } while (FoundNewAssignment); 796 } while (FoundNewAssignment);
797 } 797 }
798 798
799 void Cfg::doAddressOpt() { 799 void Cfg::doAddressOpt() {
800 TimerMarker T(TimerStack::TT_doAddressOpt, this); 800 TimerMarker T(TimerStack::TT_doAddressOpt, this);
801 for (CfgNode *Node : Nodes) 801 for (CfgNode *Node : Nodes)
802 Node->doAddressOpt(); 802 Node->doAddressOpt();
803 } 803 }
804 804
805 namespace {
806 // ShuffleVectorUtils implements helper functions for rematerializing
807 // shufflevector instructions from a sequence of extractelement/insertelement
808 // instructions. It looks for the following pattern:
809 //
810 // %t0 = extractelement A, %n0
811 // %t1 = extractelement B, %n1
812 // %t2 = extractelement C, %n2
813 // ...
814 // %tN = extractelement N, %nN
815 // %d0 = insertelement undef, %t0, 0
816 // %d1 = insertelement %d0, %t1, 1
817 // %d2 = insertelement %d1, %t2, 2
818 // ...
819 // %dest = insertelement %d_N-1, %tN, N
820 //
821 // where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two
822 // distinct variables.
823 namespace ShuffleVectorUtils {
824 // findAllInserts is used when searching for all the insertelements that are
825 // used in a shufflevector operation. This function works recursively, when
826 // invoked with I = i, the function assumes Insts[i] is the last found
827 // insertelement in the chain. The next insertelement insertruction is saved in
828 // Insts[i+1].
829 bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
830 CfgVector<const Inst *> *Insts, SizeT I = 0) {
831 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
832
833 if (I > Insts->size()) {
834 if (Verbose) {
835 Ctx->getStrDump() << "\tToo many inserts.\n";
836 }
837 return false;
838 }
839
840 const auto *LastInsert = Insts->at(I);
841 assert(llvm::isa<InstInsertElement>(LastInsert));
842
843 if (I == Insts->size() - 1) {
844 // Matching agains undef is not really needed because the value in Src(0)
Jim Stichnoth 2016/04/20 23:43:11 against
John 2016/04/21 00:02:17 Done.
845 // will be totally overwritten. We still enforece it anyways because the
Jim Stichnoth 2016/04/20 23:43:11 enforce
John 2016/04/21 00:02:17 Done.
846 // pnacl toolchain generates the bitcode with it.
Jim Stichnoth 2016/04/20 23:43:10 PNaCl
John 2016/04/21 00:02:18 Done.
847 if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) {
848 if (Verbose) {
849 OstreamLocker _(Ctx);
Jim Stichnoth 2016/04/20 23:43:11 Instead of using OstreamLocker everywhere, you sho
John 2016/04/21 00:02:17 see if the solution works.
850 Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " "
851 << Insts->size();
852 LastInsert->dump(Func);
853 Ctx->getStrDump() << "\n";
854 }
855 return false;
856 }
857
858 // The following loop ensures that the insertelements are sorted. In theory,
859 // we could relax this restriction and allow any order. As long as each
860 // index appear excatly once, this chain is still a candidate for becoming a
Jim Stichnoth 2016/04/20 23:43:10 appears exactly
John 2016/04/21 00:02:18 Done.
861 // shufflevector. The Insts vector is traversed backwards because the
862 // instructions are "enqueued" in reverse order.
863 int32_t ExpectedElement = 0;
864 for (const auto *I : reverse_range(*Insts)) {
865 if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() !=
866 ExpectedElement) {
867 return false;
868 }
869 ++ExpectedElement;
870 }
871 return true;
872 }
873
874 const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0));
875 const auto *Def = VM->getSingleDefinition(Src0V);
876
877 // Only optimize if the first operand in
878 //
879 // Dest = insertelement A, B, 10
880 //
881 // is singly-def'ed.
882 if (Def == nullptr) {
883 if (Verbose) {
884 OstreamLocker _(Ctx);
885 Ctx->getStrDump() << "\tmulti-def: ";
886 (*Insts)[I]->dump(Func);
887 Ctx->getStrDump() << "\n";
888 }
889 return false;
890 }
891
892 // We also require the (single) definition to come from an insertelement
893 // instruction.
894 if (!llvm::isa<InstInsertElement>(Def)) {
895 if (Verbose) {
896 OstreamLocker _(Ctx);
897 Ctx->getStrDump() << "\tnot insert element: ";
898 Def->dump(Func);
899 Ctx->getStrDump() << "\n";
900 }
901 return false;
902 }
903
904 // Everything seems fine, so we save Def in Insts, and delegate the decision
905 // to findAllInserts.
906 (*Insts)[I + 1] = Def;
907
908 return findAllInserts(Func, Ctx, VM, Insts, I + 1);
909 }
910
911 // insertsLastElement returns true if Insert is inserting an element in the last
912 // position of a vector.
913 bool insertsLastElement(const Inst &Insert) {
914 const Type DestTy = Insert.getDest()->getType();
915 assert(isVectorType(DestTy));
916 const SizeT Elem =
917 llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue();
918 return Elem == typeNumElements(DestTy) - 1;
919 }
920
921 // findAllExtracts goes over all the insertelement instructions that are
922 // candidates to be replaced by a shufflevector, and searches for all the
923 // definitions of the elements being inserted. If all of the elements are the
924 // result of an extractelement instruction, and all of the extractelements
925 // operates on at most two different sources, than the instructions can be
Jim Stichnoth 2016/04/20 23:43:11 operate
John 2016/04/21 00:02:17 Done.
926 // replaced by a shufflevector.
927 bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
928 const CfgVector<const Inst *> &Insts, Variable **Src0,
929 Variable **Src1, CfgVector<const Inst *> *Extracts) {
930 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
931
932 *Src0 = nullptr;
933 *Src1 = nullptr;
934 assert(Insts.size() > 0);
935 for (SizeT I = 0; I < Insts.size(); ++I) {
936 const auto *Insert = Insts.at(I);
937 const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1));
938 if (Src1V == nullptr) {
939 if (Verbose) {
940 OstreamLocker _(Ctx);
941 Ctx->getStrDump() << "src(1) is not a variable: ";
942 Insert->dump(Func);
943 Ctx->getStrDump() << "\n";
944 }
945 return false;
946 }
947
948 const auto *Def = VM->getSingleDefinition(Src1V);
949 if (Def == nullptr) {
950 if (Verbose) {
951 OstreamLocker _(Ctx);
952 Ctx->getStrDump() << "multi-def src(1): ";
953 Insert->dump(Func);
954 Ctx->getStrDump() << "\n";
955 }
956 return false;
957 }
958
959 if (!llvm::isa<InstExtractElement>(Def)) {
960 if (Verbose) {
961 OstreamLocker _(Ctx);
962 Ctx->getStrDump() << "not extractelement: ";
963 Def->dump(Func);
964 Ctx->getStrDump() << "\n";
965 }
966 return false;
967 }
968
969 auto *Src = llvm::cast<Variable>(Def->getSrc(0));
970 if (*Src0 == nullptr) {
971 // No sources yet. Save Src to Src0.
972 *Src0 = Src;
973 } else if (*Src1 == nullptr) {
974 // We already have a source, so we might save Src in Src1 -- but only if
975 // Src0 is not Src.
976 if (*Src0 != Src) {
977 *Src1 = Src;
978 }
979 } else if (Src != *Src0 && Src != *Src1) {
980 // More than two sources, so we can't rematerialize the shufflevector
981 // instruction.
982 if (Verbose) {
983 Ctx->getStrDump() << "Can't shuffle more than two sources.\n";
984 }
985 return false;
986 }
987
988 (*Extracts)[I] = Def;
989 }
990
991 // We should have seen at least one source operand.
992 assert(*Src0 != nullptr);
993
994 // If a second source was not seeing, then we just make Src1 = Src0 to
995 // simplify things down stream. This should not matter, as all of the indexes
996 // in the shufflevector instruction will point to Src0.
997 if (*Src1 == nullptr) {
998 *Src1 = *Src0;
999 }
1000
1001 return true;
1002 }
1003
1004 } // end of namespace ShuffleVectorUtils
1005 } // end of anonymous namespace
1006
1007 void Cfg::materializeVectorShuffles() {
1008 const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat);
1009
1010 if (Verbose) {
1011 getContext()->getStrDump() << "\nShuffle materialization:\n";
1012 }
1013
1014 // MaxVectorElements is the maximum number of elements in the vector types
1015 // handled by Subzero. We use it to create the Inserts and Extracts vectors
1016 // with the appropriate size, thus avoinding resize() calls.
Jim Stichnoth 2016/04/20 23:43:11 avoiding
John 2016/04/21 00:02:17 Done.
1017 const SizeT MaxVectorElements = typeNumElements(IceType_v16i8);
1018 CfgVector<const Inst *> Inserts(MaxVectorElements);
1019 CfgVector<const Inst *> Extracts(MaxVectorElements);
1020
1021 TimerMarker T(TimerStack::TT_materializeVectorShuffles, this);
1022 for (CfgNode *Node : Nodes) {
1023 for (auto &Instr : Node->getInsts()) {
1024 if (!llvm::isa<InstInsertElement>(Instr)) {
1025 continue;
1026 }
1027 if (!ShuffleVectorUtils::insertsLastElement(Instr)) {
1028 // To avoid waisting time, we only start the pattern match at the last
1029 // insertelement instruction -- and go backwards from there.
1030 continue;
1031 }
1032 if (Verbose) {
1033 OstreamLocker _(getContext());
1034 getContext()->getStrDump() << "\tCandidate: ";
1035 Instr.dump(this);
1036 getContext()->getStrDump() << "\n";
1037 }
1038 Inserts.resize(typeNumElements(Instr.getDest()->getType()));
1039 Inserts[0] = &Instr;
1040 if (!ShuffleVectorUtils::findAllInserts(this, getContext(),
1041 VMetadata.get(), &Inserts)) {
1042 // If we fail to find a sequence of insertelements, we stop the
1043 // optimization.
1044 if (Verbose) {
1045 getContext()->getStrDump() << "\tFalse alarm.\n";
1046 }
1047 continue;
1048 }
1049 if (Verbose) {
1050 OstreamLocker _(getContext());
1051 getContext()->getStrDump() << "\tFound the following insertelement: \n";
1052 for (auto *I : reverse_range(Inserts)) {
1053 getContext()->getStrDump() << "\t\t";
1054 I->dump(this);
1055 getContext()->getStrDump() << "\n";
1056 }
1057 }
1058 Extracts.resize(Inserts.size());
1059 Variable *Src0;
1060 Variable *Src1;
1061 if (!ShuffleVectorUtils::findAllExtracts(this, getContext(),
1062 VMetadata.get(), Inserts, &Src0,
1063 &Src1, &Extracts)) {
1064 // If we fail to match the definitions of the insertelements' sources
1065 // with extractelement instructions -- or if those instructions operate
1066 // on more than two different variables --, we stop the optimization.
Jim Stichnoth 2016/04/20 23:43:11 no comma
John 2016/04/21 00:02:17 Done.
1067 if (Verbose) {
1068 getContext()->getStrDump() << "\tFailed to match extractelements.\n";
1069 }
1070 continue;
1071 }
1072 if (Verbose) {
1073 OstreamLocker _(getContext());
1074 getContext()->getStrDump()
1075 << "\tFound the following insert/extract element pairs: \n";
1076 for (SizeT I = 0; I < Inserts.size(); ++I) {
1077 const SizeT Pos = Inserts.size() - I - 1;
1078 getContext()->getStrDump() << "\t\tInsert : ";
1079 Inserts[Pos]->dump(this);
1080 getContext()->getStrDump() << "\n\t\tExtract: ";
1081 Extracts[Pos]->dump(this);
1082 getContext()->getStrDump() << "\n";
1083 }
1084 }
1085
1086 assert(Src0 != nullptr);
1087 assert(Src1 != nullptr);
1088
1089 auto *ShuffleVector =
1090 InstShuffleVector::create(this, Instr.getDest(), Src0, Src1);
1091 assert(ShuffleVector->getSrc(0) == Src0);
1092 assert(ShuffleVector->getSrc(1) == Src1);
1093 for (SizeT I = 0; I < Extracts.size(); ++I) {
1094 const SizeT Pos = Extracts.size() - I - 1;
1095 auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1));
1096 if (Src0 == Extracts[Pos]->getSrc(0)) {
1097 ShuffleVector->addIndex(Index);
1098 } else {
1099 ShuffleVector->addIndex(llvm::cast<ConstantInteger32>(
1100 Ctx->getConstantInt32(Index->getValue() + Extracts.size())));
1101 }
1102 }
1103
1104 if (Verbose) {
1105 OstreamLocker _(getContext());
1106 getContext()->getStrDump() << "Created: ";
1107 ShuffleVector->dump(this);
1108 getContext()->getStrDump() << "\n";
1109 }
1110
1111 Instr.setDeleted();
1112 auto &LoweringContext = getTarget()->getContext();
1113 LoweringContext.setInsertPoint(Instr);
1114 LoweringContext.insert(ShuffleVector);
1115 }
1116 }
1117 }
1118
805 void Cfg::doNopInsertion() { 1119 void Cfg::doNopInsertion() {
806 if (!getFlags().getShouldDoNopInsertion()) 1120 if (!getFlags().getShouldDoNopInsertion())
807 return; 1121 return;
808 TimerMarker T(TimerStack::TT_doNopInsertion, this); 1122 TimerMarker T(TimerStack::TT_doNopInsertion, this);
809 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, 1123 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion,
810 SequenceNumber); 1124 SequenceNumber);
811 for (CfgNode *Node : Nodes) 1125 for (CfgNode *Node : Nodes)
812 Node->doNopInsertion(RNG); 1126 Node->doNopInsertion(RNG);
813 } 1127 }
814 1128
(...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after
1172 } 1486 }
1173 } 1487 }
1174 // Print each basic block 1488 // Print each basic block
1175 for (CfgNode *Node : Nodes) 1489 for (CfgNode *Node : Nodes)
1176 Node->dump(this); 1490 Node->dump(this);
1177 if (isVerbose(IceV_Instructions)) 1491 if (isVerbose(IceV_Instructions))
1178 Str << "}\n"; 1492 Str << "}\n";
1179 } 1493 }
1180 1494
1181 } // end of namespace Ice 1495 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698