Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(57)

Side by Side Diff: src/IceCfg.cpp

Issue 1897243002: Subzero. Rematerializes shufflevector instructions. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceCfg.h ('k') | src/IceClFlags.def » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 784 matching lines...) Expand 10 before | Expand all | Expand 10 after
795 } 795 }
796 } while (FoundNewAssignment); 796 } while (FoundNewAssignment);
797 } 797 }
798 798
799 void Cfg::doAddressOpt() { 799 void Cfg::doAddressOpt() {
800 TimerMarker T(TimerStack::TT_doAddressOpt, this); 800 TimerMarker T(TimerStack::TT_doAddressOpt, this);
801 for (CfgNode *Node : Nodes) 801 for (CfgNode *Node : Nodes)
802 Node->doAddressOpt(); 802 Node->doAddressOpt();
803 } 803 }
804 804
805 namespace {
806 // ShuffleVectorUtils implements helper functions for rematerializing
807 // shufflevector instructions from a sequence of extractelement/insertelement
808 // instructions. It looks for the following pattern:
809 //
810 // %t0 = extractelement A, %n0
811 // %t1 = extractelement B, %n1
812 // %t2 = extractelement C, %n2
813 // ...
814 // %tN = extractelement N, %nN
815 // %d0 = insertelement undef, %t0, 0
816 // %d1 = insertelement %d0, %t1, 1
817 // %d2 = insertelement %d1, %t2, 2
818 // ...
819 // %dest = insertelement %d_N-1, %tN, N
820 //
821 // where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two
822 // distinct variables.
823 namespace ShuffleVectorUtils {
824 // findAllInserts is used when searching for all the insertelements that are
825 // used in a shufflevector operation. This function works recursively, when
826 // invoked with I = i, the function assumes Insts[i] is the last found
827 // insertelement in the chain. The next insertelement insertruction is saved in
828 // Insts[i+1].
829 bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
830 CfgVector<const Inst *> *Insts, SizeT I = 0) {
831 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
832
833 if (I > Insts->size()) {
834 if (Verbose) {
835 Ctx->getStrDump() << "\tToo many inserts.\n";
836 }
837 return false;
838 }
839
840 const auto *LastInsert = Insts->at(I);
841 assert(llvm::isa<InstInsertElement>(LastInsert));
842
843 if (I == Insts->size() - 1) {
844 // Matching against undef is not really needed because the value in Src(0)
845 // will be totally overwritten. We still enforce it anyways because the
846 // PNaCl toolchain generates the bitcode with it.
847 if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) {
848 if (Verbose) {
849 Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " "
850 << Insts->size();
851 LastInsert->dump(Func);
852 Ctx->getStrDump() << "\n";
853 }
854 return false;
855 }
856
857 // The following loop ensures that the insertelements are sorted. In theory,
858 // we could relax this restriction and allow any order. As long as each
859 // index appears exactly once, this chain is still a candidate for becoming
860 // a shufflevector. The Insts vector is traversed backwards because the
861 // instructions are "enqueued" in reverse order.
862 int32_t ExpectedElement = 0;
863 for (const auto *I : reverse_range(*Insts)) {
864 if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() !=
865 ExpectedElement) {
866 return false;
867 }
868 ++ExpectedElement;
869 }
870 return true;
871 }
872
873 const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0));
874 const auto *Def = VM->getSingleDefinition(Src0V);
875
876 // Only optimize if the first operand in
877 //
878 // Dest = insertelement A, B, 10
879 //
880 // is singly-def'ed.
881 if (Def == nullptr) {
882 if (Verbose) {
883 Ctx->getStrDump() << "\tmulti-def: ";
884 (*Insts)[I]->dump(Func);
885 Ctx->getStrDump() << "\n";
886 }
887 return false;
888 }
889
890 // We also require the (single) definition to come from an insertelement
891 // instruction.
892 if (!llvm::isa<InstInsertElement>(Def)) {
893 if (Verbose) {
894 Ctx->getStrDump() << "\tnot insert element: ";
895 Def->dump(Func);
896 Ctx->getStrDump() << "\n";
897 }
898 return false;
899 }
900
901 // Everything seems fine, so we save Def in Insts, and delegate the decision
902 // to findAllInserts.
903 (*Insts)[I + 1] = Def;
904
905 return findAllInserts(Func, Ctx, VM, Insts, I + 1);
906 }
907
908 // insertsLastElement returns true if Insert is inserting an element in the last
909 // position of a vector.
910 bool insertsLastElement(const Inst &Insert) {
911 const Type DestTy = Insert.getDest()->getType();
912 assert(isVectorType(DestTy));
913 const SizeT Elem =
914 llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue();
915 return Elem == typeNumElements(DestTy) - 1;
916 }
917
918 // findAllExtracts goes over all the insertelement instructions that are
919 // candidates to be replaced by a shufflevector, and searches for all the
920 // definitions of the elements being inserted. If all of the elements are the
921 // result of an extractelement instruction, and all of the extractelements
922 // operate on at most two different sources, than the instructions can be
923 // replaced by a shufflevector.
924 bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
925 const CfgVector<const Inst *> &Insts, Variable **Src0,
926 Variable **Src1, CfgVector<const Inst *> *Extracts) {
927 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
928
929 *Src0 = nullptr;
930 *Src1 = nullptr;
931 assert(Insts.size() > 0);
932 for (SizeT I = 0; I < Insts.size(); ++I) {
933 const auto *Insert = Insts.at(I);
934 const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1));
935 if (Src1V == nullptr) {
936 if (Verbose) {
937 Ctx->getStrDump() << "src(1) is not a variable: ";
938 Insert->dump(Func);
939 Ctx->getStrDump() << "\n";
940 }
941 return false;
942 }
943
944 const auto *Def = VM->getSingleDefinition(Src1V);
945 if (Def == nullptr) {
946 if (Verbose) {
947 Ctx->getStrDump() << "multi-def src(1): ";
948 Insert->dump(Func);
949 Ctx->getStrDump() << "\n";
950 }
951 return false;
952 }
953
954 if (!llvm::isa<InstExtractElement>(Def)) {
955 if (Verbose) {
956 Ctx->getStrDump() << "not extractelement: ";
957 Def->dump(Func);
958 Ctx->getStrDump() << "\n";
959 }
960 return false;
961 }
962
963 auto *Src = llvm::cast<Variable>(Def->getSrc(0));
964 if (*Src0 == nullptr) {
965 // No sources yet. Save Src to Src0.
966 *Src0 = Src;
967 } else if (*Src1 == nullptr) {
968 // We already have a source, so we might save Src in Src1 -- but only if
969 // Src0 is not Src.
970 if (*Src0 != Src) {
971 *Src1 = Src;
972 }
973 } else if (Src != *Src0 && Src != *Src1) {
974 // More than two sources, so we can't rematerialize the shufflevector
975 // instruction.
976 if (Verbose) {
977 Ctx->getStrDump() << "Can't shuffle more than two sources.\n";
978 }
979 return false;
980 }
981
982 (*Extracts)[I] = Def;
983 }
984
985 // We should have seen at least one source operand.
986 assert(*Src0 != nullptr);
987
988 // If a second source was not seen, then we just make Src1 = Src0 to simplify
989 // things down stream. This should not matter, as all of the indexes in the
990 // shufflevector instruction will point to Src0.
991 if (*Src1 == nullptr) {
992 *Src1 = *Src0;
993 }
994
995 return true;
996 }
997
998 } // end of namespace ShuffleVectorUtils
999 } // end of anonymous namespace
1000
1001 void Cfg::materializeVectorShuffles() {
1002 const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat);
1003
1004 std::unique_ptr<OstreamLocker> L;
1005 if (Verbose) {
1006 L.reset(new OstreamLocker(getContext()));
1007 getContext()->getStrDump() << "\nShuffle materialization:\n";
1008 }
1009
1010 // MaxVectorElements is the maximum number of elements in the vector types
1011 // handled by Subzero. We use it to create the Inserts and Extracts vectors
1012 // with the appropriate size, thus avoiding resize() calls.
1013 const SizeT MaxVectorElements = typeNumElements(IceType_v16i8);
1014 CfgVector<const Inst *> Inserts(MaxVectorElements);
1015 CfgVector<const Inst *> Extracts(MaxVectorElements);
1016
1017 TimerMarker T(TimerStack::TT_materializeVectorShuffles, this);
1018 for (CfgNode *Node : Nodes) {
1019 for (auto &Instr : Node->getInsts()) {
1020 if (!llvm::isa<InstInsertElement>(Instr)) {
1021 continue;
1022 }
1023 if (!ShuffleVectorUtils::insertsLastElement(Instr)) {
1024 // To avoid wasting time, we only start the pattern match at the last
1025 // insertelement instruction -- and go backwards from there.
1026 continue;
1027 }
1028 if (Verbose) {
1029 getContext()->getStrDump() << "\tCandidate: ";
1030 Instr.dump(this);
1031 getContext()->getStrDump() << "\n";
1032 }
1033 Inserts.resize(typeNumElements(Instr.getDest()->getType()));
1034 Inserts[0] = &Instr;
1035 if (!ShuffleVectorUtils::findAllInserts(this, getContext(),
1036 VMetadata.get(), &Inserts)) {
1037 // If we fail to find a sequence of insertelements, we stop the
1038 // optimization.
1039 if (Verbose) {
1040 getContext()->getStrDump() << "\tFalse alarm.\n";
1041 }
1042 continue;
1043 }
1044 if (Verbose) {
1045 getContext()->getStrDump() << "\tFound the following insertelement: \n";
1046 for (auto *I : reverse_range(Inserts)) {
1047 getContext()->getStrDump() << "\t\t";
1048 I->dump(this);
1049 getContext()->getStrDump() << "\n";
1050 }
1051 }
1052 Extracts.resize(Inserts.size());
1053 Variable *Src0;
1054 Variable *Src1;
1055 if (!ShuffleVectorUtils::findAllExtracts(this, getContext(),
1056 VMetadata.get(), Inserts, &Src0,
1057 &Src1, &Extracts)) {
1058 // If we fail to match the definitions of the insertelements' sources
1059 // with extractelement instructions -- or if those instructions operate
1060 // on more than two different variables -- we stop the optimization.
1061 if (Verbose) {
1062 getContext()->getStrDump() << "\tFailed to match extractelements.\n";
1063 }
1064 continue;
1065 }
1066 if (Verbose) {
1067 getContext()->getStrDump()
1068 << "\tFound the following insert/extract element pairs: \n";
1069 for (SizeT I = 0; I < Inserts.size(); ++I) {
1070 const SizeT Pos = Inserts.size() - I - 1;
1071 getContext()->getStrDump() << "\t\tInsert : ";
1072 Inserts[Pos]->dump(this);
1073 getContext()->getStrDump() << "\n\t\tExtract: ";
1074 Extracts[Pos]->dump(this);
1075 getContext()->getStrDump() << "\n";
1076 }
1077 }
1078
1079 assert(Src0 != nullptr);
1080 assert(Src1 != nullptr);
1081
1082 auto *ShuffleVector =
1083 InstShuffleVector::create(this, Instr.getDest(), Src0, Src1);
1084 assert(ShuffleVector->getSrc(0) == Src0);
1085 assert(ShuffleVector->getSrc(1) == Src1);
1086 for (SizeT I = 0; I < Extracts.size(); ++I) {
1087 const SizeT Pos = Extracts.size() - I - 1;
1088 auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1));
1089 if (Src0 == Extracts[Pos]->getSrc(0)) {
1090 ShuffleVector->addIndex(Index);
1091 } else {
1092 ShuffleVector->addIndex(llvm::cast<ConstantInteger32>(
1093 Ctx->getConstantInt32(Index->getValue() + Extracts.size())));
1094 }
1095 }
1096
1097 if (Verbose) {
1098 getContext()->getStrDump() << "Created: ";
1099 ShuffleVector->dump(this);
1100 getContext()->getStrDump() << "\n";
1101 }
1102
1103 Instr.setDeleted();
1104 auto &LoweringContext = getTarget()->getContext();
1105 LoweringContext.setInsertPoint(Instr);
1106 LoweringContext.insert(ShuffleVector);
1107 }
1108 }
1109 }
1110
805 void Cfg::doNopInsertion() { 1111 void Cfg::doNopInsertion() {
806 if (!getFlags().getShouldDoNopInsertion()) 1112 if (!getFlags().getShouldDoNopInsertion())
807 return; 1113 return;
808 TimerMarker T(TimerStack::TT_doNopInsertion, this); 1114 TimerMarker T(TimerStack::TT_doNopInsertion, this);
809 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, 1115 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion,
810 SequenceNumber); 1116 SequenceNumber);
811 for (CfgNode *Node : Nodes) 1117 for (CfgNode *Node : Nodes)
812 Node->doNopInsertion(RNG); 1118 Node->doNopInsertion(RNG);
813 } 1119 }
814 1120
(...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after
1172 } 1478 }
1173 } 1479 }
1174 // Print each basic block 1480 // Print each basic block
1175 for (CfgNode *Node : Nodes) 1481 for (CfgNode *Node : Nodes)
1176 Node->dump(this); 1482 Node->dump(this);
1177 if (isVerbose(IceV_Instructions)) 1483 if (isVerbose(IceV_Instructions))
1178 Str << "}\n"; 1484 Str << "}\n";
1179 } 1485 }
1180 1486
1181 } // end of namespace Ice 1487 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceCfg.h ('k') | src/IceClFlags.def » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698