Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 784 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 795 } | 795 } |
| 796 } while (FoundNewAssignment); | 796 } while (FoundNewAssignment); |
| 797 } | 797 } |
| 798 | 798 |
| 799 void Cfg::doAddressOpt() { | 799 void Cfg::doAddressOpt() { |
| 800 TimerMarker T(TimerStack::TT_doAddressOpt, this); | 800 TimerMarker T(TimerStack::TT_doAddressOpt, this); |
| 801 for (CfgNode *Node : Nodes) | 801 for (CfgNode *Node : Nodes) |
| 802 Node->doAddressOpt(); | 802 Node->doAddressOpt(); |
| 803 } | 803 } |
| 804 | 804 |
| 805 namespace { | |
| 806 // ShuffleVectorUtils implements helper functions for rematerializing | |
| 807 // shufflevector instructions from a sequence of extractelement/insertelement | |
| 808 // instructions. It looks for the following pattern: | |
| 809 // | |
| 810 // %t0 = extractelement A, %n0 | |
| 811 // %t1 = extractelement B, %n1 | |
| 812 // %t2 = extractelement C, %n2 | |
| 813 // ... | |
| 814 // %tN = extractelement N, %nN | |
| 815 // %d0 = insertelement undef, %t0, 0 | |
| 816 // %d1 = insertelement %d0, %t1, 1 | |
| 817 // %d2 = insertelement %d1, %t2, 2 | |
| 818 // ... | |
| 819 // %dest = insertelement %d_N-1, %tN, N | |
| 820 // | |
| 821 // where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two | |
| 822 // distinct variables. | |
| 823 namespace ShuffleVectorUtils { | |
| 824 // findAllInserts is used when searching for all the insertelements that are | |
| 825 // used in a shufflevector operation. This function works recursively, when | |
| 826 // invoked with I = i, the function assumes Insts[i] is the last found | |
| 827 // insertelement in the chain. The next insertelement insertruction is saved in | |
| 828 // Insts[i+1]. | |
| 829 bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, | |
| 830 CfgVector<const Inst *> *Insts, SizeT I = 0) { | |
| 831 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); | |
| 832 | |
| 833 if (I > Insts->size()) { | |
| 834 if (Verbose) { | |
| 835 Ctx->getStrDump() << "\tToo many inserts.\n"; | |
| 836 } | |
| 837 return false; | |
| 838 } | |
| 839 | |
| 840 const auto *LastInsert = Insts->at(I); | |
| 841 assert(llvm::isa<InstInsertElement>(LastInsert)); | |
| 842 | |
| 843 if (I == Insts->size() - 1) { | |
| 844 // Matching agains undef is not really needed because the value in Src(0) | |
|
Jim Stichnoth
2016/04/20 23:43:11
against
John
2016/04/21 00:02:17
Done.
| |
| 845 // will be totally overwritten. We still enforece it anyways because the | |
|
Jim Stichnoth
2016/04/20 23:43:11
enforce
John
2016/04/21 00:02:17
Done.
| |
| 846 // pnacl toolchain generates the bitcode with it. | |
|
Jim Stichnoth
2016/04/20 23:43:10
PNaCl
John
2016/04/21 00:02:18
Done.
| |
| 847 if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) { | |
| 848 if (Verbose) { | |
| 849 OstreamLocker _(Ctx); | |
|
Jim Stichnoth
2016/04/20 23:43:11
Instead of using OstreamLocker everywhere, you sho
John
2016/04/21 00:02:17
see if the solution works.
| |
| 850 Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " " | |
| 851 << Insts->size(); | |
| 852 LastInsert->dump(Func); | |
| 853 Ctx->getStrDump() << "\n"; | |
| 854 } | |
| 855 return false; | |
| 856 } | |
| 857 | |
| 858 // The following loop ensures that the insertelements are sorted. In theory, | |
| 859 // we could relax this restriction and allow any order. As long as each | |
| 860 // index appear excatly once, this chain is still a candidate for becoming a | |
|
Jim Stichnoth
2016/04/20 23:43:10
appears exactly
John
2016/04/21 00:02:18
Done.
| |
| 861 // shufflevector. The Insts vector is traversed backwards because the | |
| 862 // instructions are "enqueued" in reverse order. | |
| 863 int32_t ExpectedElement = 0; | |
| 864 for (const auto *I : reverse_range(*Insts)) { | |
| 865 if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() != | |
| 866 ExpectedElement) { | |
| 867 return false; | |
| 868 } | |
| 869 ++ExpectedElement; | |
| 870 } | |
| 871 return true; | |
| 872 } | |
| 873 | |
| 874 const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0)); | |
| 875 const auto *Def = VM->getSingleDefinition(Src0V); | |
| 876 | |
| 877 // Only optimize if the first operand in | |
| 878 // | |
| 879 // Dest = insertelement A, B, 10 | |
| 880 // | |
| 881 // is singly-def'ed. | |
| 882 if (Def == nullptr) { | |
| 883 if (Verbose) { | |
| 884 OstreamLocker _(Ctx); | |
| 885 Ctx->getStrDump() << "\tmulti-def: "; | |
| 886 (*Insts)[I]->dump(Func); | |
| 887 Ctx->getStrDump() << "\n"; | |
| 888 } | |
| 889 return false; | |
| 890 } | |
| 891 | |
| 892 // We also require the (single) definition to come from an insertelement | |
| 893 // instruction. | |
| 894 if (!llvm::isa<InstInsertElement>(Def)) { | |
| 895 if (Verbose) { | |
| 896 OstreamLocker _(Ctx); | |
| 897 Ctx->getStrDump() << "\tnot insert element: "; | |
| 898 Def->dump(Func); | |
| 899 Ctx->getStrDump() << "\n"; | |
| 900 } | |
| 901 return false; | |
| 902 } | |
| 903 | |
| 904 // Everything seems fine, so we save Def in Insts, and delegate the decision | |
| 905 // to findAllInserts. | |
| 906 (*Insts)[I + 1] = Def; | |
| 907 | |
| 908 return findAllInserts(Func, Ctx, VM, Insts, I + 1); | |
| 909 } | |
| 910 | |
| 911 // insertsLastElement returns true if Insert is inserting an element in the last | |
| 912 // position of a vector. | |
| 913 bool insertsLastElement(const Inst &Insert) { | |
| 914 const Type DestTy = Insert.getDest()->getType(); | |
| 915 assert(isVectorType(DestTy)); | |
| 916 const SizeT Elem = | |
| 917 llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue(); | |
| 918 return Elem == typeNumElements(DestTy) - 1; | |
| 919 } | |
| 920 | |
| 921 // findAllExtracts goes over all the insertelement instructions that are | |
| 922 // candidates to be replaced by a shufflevector, and searches for all the | |
| 923 // definitions of the elements being inserted. If all of the elements are the | |
| 924 // result of an extractelement instruction, and all of the extractelements | |
| 925 // operates on at most two different sources, than the instructions can be | |
|
Jim Stichnoth
2016/04/20 23:43:11
operate
John
2016/04/21 00:02:17
Done.
| |
| 926 // replaced by a shufflevector. | |
| 927 bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, | |
| 928 const CfgVector<const Inst *> &Insts, Variable **Src0, | |
| 929 Variable **Src1, CfgVector<const Inst *> *Extracts) { | |
| 930 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); | |
| 931 | |
| 932 *Src0 = nullptr; | |
| 933 *Src1 = nullptr; | |
| 934 assert(Insts.size() > 0); | |
| 935 for (SizeT I = 0; I < Insts.size(); ++I) { | |
| 936 const auto *Insert = Insts.at(I); | |
| 937 const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1)); | |
| 938 if (Src1V == nullptr) { | |
| 939 if (Verbose) { | |
| 940 OstreamLocker _(Ctx); | |
| 941 Ctx->getStrDump() << "src(1) is not a variable: "; | |
| 942 Insert->dump(Func); | |
| 943 Ctx->getStrDump() << "\n"; | |
| 944 } | |
| 945 return false; | |
| 946 } | |
| 947 | |
| 948 const auto *Def = VM->getSingleDefinition(Src1V); | |
| 949 if (Def == nullptr) { | |
| 950 if (Verbose) { | |
| 951 OstreamLocker _(Ctx); | |
| 952 Ctx->getStrDump() << "multi-def src(1): "; | |
| 953 Insert->dump(Func); | |
| 954 Ctx->getStrDump() << "\n"; | |
| 955 } | |
| 956 return false; | |
| 957 } | |
| 958 | |
| 959 if (!llvm::isa<InstExtractElement>(Def)) { | |
| 960 if (Verbose) { | |
| 961 OstreamLocker _(Ctx); | |
| 962 Ctx->getStrDump() << "not extractelement: "; | |
| 963 Def->dump(Func); | |
| 964 Ctx->getStrDump() << "\n"; | |
| 965 } | |
| 966 return false; | |
| 967 } | |
| 968 | |
| 969 auto *Src = llvm::cast<Variable>(Def->getSrc(0)); | |
| 970 if (*Src0 == nullptr) { | |
| 971 // No sources yet. Save Src to Src0. | |
| 972 *Src0 = Src; | |
| 973 } else if (*Src1 == nullptr) { | |
| 974 // We already have a source, so we might save Src in Src1 -- but only if | |
| 975 // Src0 is not Src. | |
| 976 if (*Src0 != Src) { | |
| 977 *Src1 = Src; | |
| 978 } | |
| 979 } else if (Src != *Src0 && Src != *Src1) { | |
| 980 // More than two sources, so we can't rematerialize the shufflevector | |
| 981 // instruction. | |
| 982 if (Verbose) { | |
| 983 Ctx->getStrDump() << "Can't shuffle more than two sources.\n"; | |
| 984 } | |
| 985 return false; | |
| 986 } | |
| 987 | |
| 988 (*Extracts)[I] = Def; | |
| 989 } | |
| 990 | |
| 991 // We should have seen at least one source operand. | |
| 992 assert(*Src0 != nullptr); | |
| 993 | |
| 994 // If a second source was not seeing, then we just make Src1 = Src0 to | |
| 995 // simplify things down stream. This should not matter, as all of the indexes | |
| 996 // in the shufflevector instruction will point to Src0. | |
| 997 if (*Src1 == nullptr) { | |
| 998 *Src1 = *Src0; | |
| 999 } | |
| 1000 | |
| 1001 return true; | |
| 1002 } | |
| 1003 | |
| 1004 } // end of namespace ShuffleVectorUtils | |
| 1005 } // end of anonymous namespace | |
| 1006 | |
| 1007 void Cfg::materializeVectorShuffles() { | |
| 1008 const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat); | |
| 1009 | |
| 1010 if (Verbose) { | |
| 1011 getContext()->getStrDump() << "\nShuffle materialization:\n"; | |
| 1012 } | |
| 1013 | |
| 1014 // MaxVectorElements is the maximum number of elements in the vector types | |
| 1015 // handled by Subzero. We use it to create the Inserts and Extracts vectors | |
| 1016 // with the appropriate size, thus avoinding resize() calls. | |
|
Jim Stichnoth
2016/04/20 23:43:11
avoiding
John
2016/04/21 00:02:17
Done.
| |
| 1017 const SizeT MaxVectorElements = typeNumElements(IceType_v16i8); | |
| 1018 CfgVector<const Inst *> Inserts(MaxVectorElements); | |
| 1019 CfgVector<const Inst *> Extracts(MaxVectorElements); | |
| 1020 | |
| 1021 TimerMarker T(TimerStack::TT_materializeVectorShuffles, this); | |
| 1022 for (CfgNode *Node : Nodes) { | |
| 1023 for (auto &Instr : Node->getInsts()) { | |
| 1024 if (!llvm::isa<InstInsertElement>(Instr)) { | |
| 1025 continue; | |
| 1026 } | |
| 1027 if (!ShuffleVectorUtils::insertsLastElement(Instr)) { | |
| 1028 // To avoid waisting time, we only start the pattern match at the last | |
| 1029 // insertelement instruction -- and go backwards from there. | |
| 1030 continue; | |
| 1031 } | |
| 1032 if (Verbose) { | |
| 1033 OstreamLocker _(getContext()); | |
| 1034 getContext()->getStrDump() << "\tCandidate: "; | |
| 1035 Instr.dump(this); | |
| 1036 getContext()->getStrDump() << "\n"; | |
| 1037 } | |
| 1038 Inserts.resize(typeNumElements(Instr.getDest()->getType())); | |
| 1039 Inserts[0] = &Instr; | |
| 1040 if (!ShuffleVectorUtils::findAllInserts(this, getContext(), | |
| 1041 VMetadata.get(), &Inserts)) { | |
| 1042 // If we fail to find a sequence of insertelements, we stop the | |
| 1043 // optimization. | |
| 1044 if (Verbose) { | |
| 1045 getContext()->getStrDump() << "\tFalse alarm.\n"; | |
| 1046 } | |
| 1047 continue; | |
| 1048 } | |
| 1049 if (Verbose) { | |
| 1050 OstreamLocker _(getContext()); | |
| 1051 getContext()->getStrDump() << "\tFound the following insertelement: \n"; | |
| 1052 for (auto *I : reverse_range(Inserts)) { | |
| 1053 getContext()->getStrDump() << "\t\t"; | |
| 1054 I->dump(this); | |
| 1055 getContext()->getStrDump() << "\n"; | |
| 1056 } | |
| 1057 } | |
| 1058 Extracts.resize(Inserts.size()); | |
| 1059 Variable *Src0; | |
| 1060 Variable *Src1; | |
| 1061 if (!ShuffleVectorUtils::findAllExtracts(this, getContext(), | |
| 1062 VMetadata.get(), Inserts, &Src0, | |
| 1063 &Src1, &Extracts)) { | |
| 1064 // If we fail to match the definitions of the insertelements' sources | |
| 1065 // with extractelement instructions -- or if those instructions operate | |
| 1066 // on more than two different variables --, we stop the optimization. | |
|
Jim Stichnoth
2016/04/20 23:43:11
no comma
John
2016/04/21 00:02:17
Done.
| |
| 1067 if (Verbose) { | |
| 1068 getContext()->getStrDump() << "\tFailed to match extractelements.\n"; | |
| 1069 } | |
| 1070 continue; | |
| 1071 } | |
| 1072 if (Verbose) { | |
| 1073 OstreamLocker _(getContext()); | |
| 1074 getContext()->getStrDump() | |
| 1075 << "\tFound the following insert/extract element pairs: \n"; | |
| 1076 for (SizeT I = 0; I < Inserts.size(); ++I) { | |
| 1077 const SizeT Pos = Inserts.size() - I - 1; | |
| 1078 getContext()->getStrDump() << "\t\tInsert : "; | |
| 1079 Inserts[Pos]->dump(this); | |
| 1080 getContext()->getStrDump() << "\n\t\tExtract: "; | |
| 1081 Extracts[Pos]->dump(this); | |
| 1082 getContext()->getStrDump() << "\n"; | |
| 1083 } | |
| 1084 } | |
| 1085 | |
| 1086 assert(Src0 != nullptr); | |
| 1087 assert(Src1 != nullptr); | |
| 1088 | |
| 1089 auto *ShuffleVector = | |
| 1090 InstShuffleVector::create(this, Instr.getDest(), Src0, Src1); | |
| 1091 assert(ShuffleVector->getSrc(0) == Src0); | |
| 1092 assert(ShuffleVector->getSrc(1) == Src1); | |
| 1093 for (SizeT I = 0; I < Extracts.size(); ++I) { | |
| 1094 const SizeT Pos = Extracts.size() - I - 1; | |
| 1095 auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1)); | |
| 1096 if (Src0 == Extracts[Pos]->getSrc(0)) { | |
| 1097 ShuffleVector->addIndex(Index); | |
| 1098 } else { | |
| 1099 ShuffleVector->addIndex(llvm::cast<ConstantInteger32>( | |
| 1100 Ctx->getConstantInt32(Index->getValue() + Extracts.size()))); | |
| 1101 } | |
| 1102 } | |
| 1103 | |
| 1104 if (Verbose) { | |
| 1105 OstreamLocker _(getContext()); | |
| 1106 getContext()->getStrDump() << "Created: "; | |
| 1107 ShuffleVector->dump(this); | |
| 1108 getContext()->getStrDump() << "\n"; | |
| 1109 } | |
| 1110 | |
| 1111 Instr.setDeleted(); | |
| 1112 auto &LoweringContext = getTarget()->getContext(); | |
| 1113 LoweringContext.setInsertPoint(Instr); | |
| 1114 LoweringContext.insert(ShuffleVector); | |
| 1115 } | |
| 1116 } | |
| 1117 } | |
| 1118 | |
| 805 void Cfg::doNopInsertion() { | 1119 void Cfg::doNopInsertion() { |
| 806 if (!getFlags().getShouldDoNopInsertion()) | 1120 if (!getFlags().getShouldDoNopInsertion()) |
| 807 return; | 1121 return; |
| 808 TimerMarker T(TimerStack::TT_doNopInsertion, this); | 1122 TimerMarker T(TimerStack::TT_doNopInsertion, this); |
| 809 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, | 1123 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion, |
| 810 SequenceNumber); | 1124 SequenceNumber); |
| 811 for (CfgNode *Node : Nodes) | 1125 for (CfgNode *Node : Nodes) |
| 812 Node->doNopInsertion(RNG); | 1126 Node->doNopInsertion(RNG); |
| 813 } | 1127 } |
| 814 | 1128 |
| (...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1172 } | 1486 } |
| 1173 } | 1487 } |
| 1174 // Print each basic block | 1488 // Print each basic block |
| 1175 for (CfgNode *Node : Nodes) | 1489 for (CfgNode *Node : Nodes) |
| 1176 Node->dump(this); | 1490 Node->dump(this); |
| 1177 if (isVerbose(IceV_Instructions)) | 1491 if (isVerbose(IceV_Instructions)) |
| 1178 Str << "}\n"; | 1492 Str << "}\n"; |
| 1179 } | 1493 } |
| 1180 | 1494 |
| 1181 } // end of namespace Ice | 1495 } // end of namespace Ice |
| OLD | NEW |