Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(26)

Unified Diff: src/IceCfg.cpp

Issue 1897243002: Subzero. Rematerializes shufflevector instructions. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceCfg.h ('k') | src/IceClFlags.def » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceCfg.cpp
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 23c6728b811cce0709569c324fd16f59062e8960..9a47dd668985387ee896ba7e047d3d5f88eb0ef9 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -802,6 +802,312 @@ void Cfg::doAddressOpt() {
Node->doAddressOpt();
}
+namespace {
+// ShuffleVectorUtils implements helper functions for rematerializing
+// shufflevector instructions from a sequence of extractelement/insertelement
+// instructions. It looks for the following pattern:
+//
+// %t0 = extractelement A, %n0
+// %t1 = extractelement B, %n1
+// %t2 = extractelement C, %n2
+// ...
+// %tN = extractelement N, %nN
+// %d0 = insertelement undef, %t0, 0
+// %d1 = insertelement %d0, %t1, 1
+// %d2 = insertelement %d1, %t2, 2
+// ...
+// %dest = insertelement %d_N-1, %tN, N
+//
+// where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two
+// distinct variables.
+namespace ShuffleVectorUtils {
+// findAllInserts is used when searching for all the insertelements that are
+// used in a shufflevector operation. This function works recursively, when
+// invoked with I = i, the function assumes Insts[i] is the last found
+// insertelement in the chain. The next insertelement insertruction is saved in
+// Insts[i+1].
+bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
+ CfgVector<const Inst *> *Insts, SizeT I = 0) {
+ const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
+
+ if (I > Insts->size()) {
+ if (Verbose) {
+ Ctx->getStrDump() << "\tToo many inserts.\n";
+ }
+ return false;
+ }
+
+ const auto *LastInsert = Insts->at(I);
+ assert(llvm::isa<InstInsertElement>(LastInsert));
+
+ if (I == Insts->size() - 1) {
+ // Matching against undef is not really needed because the value in Src(0)
+ // will be totally overwritten. We still enforce it anyways because the
+ // PNaCl toolchain generates the bitcode with it.
+ if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) {
+ if (Verbose) {
+ Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " "
+ << Insts->size();
+ LastInsert->dump(Func);
+ Ctx->getStrDump() << "\n";
+ }
+ return false;
+ }
+
+ // The following loop ensures that the insertelements are sorted. In theory,
+ // we could relax this restriction and allow any order. As long as each
+ // index appears exactly once, this chain is still a candidate for becoming
+ // a shufflevector. The Insts vector is traversed backwards because the
+ // instructions are "enqueued" in reverse order.
+ int32_t ExpectedElement = 0;
+ for (const auto *I : reverse_range(*Insts)) {
+ if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() !=
+ ExpectedElement) {
+ return false;
+ }
+ ++ExpectedElement;
+ }
+ return true;
+ }
+
+ const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0));
+ const auto *Def = VM->getSingleDefinition(Src0V);
+
+ // Only optimize if the first operand in
+ //
+ // Dest = insertelement A, B, 10
+ //
+ // is singly-def'ed.
+ if (Def == nullptr) {
+ if (Verbose) {
+ Ctx->getStrDump() << "\tmulti-def: ";
+ (*Insts)[I]->dump(Func);
+ Ctx->getStrDump() << "\n";
+ }
+ return false;
+ }
+
+ // We also require the (single) definition to come from an insertelement
+ // instruction.
+ if (!llvm::isa<InstInsertElement>(Def)) {
+ if (Verbose) {
+ Ctx->getStrDump() << "\tnot insert element: ";
+ Def->dump(Func);
+ Ctx->getStrDump() << "\n";
+ }
+ return false;
+ }
+
+ // Everything seems fine, so we save Def in Insts, and delegate the decision
+ // to findAllInserts.
+ (*Insts)[I + 1] = Def;
+
+ return findAllInserts(Func, Ctx, VM, Insts, I + 1);
+}
+
+// insertsLastElement returns true if Insert is inserting an element in the last
+// position of a vector.
+bool insertsLastElement(const Inst &Insert) {
+ const Type DestTy = Insert.getDest()->getType();
+ assert(isVectorType(DestTy));
+ const SizeT Elem =
+ llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue();
+ return Elem == typeNumElements(DestTy) - 1;
+}
+
+// findAllExtracts goes over all the insertelement instructions that are
+// candidates to be replaced by a shufflevector, and searches for all the
+// definitions of the elements being inserted. If all of the elements are the
+// result of an extractelement instruction, and all of the extractelements
+// operate on at most two different sources, than the instructions can be
+// replaced by a shufflevector.
+bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
+ const CfgVector<const Inst *> &Insts, Variable **Src0,
+ Variable **Src1, CfgVector<const Inst *> *Extracts) {
+ const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
+
+ *Src0 = nullptr;
+ *Src1 = nullptr;
+ assert(Insts.size() > 0);
+ for (SizeT I = 0; I < Insts.size(); ++I) {
+ const auto *Insert = Insts.at(I);
+ const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1));
+ if (Src1V == nullptr) {
+ if (Verbose) {
+ Ctx->getStrDump() << "src(1) is not a variable: ";
+ Insert->dump(Func);
+ Ctx->getStrDump() << "\n";
+ }
+ return false;
+ }
+
+ const auto *Def = VM->getSingleDefinition(Src1V);
+ if (Def == nullptr) {
+ if (Verbose) {
+ Ctx->getStrDump() << "multi-def src(1): ";
+ Insert->dump(Func);
+ Ctx->getStrDump() << "\n";
+ }
+ return false;
+ }
+
+ if (!llvm::isa<InstExtractElement>(Def)) {
+ if (Verbose) {
+ Ctx->getStrDump() << "not extractelement: ";
+ Def->dump(Func);
+ Ctx->getStrDump() << "\n";
+ }
+ return false;
+ }
+
+ auto *Src = llvm::cast<Variable>(Def->getSrc(0));
+ if (*Src0 == nullptr) {
+ // No sources yet. Save Src to Src0.
+ *Src0 = Src;
+ } else if (*Src1 == nullptr) {
+ // We already have a source, so we might save Src in Src1 -- but only if
+ // Src0 is not Src.
+ if (*Src0 != Src) {
+ *Src1 = Src;
+ }
+ } else if (Src != *Src0 && Src != *Src1) {
+ // More than two sources, so we can't rematerialize the shufflevector
+ // instruction.
+ if (Verbose) {
+ Ctx->getStrDump() << "Can't shuffle more than two sources.\n";
+ }
+ return false;
+ }
+
+ (*Extracts)[I] = Def;
+ }
+
+ // We should have seen at least one source operand.
+ assert(*Src0 != nullptr);
+
+ // If a second source was not seen, then we just make Src1 = Src0 to simplify
+ // things down stream. This should not matter, as all of the indexes in the
+ // shufflevector instruction will point to Src0.
+ if (*Src1 == nullptr) {
+ *Src1 = *Src0;
+ }
+
+ return true;
+}
+
+} // end of namespace ShuffleVectorUtils
+} // end of anonymous namespace
+
+void Cfg::materializeVectorShuffles() {
+ const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat);
+
+ std::unique_ptr<OstreamLocker> L;
+ if (Verbose) {
+ L.reset(new OstreamLocker(getContext()));
+ getContext()->getStrDump() << "\nShuffle materialization:\n";
+ }
+
+ // MaxVectorElements is the maximum number of elements in the vector types
+ // handled by Subzero. We use it to create the Inserts and Extracts vectors
+ // with the appropriate size, thus avoiding resize() calls.
+ const SizeT MaxVectorElements = typeNumElements(IceType_v16i8);
+ CfgVector<const Inst *> Inserts(MaxVectorElements);
+ CfgVector<const Inst *> Extracts(MaxVectorElements);
+
+ TimerMarker T(TimerStack::TT_materializeVectorShuffles, this);
+ for (CfgNode *Node : Nodes) {
+ for (auto &Instr : Node->getInsts()) {
+ if (!llvm::isa<InstInsertElement>(Instr)) {
+ continue;
+ }
+ if (!ShuffleVectorUtils::insertsLastElement(Instr)) {
+ // To avoid wasting time, we only start the pattern match at the last
+ // insertelement instruction -- and go backwards from there.
+ continue;
+ }
+ if (Verbose) {
+ getContext()->getStrDump() << "\tCandidate: ";
+ Instr.dump(this);
+ getContext()->getStrDump() << "\n";
+ }
+ Inserts.resize(typeNumElements(Instr.getDest()->getType()));
+ Inserts[0] = &Instr;
+ if (!ShuffleVectorUtils::findAllInserts(this, getContext(),
+ VMetadata.get(), &Inserts)) {
+ // If we fail to find a sequence of insertelements, we stop the
+ // optimization.
+ if (Verbose) {
+ getContext()->getStrDump() << "\tFalse alarm.\n";
+ }
+ continue;
+ }
+ if (Verbose) {
+ getContext()->getStrDump() << "\tFound the following insertelement: \n";
+ for (auto *I : reverse_range(Inserts)) {
+ getContext()->getStrDump() << "\t\t";
+ I->dump(this);
+ getContext()->getStrDump() << "\n";
+ }
+ }
+ Extracts.resize(Inserts.size());
+ Variable *Src0;
+ Variable *Src1;
+ if (!ShuffleVectorUtils::findAllExtracts(this, getContext(),
+ VMetadata.get(), Inserts, &Src0,
+ &Src1, &Extracts)) {
+ // If we fail to match the definitions of the insertelements' sources
+ // with extractelement instructions -- or if those instructions operate
+ // on more than two different variables -- we stop the optimization.
+ if (Verbose) {
+ getContext()->getStrDump() << "\tFailed to match extractelements.\n";
+ }
+ continue;
+ }
+ if (Verbose) {
+ getContext()->getStrDump()
+ << "\tFound the following insert/extract element pairs: \n";
+ for (SizeT I = 0; I < Inserts.size(); ++I) {
+ const SizeT Pos = Inserts.size() - I - 1;
+ getContext()->getStrDump() << "\t\tInsert : ";
+ Inserts[Pos]->dump(this);
+ getContext()->getStrDump() << "\n\t\tExtract: ";
+ Extracts[Pos]->dump(this);
+ getContext()->getStrDump() << "\n";
+ }
+ }
+
+ assert(Src0 != nullptr);
+ assert(Src1 != nullptr);
+
+ auto *ShuffleVector =
+ InstShuffleVector::create(this, Instr.getDest(), Src0, Src1);
+ assert(ShuffleVector->getSrc(0) == Src0);
+ assert(ShuffleVector->getSrc(1) == Src1);
+ for (SizeT I = 0; I < Extracts.size(); ++I) {
+ const SizeT Pos = Extracts.size() - I - 1;
+ auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1));
+ if (Src0 == Extracts[Pos]->getSrc(0)) {
+ ShuffleVector->addIndex(Index);
+ } else {
+ ShuffleVector->addIndex(llvm::cast<ConstantInteger32>(
+ Ctx->getConstantInt32(Index->getValue() + Extracts.size())));
+ }
+ }
+
+ if (Verbose) {
+ getContext()->getStrDump() << "Created: ";
+ ShuffleVector->dump(this);
+ getContext()->getStrDump() << "\n";
+ }
+
+ Instr.setDeleted();
+ auto &LoweringContext = getTarget()->getContext();
+ LoweringContext.setInsertPoint(Instr);
+ LoweringContext.insert(ShuffleVector);
+ }
+ }
+}
+
void Cfg::doNopInsertion() {
if (!getFlags().getShouldDoNopInsertion())
return;
« no previous file with comments | « src/IceCfg.h ('k') | src/IceClFlags.def » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698