Index: src/IceCfg.cpp |
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp |
index 23c6728b811cce0709569c324fd16f59062e8960..9a47dd668985387ee896ba7e047d3d5f88eb0ef9 100644 |
--- a/src/IceCfg.cpp |
+++ b/src/IceCfg.cpp |
@@ -802,6 +802,312 @@ void Cfg::doAddressOpt() { |
Node->doAddressOpt(); |
} |
+namespace { |
+// ShuffleVectorUtils implements helper functions for rematerializing |
+// shufflevector instructions from a sequence of extractelement/insertelement |
+// instructions. It looks for the following pattern: |
+// |
+// %t0 = extractelement A, %n0 |
+// %t1 = extractelement B, %n1 |
+// %t2 = extractelement C, %n2 |
+// ... |
+// %tN = extractelement N, %nN |
+// %d0 = insertelement undef, %t0, 0 |
+// %d1 = insertelement %d0, %t1, 1 |
+// %d2 = insertelement %d1, %t2, 2 |
+// ... |
+// %dest = insertelement %d_N-1, %tN, N |
+// |
+// where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two |
+// distinct variables. |
+namespace ShuffleVectorUtils { |
+// findAllInserts is used when searching for all the insertelements that are |
+// used in a shufflevector operation. This function works recursively, when |
+// invoked with I = i, the function assumes Insts[i] is the last found |
+// insertelement in the chain. The next insertelement insertruction is saved in |
+// Insts[i+1]. |
+bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, |
+ CfgVector<const Inst *> *Insts, SizeT I = 0) { |
+ const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); |
+ |
+ if (I > Insts->size()) { |
+ if (Verbose) { |
+ Ctx->getStrDump() << "\tToo many inserts.\n"; |
+ } |
+ return false; |
+ } |
+ |
+ const auto *LastInsert = Insts->at(I); |
+ assert(llvm::isa<InstInsertElement>(LastInsert)); |
+ |
+ if (I == Insts->size() - 1) { |
+ // Matching against undef is not really needed because the value in Src(0) |
+ // will be totally overwritten. We still enforce it anyways because the |
+ // PNaCl toolchain generates the bitcode with it. |
+ if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) { |
+ if (Verbose) { |
+ Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " " |
+ << Insts->size(); |
+ LastInsert->dump(Func); |
+ Ctx->getStrDump() << "\n"; |
+ } |
+ return false; |
+ } |
+ |
+ // The following loop ensures that the insertelements are sorted. In theory, |
+ // we could relax this restriction and allow any order. As long as each |
+ // index appears exactly once, this chain is still a candidate for becoming |
+ // a shufflevector. The Insts vector is traversed backwards because the |
+ // instructions are "enqueued" in reverse order. |
+ int32_t ExpectedElement = 0; |
+ for (const auto *I : reverse_range(*Insts)) { |
+ if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() != |
+ ExpectedElement) { |
+ return false; |
+ } |
+ ++ExpectedElement; |
+ } |
+ return true; |
+ } |
+ |
+ const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0)); |
+ const auto *Def = VM->getSingleDefinition(Src0V); |
+ |
+ // Only optimize if the first operand in |
+ // |
+ // Dest = insertelement A, B, 10 |
+ // |
+ // is singly-def'ed. |
+ if (Def == nullptr) { |
+ if (Verbose) { |
+ Ctx->getStrDump() << "\tmulti-def: "; |
+ (*Insts)[I]->dump(Func); |
+ Ctx->getStrDump() << "\n"; |
+ } |
+ return false; |
+ } |
+ |
+ // We also require the (single) definition to come from an insertelement |
+ // instruction. |
+ if (!llvm::isa<InstInsertElement>(Def)) { |
+ if (Verbose) { |
+ Ctx->getStrDump() << "\tnot insert element: "; |
+ Def->dump(Func); |
+ Ctx->getStrDump() << "\n"; |
+ } |
+ return false; |
+ } |
+ |
+ // Everything seems fine, so we save Def in Insts, and delegate the decision |
+ // to findAllInserts. |
+ (*Insts)[I + 1] = Def; |
+ |
+ return findAllInserts(Func, Ctx, VM, Insts, I + 1); |
+} |
+ |
+// insertsLastElement returns true if Insert is inserting an element in the last |
+// position of a vector. |
+bool insertsLastElement(const Inst &Insert) { |
+ const Type DestTy = Insert.getDest()->getType(); |
+ assert(isVectorType(DestTy)); |
+ const SizeT Elem = |
+ llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue(); |
+ return Elem == typeNumElements(DestTy) - 1; |
+} |
+ |
+// findAllExtracts goes over all the insertelement instructions that are |
+// candidates to be replaced by a shufflevector, and searches for all the |
+// definitions of the elements being inserted. If all of the elements are the |
+// result of an extractelement instruction, and all of the extractelements |
+// operate on at most two different sources, than the instructions can be |
+// replaced by a shufflevector. |
+bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM, |
+ const CfgVector<const Inst *> &Insts, Variable **Src0, |
+ Variable **Src1, CfgVector<const Inst *> *Extracts) { |
+ const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat); |
+ |
+ *Src0 = nullptr; |
+ *Src1 = nullptr; |
+ assert(Insts.size() > 0); |
+ for (SizeT I = 0; I < Insts.size(); ++I) { |
+ const auto *Insert = Insts.at(I); |
+ const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1)); |
+ if (Src1V == nullptr) { |
+ if (Verbose) { |
+ Ctx->getStrDump() << "src(1) is not a variable: "; |
+ Insert->dump(Func); |
+ Ctx->getStrDump() << "\n"; |
+ } |
+ return false; |
+ } |
+ |
+ const auto *Def = VM->getSingleDefinition(Src1V); |
+ if (Def == nullptr) { |
+ if (Verbose) { |
+ Ctx->getStrDump() << "multi-def src(1): "; |
+ Insert->dump(Func); |
+ Ctx->getStrDump() << "\n"; |
+ } |
+ return false; |
+ } |
+ |
+ if (!llvm::isa<InstExtractElement>(Def)) { |
+ if (Verbose) { |
+ Ctx->getStrDump() << "not extractelement: "; |
+ Def->dump(Func); |
+ Ctx->getStrDump() << "\n"; |
+ } |
+ return false; |
+ } |
+ |
+ auto *Src = llvm::cast<Variable>(Def->getSrc(0)); |
+ if (*Src0 == nullptr) { |
+ // No sources yet. Save Src to Src0. |
+ *Src0 = Src; |
+ } else if (*Src1 == nullptr) { |
+ // We already have a source, so we might save Src in Src1 -- but only if |
+ // Src0 is not Src. |
+ if (*Src0 != Src) { |
+ *Src1 = Src; |
+ } |
+ } else if (Src != *Src0 && Src != *Src1) { |
+ // More than two sources, so we can't rematerialize the shufflevector |
+ // instruction. |
+ if (Verbose) { |
+ Ctx->getStrDump() << "Can't shuffle more than two sources.\n"; |
+ } |
+ return false; |
+ } |
+ |
+ (*Extracts)[I] = Def; |
+ } |
+ |
+ // We should have seen at least one source operand. |
+ assert(*Src0 != nullptr); |
+ |
+ // If a second source was not seen, then we just make Src1 = Src0 to simplify |
+ // things down stream. This should not matter, as all of the indexes in the |
+ // shufflevector instruction will point to Src0. |
+ if (*Src1 == nullptr) { |
+ *Src1 = *Src0; |
+ } |
+ |
+ return true; |
+} |
+ |
+} // end of namespace ShuffleVectorUtils |
+} // end of anonymous namespace |
+ |
+void Cfg::materializeVectorShuffles() { |
+ const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat); |
+ |
+ std::unique_ptr<OstreamLocker> L; |
+ if (Verbose) { |
+ L.reset(new OstreamLocker(getContext())); |
+ getContext()->getStrDump() << "\nShuffle materialization:\n"; |
+ } |
+ |
+ // MaxVectorElements is the maximum number of elements in the vector types |
+ // handled by Subzero. We use it to create the Inserts and Extracts vectors |
+ // with the appropriate size, thus avoiding resize() calls. |
+ const SizeT MaxVectorElements = typeNumElements(IceType_v16i8); |
+ CfgVector<const Inst *> Inserts(MaxVectorElements); |
+ CfgVector<const Inst *> Extracts(MaxVectorElements); |
+ |
+ TimerMarker T(TimerStack::TT_materializeVectorShuffles, this); |
+ for (CfgNode *Node : Nodes) { |
+ for (auto &Instr : Node->getInsts()) { |
+ if (!llvm::isa<InstInsertElement>(Instr)) { |
+ continue; |
+ } |
+ if (!ShuffleVectorUtils::insertsLastElement(Instr)) { |
+ // To avoid wasting time, we only start the pattern match at the last |
+ // insertelement instruction -- and go backwards from there. |
+ continue; |
+ } |
+ if (Verbose) { |
+ getContext()->getStrDump() << "\tCandidate: "; |
+ Instr.dump(this); |
+ getContext()->getStrDump() << "\n"; |
+ } |
+ Inserts.resize(typeNumElements(Instr.getDest()->getType())); |
+ Inserts[0] = &Instr; |
+ if (!ShuffleVectorUtils::findAllInserts(this, getContext(), |
+ VMetadata.get(), &Inserts)) { |
+ // If we fail to find a sequence of insertelements, we stop the |
+ // optimization. |
+ if (Verbose) { |
+ getContext()->getStrDump() << "\tFalse alarm.\n"; |
+ } |
+ continue; |
+ } |
+ if (Verbose) { |
+ getContext()->getStrDump() << "\tFound the following insertelement: \n"; |
+ for (auto *I : reverse_range(Inserts)) { |
+ getContext()->getStrDump() << "\t\t"; |
+ I->dump(this); |
+ getContext()->getStrDump() << "\n"; |
+ } |
+ } |
+ Extracts.resize(Inserts.size()); |
+ Variable *Src0; |
+ Variable *Src1; |
+ if (!ShuffleVectorUtils::findAllExtracts(this, getContext(), |
+ VMetadata.get(), Inserts, &Src0, |
+ &Src1, &Extracts)) { |
+ // If we fail to match the definitions of the insertelements' sources |
+ // with extractelement instructions -- or if those instructions operate |
+ // on more than two different variables -- we stop the optimization. |
+ if (Verbose) { |
+ getContext()->getStrDump() << "\tFailed to match extractelements.\n"; |
+ } |
+ continue; |
+ } |
+ if (Verbose) { |
+ getContext()->getStrDump() |
+ << "\tFound the following insert/extract element pairs: \n"; |
+ for (SizeT I = 0; I < Inserts.size(); ++I) { |
+ const SizeT Pos = Inserts.size() - I - 1; |
+ getContext()->getStrDump() << "\t\tInsert : "; |
+ Inserts[Pos]->dump(this); |
+ getContext()->getStrDump() << "\n\t\tExtract: "; |
+ Extracts[Pos]->dump(this); |
+ getContext()->getStrDump() << "\n"; |
+ } |
+ } |
+ |
+ assert(Src0 != nullptr); |
+ assert(Src1 != nullptr); |
+ |
+ auto *ShuffleVector = |
+ InstShuffleVector::create(this, Instr.getDest(), Src0, Src1); |
+ assert(ShuffleVector->getSrc(0) == Src0); |
+ assert(ShuffleVector->getSrc(1) == Src1); |
+ for (SizeT I = 0; I < Extracts.size(); ++I) { |
+ const SizeT Pos = Extracts.size() - I - 1; |
+ auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1)); |
+ if (Src0 == Extracts[Pos]->getSrc(0)) { |
+ ShuffleVector->addIndex(Index); |
+ } else { |
+ ShuffleVector->addIndex(llvm::cast<ConstantInteger32>( |
+ Ctx->getConstantInt32(Index->getValue() + Extracts.size()))); |
+ } |
+ } |
+ |
+ if (Verbose) { |
+ getContext()->getStrDump() << "Created: "; |
+ ShuffleVector->dump(this); |
+ getContext()->getStrDump() << "\n"; |
+ } |
+ |
+ Instr.setDeleted(); |
+ auto &LoweringContext = getTarget()->getContext(); |
+ LoweringContext.setInsertPoint(Instr); |
+ LoweringContext.insert(ShuffleVector); |
+ } |
+ } |
+} |
+ |
void Cfg::doNopInsertion() { |
if (!getFlags().getShouldDoNopInsertion()) |
return; |