Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2276)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 1141213004: Subzero: Fold icmp into br/select lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Remove unnecessary break statement Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 5f83baef11a503f2fd756d404148c5c86ec62bbc..f9488eccb50d35e292be183c3a319dddbec69292 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -260,6 +260,152 @@ ICETYPE_TABLE
} // end of anonymous namespace
+BoolFoldingEntry::BoolFoldingEntry(Inst *I)
+ : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)), IsLiveOut(true),
+ NumUses(0) {}
+
+BoolFolding::BoolFoldingProducerKind
+BoolFolding::getProducerKind(const Inst *Instr) {
+ if (llvm::isa<InstIcmp>(Instr)) {
+ if (Instr->getSrc(0)->getType() != IceType_i64)
+ return PK_Icmp32;
+ return PK_None; // TODO(stichnot): actually PK_Icmp64;
+ }
+ return PK_None; // TODO(stichnot): remove this
+
+ if (llvm::isa<InstFcmp>(Instr))
+ return PK_Fcmp;
+ if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
+ switch (Cast->getCastKind()) {
+ default:
+ return PK_None;
+ case InstCast::Trunc:
+ return PK_Trunc;
+ }
+ }
+ return PK_None;
+}
+
+BoolFolding::BoolFoldingConsumerKind
+BoolFolding::getConsumerKind(const Inst *Instr) {
+ if (llvm::isa<InstBr>(Instr))
+ return CK_Br;
+ if (llvm::isa<InstSelect>(Instr))
+ return CK_Select;
+ return CK_None; // TODO(stichnot): remove this
+
+ if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
+ switch (Cast->getCastKind()) {
+ default:
+ return CK_None;
+ case InstCast::Sext:
+ return CK_Sext;
+ case InstCast::Zext:
+ return CK_Zext;
+ }
+ }
+ return CK_None;
+}
+
+// Returns true if the producing instruction has a "complex" lowering
+// sequence. This generally means that its lowering sequence requires
+// more than one conditional branch, namely 64-bit integer compares
+// and some floating-point compares. When this is true, and there is
+// more than one consumer, we prefer to disable the folding
+// optimization because it minimizes branches.
+bool BoolFolding::hasComplexLowering(const Inst *Instr) {
+ switch (getProducerKind(Instr)) {
+ default:
+ return false;
+ case PK_Icmp64:
+ return true;
+ case PK_Fcmp:
+ return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
+ CondX86::Br_None;
+ }
+}
+
+void BoolFolding::init(CfgNode *Node) {
+ Producers.clear();
+ for (Inst &Instr : Node->getInsts()) {
+ // Check whether Instr is a valid producer.
+ Variable *Var = Instr.getDest();
+ if (!Instr.isDeleted() // only consider non-deleted instructions
+ && Var // only instructions with an actual dest var
+ && Var->getType() == IceType_i1 // only bool-type dest vars
+ && getProducerKind(&Instr) != PK_None) { // white-listed instructions
+ Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);
+ }
+ // Check each src variable against the map.
+ for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
+ Operand *Src = Instr.getSrc(I);
+ SizeT NumVars = Src->getNumVars();
+ for (SizeT J = 0; J < NumVars; ++J) {
+ const Variable *Var = Src->getVar(J);
+ SizeT VarNum = Var->getIndex();
+ if (containsValid(VarNum)) {
+ if (I != 0 // All valid consumers use Var as the first source operand
+ || getConsumerKind(&Instr) == CK_None // must be white-listed
+ || (Producers[VarNum].IsComplex && // complex can't be multi-use
+ Producers[VarNum].NumUses > 0)) {
+ setInvalid(VarNum);
+ continue;
+ }
+ ++Producers[VarNum].NumUses;
+ if (Instr.isLastUse(Var)) {
+ Producers[VarNum].IsLiveOut = false;
+ }
+ }
+ }
+ }
+ }
+ for (auto &I : Producers) {
+ // Ignore entries previously marked invalid.
+ if (I.second.Instr == nullptr)
+ continue;
+ // Disable the producer if its dest may be live beyond this block.
+ if (I.second.IsLiveOut) {
+ setInvalid(I.first);
+ continue;
+ }
+ // Mark as "dead" rather than outright deleting. This is so that
+ // other peephole style optimizations during or before lowering
+ // have access to this instruction in undeleted form. See for
+ // example tryOptimizedCmpxchgCmpBr().
+ I.second.Instr->setDead();
+ }
+}
+
+const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
+ auto *Var = llvm::dyn_cast<const Variable>(Opnd);
+ if (Var == nullptr)
+ return nullptr;
+ SizeT VarNum = Var->getIndex();
+ auto Element = Producers.find(VarNum);
+ if (Element == Producers.end())
+ return nullptr;
+ return Element->second.Instr;
+}
+
+void BoolFolding::dump(const Cfg *Func) const {
+ if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
+ return;
+ OstreamLocker L(Func->getContext());
+ Ostream &Str = Func->getContext()->getStrDump();
+ for (auto &I : Producers) {
+ if (I.second.Instr == nullptr)
+ continue;
+ Str << "Found foldable producer:\n ";
+ I.second.Instr->dump(Func);
+ Str << "\n";
+ }
+}
+
+void TargetX8632::initNodeForLowering(CfgNode *Node) {
+ FoldingInfo.init(Node);
+ FoldingInfo.dump(Func);
+}
+
TargetX8632::TargetX8632(Cfg *Func)
: TargetLowering(Func),
InstructionSet(static_cast<X86InstructionSet>(
@@ -1719,12 +1865,35 @@ void TargetX8632::lowerAssign(const InstAssign *Inst) {
void TargetX8632::lowerBr(const InstBr *Inst) {
if (Inst->isUnconditional()) {
_br(Inst->getTargetUnconditional());
- } else {
- Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _cmp(Src0, Zero);
- _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
+ return;
}
+ Operand *Cond = Inst->getCondition();
+
+ // Handle folding opportunities.
+ if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
+ assert(Producer->isDeleted());
+ switch (BoolFolding::getProducerKind(Producer)) {
+ default:
+ break;
+ case BoolFolding::PK_Icmp32: {
+ // TODO(stichnot): Refactor similarities between this block and
+ // the corresponding code in lowerIcmp().
+ auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
+ Operand *Src0 = Producer->getSrc(0);
+ Operand *Src1 = legalize(Producer->getSrc(1));
+ Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
+ _cmp(Src0RM, Src1);
+ _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
+ Inst->getTargetFalse());
+ return;
+ }
+ }
+ }
+
+ Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ _cmp(Src0, Zero);
+ _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
}
void TargetX8632::lowerCall(const InstCall *Instr) {
@@ -2678,37 +2847,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
return;
}
- // If Src1 is an immediate, or known to be a physical register, we can
- // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
- // a physical register. (Actually, either Src0 or Src1 can be chosen for
- // the physical register, but unfortunately we have to commit to one or
- // the other before register allocation.)
- bool IsSrc1ImmOrReg = false;
- if (llvm::isa<Constant>(Src1)) {
- IsSrc1ImmOrReg = true;
- } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
- if (Var->hasReg())
- IsSrc1ImmOrReg = true;
- }
-
- // Try to fuse a compare immediately followed by a conditional branch. This
- // is possible when the compare dest and the branch source operands are the
- // same, and are their only uses. TODO: implement this optimization for i64.
- if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
- if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
- Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
- NextBr->setDeleted();
- Operand *Src0RM =
- legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
- _cmp(Src0RM, Src1);
- _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
- NextBr->getTargetFalse());
- // Skip over the following branch instruction.
- Context.advanceNext();
- return;
- }
- }
-
// a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
if (Src0->getType() == IceType_i64) {
InstIcmp::ICond Condition = Inst->getCondition();
@@ -2737,8 +2875,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
}
// cmp b, c
- Operand *Src0RM =
- legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
+ Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
_cmp(Src0RM, Src1);
_setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
}
@@ -4001,7 +4138,51 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
return;
}
- // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
+ // Handle folding opportunities.
+ if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
+ assert(Producer->isDeleted());
+ switch (BoolFolding::getProducerKind(Producer)) {
+ default:
+ break;
+ case BoolFolding::PK_Icmp32: {
+ // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
+ auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
+ InstX8632Label *Label = InstX8632Label::create(Func, this);
+ Operand *Src0 = Producer->getSrc(0);
+ Operand *Src1 = legalize(Producer->getSrc(1));
+ Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
+ _cmp(Src0RM, Src1);
+ // This is the same code as below (for both i64 and non-i64),
+ // except without the _cmp instruction and with a different
+ // branch condition. TODO(stichnot): refactor.
+ if (Dest->getType() == IceType_i64) {
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
+ Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
+ _mov(DestLo, SrcLoRI);
+ _mov(DestHi, SrcHiRI);
+ _br(getIcmp32Mapping(Cmp->getCondition()), Label);
+ Operand *SrcFLo = loOperand(SrcF);
+ Operand *SrcFHi = hiOperand(SrcF);
+ SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
+ SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
+ _mov_nonkillable(DestLo, SrcLoRI);
+ _mov_nonkillable(DestHi, SrcHiRI);
+ } else {
+ SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
+ _mov(Dest, SrcT);
+ _br(getIcmp32Mapping(Cmp->getCondition()), Label);
+ SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
+ _mov_nonkillable(Dest, SrcF);
+ }
+ Context.insert(Label);
+ return;
+ }
+ }
+ }
+
+ // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1:
Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
InstX8632Label *Label = InstX8632Label::create(Func, this);
@@ -4534,6 +4715,23 @@ Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}
+// For the cmp instruction, if Src1 is an immediate, or known to be a
+// physical register, we can allow Src0 to be a memory operand.
+// Otherwise, Src0 must be copied into a physical register.
+// (Actually, either Src0 or Src1 can be chosen for the physical
+// register, but unfortunately we have to commit to one or the other
+// before register allocation.)
+Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
+ bool IsSrc1ImmOrReg = false;
+ if (llvm::isa<Constant>(Src1)) {
+ IsSrc1ImmOrReg = true;
+ } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
+ if (Var->hasReg())
+ IsSrc1ImmOrReg = true;
+ }
+ return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
+}
+
OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
// It may be the case that address mode optimization already creates

Powered by Google App Engine
This is Rietveld 408576698