Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index 5f83baef11a503f2fd756d404148c5c86ec62bbc..f9488eccb50d35e292be183c3a319dddbec69292 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -260,6 +260,152 @@ ICETYPE_TABLE |
} // end of anonymous namespace |
+BoolFoldingEntry::BoolFoldingEntry(Inst *I) |
+ : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)), IsLiveOut(true), |
+ NumUses(0) {} |
+ |
+BoolFolding::BoolFoldingProducerKind |
+BoolFolding::getProducerKind(const Inst *Instr) { |
+ if (llvm::isa<InstIcmp>(Instr)) { |
+ if (Instr->getSrc(0)->getType() != IceType_i64) |
+ return PK_Icmp32; |
+ return PK_None; // TODO(stichnot): actually PK_Icmp64; |
+ } |
+ return PK_None; // TODO(stichnot): remove this |
+ |
+ if (llvm::isa<InstFcmp>(Instr)) |
+ return PK_Fcmp; |
+ if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
+ switch (Cast->getCastKind()) { |
+ default: |
+ return PK_None; |
+ case InstCast::Trunc: |
+ return PK_Trunc; |
+ } |
+ } |
+ return PK_None; |
+} |
+ |
+BoolFolding::BoolFoldingConsumerKind |
+BoolFolding::getConsumerKind(const Inst *Instr) { |
+ if (llvm::isa<InstBr>(Instr)) |
+ return CK_Br; |
+ if (llvm::isa<InstSelect>(Instr)) |
+ return CK_Select; |
+ return CK_None; // TODO(stichnot): remove this |
+ |
+ if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
+ switch (Cast->getCastKind()) { |
+ default: |
+ return CK_None; |
+ case InstCast::Sext: |
+ return CK_Sext; |
+ case InstCast::Zext: |
+ return CK_Zext; |
+ } |
+ } |
+ return CK_None; |
+} |
+ |
+// Returns true if the producing instruction has a "complex" lowering |
+// sequence. This generally means that its lowering sequence requires |
+// more than one conditional branch, namely 64-bit integer compares |
+// and some floating-point compares. When this is true, and there is |
+// more than one consumer, we prefer to disable the folding |
+// optimization because it minimizes branches. |
+bool BoolFolding::hasComplexLowering(const Inst *Instr) { |
+ switch (getProducerKind(Instr)) { |
+ default: |
+ return false; |
+ case PK_Icmp64: |
+ return true; |
+ case PK_Fcmp: |
+ return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != |
+ CondX86::Br_None; |
+ } |
+} |
+ |
+void BoolFolding::init(CfgNode *Node) { |
+ Producers.clear(); |
+ for (Inst &Instr : Node->getInsts()) { |
+ // Check whether Instr is a valid producer. |
+ Variable *Var = Instr.getDest(); |
+ if (!Instr.isDeleted() // only consider non-deleted instructions |
+ && Var // only instructions with an actual dest var |
+ && Var->getType() == IceType_i1 // only bool-type dest vars |
+ && getProducerKind(&Instr) != PK_None) { // white-listed instructions |
+ Producers[Var->getIndex()] = BoolFoldingEntry(&Instr); |
+ } |
+ // Check each src variable against the map. |
+ for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { |
+ Operand *Src = Instr.getSrc(I); |
+ SizeT NumVars = Src->getNumVars(); |
+ for (SizeT J = 0; J < NumVars; ++J) { |
+ const Variable *Var = Src->getVar(J); |
+ SizeT VarNum = Var->getIndex(); |
+ if (containsValid(VarNum)) { |
+ if (I != 0 // All valid consumers use Var as the first source operand |
+ || getConsumerKind(&Instr) == CK_None // must be white-listed |
+ || (Producers[VarNum].IsComplex && // complex can't be multi-use |
+ Producers[VarNum].NumUses > 0)) { |
+ setInvalid(VarNum); |
+ continue; |
+ } |
+ ++Producers[VarNum].NumUses; |
+ if (Instr.isLastUse(Var)) { |
+ Producers[VarNum].IsLiveOut = false; |
+ } |
+ } |
+ } |
+ } |
+ } |
+ for (auto &I : Producers) { |
+ // Ignore entries previously marked invalid. |
+ if (I.second.Instr == nullptr) |
+ continue; |
+ // Disable the producer if its dest may be live beyond this block. |
+ if (I.second.IsLiveOut) { |
+ setInvalid(I.first); |
+ continue; |
+ } |
+ // Mark as "dead" rather than outright deleting. This is so that |
+ // other peephole style optimizations during or before lowering |
+ // have access to this instruction in undeleted form. See for |
+ // example tryOptimizedCmpxchgCmpBr(). |
+ I.second.Instr->setDead(); |
+ } |
+} |
+ |
+const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const { |
+ auto *Var = llvm::dyn_cast<const Variable>(Opnd); |
+ if (Var == nullptr) |
+ return nullptr; |
+ SizeT VarNum = Var->getIndex(); |
+ auto Element = Producers.find(VarNum); |
+ if (Element == Producers.end()) |
+ return nullptr; |
+ return Element->second.Instr; |
+} |
+ |
+void BoolFolding::dump(const Cfg *Func) const { |
+ if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) |
+ return; |
+ OstreamLocker L(Func->getContext()); |
+ Ostream &Str = Func->getContext()->getStrDump(); |
+ for (auto &I : Producers) { |
+ if (I.second.Instr == nullptr) |
+ continue; |
+ Str << "Found foldable producer:\n "; |
+ I.second.Instr->dump(Func); |
+ Str << "\n"; |
+ } |
+} |
+ |
+void TargetX8632::initNodeForLowering(CfgNode *Node) { |
+ FoldingInfo.init(Node); |
+ FoldingInfo.dump(Func); |
+} |
+ |
TargetX8632::TargetX8632(Cfg *Func) |
: TargetLowering(Func), |
InstructionSet(static_cast<X86InstructionSet>( |
@@ -1719,12 +1865,35 @@ void TargetX8632::lowerAssign(const InstAssign *Inst) { |
void TargetX8632::lowerBr(const InstBr *Inst) { |
if (Inst->isUnconditional()) { |
_br(Inst->getTargetUnconditional()); |
- } else { |
- Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem); |
- Constant *Zero = Ctx->getConstantZero(IceType_i32); |
- _cmp(Src0, Zero); |
- _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
+ return; |
} |
+ Operand *Cond = Inst->getCondition(); |
+ |
+ // Handle folding opportunities. |
+ if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
+ assert(Producer->isDeleted()); |
+ switch (BoolFolding::getProducerKind(Producer)) { |
+ default: |
+ break; |
+ case BoolFolding::PK_Icmp32: { |
+ // TODO(stichnot): Refactor similarities between this block and |
+ // the corresponding code in lowerIcmp(). |
+ auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
+ Operand *Src0 = Producer->getSrc(0); |
+ Operand *Src1 = legalize(Producer->getSrc(1)); |
+ Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
+ _cmp(Src0RM, Src1); |
+ _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), |
+ Inst->getTargetFalse()); |
+ return; |
+ } |
+ } |
+ } |
+ |
+ Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
+ Constant *Zero = Ctx->getConstantZero(IceType_i32); |
+ _cmp(Src0, Zero); |
+ _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
} |
void TargetX8632::lowerCall(const InstCall *Instr) { |
@@ -2678,37 +2847,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
return; |
} |
- // If Src1 is an immediate, or known to be a physical register, we can |
- // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
- // a physical register. (Actually, either Src0 or Src1 can be chosen for |
- // the physical register, but unfortunately we have to commit to one or |
- // the other before register allocation.) |
- bool IsSrc1ImmOrReg = false; |
- if (llvm::isa<Constant>(Src1)) { |
- IsSrc1ImmOrReg = true; |
- } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
- if (Var->hasReg()) |
- IsSrc1ImmOrReg = true; |
- } |
- |
- // Try to fuse a compare immediately followed by a conditional branch. This |
- // is possible when the compare dest and the branch source operands are the |
- // same, and are their only uses. TODO: implement this optimization for i64. |
- if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
- if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
- Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
- NextBr->setDeleted(); |
- Operand *Src0RM = |
- legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
- _cmp(Src0RM, Src1); |
- _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
- NextBr->getTargetFalse()); |
- // Skip over the following branch instruction. |
- Context.advanceNext(); |
- return; |
- } |
- } |
- |
// a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
if (Src0->getType() == IceType_i64) { |
InstIcmp::ICond Condition = Inst->getCondition(); |
@@ -2737,8 +2875,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
} |
// cmp b, c |
- Operand *Src0RM = |
- legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
+ Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
_cmp(Src0RM, Src1); |
_setcc(Dest, getIcmp32Mapping(Inst->getCondition())); |
} |
@@ -4001,7 +4138,51 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
return; |
} |
- // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: |
+ // Handle folding opportunities. |
+ if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
+ assert(Producer->isDeleted()); |
+ switch (BoolFolding::getProducerKind(Producer)) { |
+ default: |
+ break; |
+ case BoolFolding::PK_Icmp32: { |
+ // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
+ auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
+ InstX8632Label *Label = InstX8632Label::create(Func, this); |
+ Operand *Src0 = Producer->getSrc(0); |
+ Operand *Src1 = legalize(Producer->getSrc(1)); |
+ Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
+ _cmp(Src0RM, Src1); |
+ // This is the same code as below (for both i64 and non-i64), |
+ // except without the _cmp instruction and with a different |
+ // branch condition. TODO(stichnot): refactor. |
+ if (Dest->getType() == IceType_i64) { |
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
+ Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm); |
+ Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm); |
+ _mov(DestLo, SrcLoRI); |
+ _mov(DestHi, SrcHiRI); |
+ _br(getIcmp32Mapping(Cmp->getCondition()), Label); |
+ Operand *SrcFLo = loOperand(SrcF); |
+ Operand *SrcFHi = hiOperand(SrcF); |
+ SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm); |
+ SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm); |
+ _mov_nonkillable(DestLo, SrcLoRI); |
+ _mov_nonkillable(DestHi, SrcHiRI); |
+ } else { |
+ SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
+ _mov(Dest, SrcT); |
+ _br(getIcmp32Mapping(Cmp->getCondition()), Label); |
+ SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
+ _mov_nonkillable(Dest, SrcF); |
+ } |
+ Context.insert(Label); |
+ return; |
+ } |
+ } |
+ } |
+ |
+ // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1: |
Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
Constant *Zero = Ctx->getConstantZero(IceType_i32); |
InstX8632Label *Label = InstX8632Label::create(Func, this); |
@@ -4534,6 +4715,23 @@ Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) { |
return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
} |
+// For the cmp instruction, if Src1 is an immediate, or known to be a |
+// physical register, we can allow Src0 to be a memory operand. |
+// Otherwise, Src0 must be copied into a physical register. |
+// (Actually, either Src0 or Src1 can be chosen for the physical |
+// register, but unfortunately we have to commit to one or the other |
+// before register allocation.) |
+Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) { |
+ bool IsSrc1ImmOrReg = false; |
+ if (llvm::isa<Constant>(Src1)) { |
+ IsSrc1ImmOrReg = true; |
+ } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
+ if (Var->hasReg()) |
+ IsSrc1ImmOrReg = true; |
+ } |
+ return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
+} |
+ |
OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) { |
OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); |
// It may be the case that address mode optimization already creates |