| Index: src/IceTargetLoweringX8632.cpp
|
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
|
| index 5f83baef11a503f2fd756d404148c5c86ec62bbc..f9488eccb50d35e292be183c3a319dddbec69292 100644
|
| --- a/src/IceTargetLoweringX8632.cpp
|
| +++ b/src/IceTargetLoweringX8632.cpp
|
| @@ -260,6 +260,152 @@ ICETYPE_TABLE
|
|
|
| } // end of anonymous namespace
|
|
|
| +BoolFoldingEntry::BoolFoldingEntry(Inst *I)
|
| + : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)), IsLiveOut(true),
|
| + NumUses(0) {}
|
| +
|
| +BoolFolding::BoolFoldingProducerKind
|
| +BoolFolding::getProducerKind(const Inst *Instr) {
|
| + if (llvm::isa<InstIcmp>(Instr)) {
|
| + if (Instr->getSrc(0)->getType() != IceType_i64)
|
| + return PK_Icmp32;
|
| + return PK_None; // TODO(stichnot): actually PK_Icmp64;
|
| + }
|
| + return PK_None; // TODO(stichnot): remove this
|
| +
|
| + if (llvm::isa<InstFcmp>(Instr))
|
| + return PK_Fcmp;
|
| + if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
|
| + switch (Cast->getCastKind()) {
|
| + default:
|
| + return PK_None;
|
| + case InstCast::Trunc:
|
| + return PK_Trunc;
|
| + }
|
| + }
|
| + return PK_None;
|
| +}
|
| +
|
| +BoolFolding::BoolFoldingConsumerKind
|
| +BoolFolding::getConsumerKind(const Inst *Instr) {
|
| + if (llvm::isa<InstBr>(Instr))
|
| + return CK_Br;
|
| + if (llvm::isa<InstSelect>(Instr))
|
| + return CK_Select;
|
| + return CK_None; // TODO(stichnot): remove this
|
| +
|
| + if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
|
| + switch (Cast->getCastKind()) {
|
| + default:
|
| + return CK_None;
|
| + case InstCast::Sext:
|
| + return CK_Sext;
|
| + case InstCast::Zext:
|
| + return CK_Zext;
|
| + }
|
| + }
|
| + return CK_None;
|
| +}
|
| +
|
| +// Returns true if the producing instruction has a "complex" lowering
|
| +// sequence. This generally means that its lowering sequence requires
|
| +// more than one conditional branch, namely 64-bit integer compares
|
| +// and some floating-point compares. When this is true, and there is
|
| +// more than one consumer, we prefer to disable the folding
|
| +// optimization because it minimizes branches.
|
| +bool BoolFolding::hasComplexLowering(const Inst *Instr) {
|
| + switch (getProducerKind(Instr)) {
|
| + default:
|
| + return false;
|
| + case PK_Icmp64:
|
| + return true;
|
| + case PK_Fcmp:
|
| + return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
|
| + CondX86::Br_None;
|
| + }
|
| +}
|
| +
|
| +void BoolFolding::init(CfgNode *Node) {
|
| + Producers.clear();
|
| + for (Inst &Instr : Node->getInsts()) {
|
| + // Check whether Instr is a valid producer.
|
| + Variable *Var = Instr.getDest();
|
| + if (!Instr.isDeleted() // only consider non-deleted instructions
|
| + && Var // only instructions with an actual dest var
|
| + && Var->getType() == IceType_i1 // only bool-type dest vars
|
| + && getProducerKind(&Instr) != PK_None) { // white-listed instructions
|
| + Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);
|
| + }
|
| + // Check each src variable against the map.
|
| + for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
|
| + Operand *Src = Instr.getSrc(I);
|
| + SizeT NumVars = Src->getNumVars();
|
| + for (SizeT J = 0; J < NumVars; ++J) {
|
| + const Variable *Var = Src->getVar(J);
|
| + SizeT VarNum = Var->getIndex();
|
| + if (containsValid(VarNum)) {
|
| + if (I != 0 // All valid consumers use Var as the first source operand
|
| + || getConsumerKind(&Instr) == CK_None // must be white-listed
|
| + || (Producers[VarNum].IsComplex && // complex can't be multi-use
|
| + Producers[VarNum].NumUses > 0)) {
|
| + setInvalid(VarNum);
|
| + continue;
|
| + }
|
| + ++Producers[VarNum].NumUses;
|
| + if (Instr.isLastUse(Var)) {
|
| + Producers[VarNum].IsLiveOut = false;
|
| + }
|
| + }
|
| + }
|
| + }
|
| + }
|
| + for (auto &I : Producers) {
|
| + // Ignore entries previously marked invalid.
|
| + if (I.second.Instr == nullptr)
|
| + continue;
|
| + // Disable the producer if its dest may be live beyond this block.
|
| + if (I.second.IsLiveOut) {
|
| + setInvalid(I.first);
|
| + continue;
|
| + }
|
| + // Mark as "dead" rather than outright deleting. This is so that
|
| + // other peephole style optimizations during or before lowering
|
| + // have access to this instruction in undeleted form. See for
|
| + // example tryOptimizedCmpxchgCmpBr().
|
| + I.second.Instr->setDead();
|
| + }
|
| +}
|
| +
|
| +const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
|
| + auto *Var = llvm::dyn_cast<const Variable>(Opnd);
|
| + if (Var == nullptr)
|
| + return nullptr;
|
| + SizeT VarNum = Var->getIndex();
|
| + auto Element = Producers.find(VarNum);
|
| + if (Element == Producers.end())
|
| + return nullptr;
|
| + return Element->second.Instr;
|
| +}
|
| +
|
| +void BoolFolding::dump(const Cfg *Func) const {
|
| + if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
|
| + return;
|
| + OstreamLocker L(Func->getContext());
|
| + Ostream &Str = Func->getContext()->getStrDump();
|
| + for (auto &I : Producers) {
|
| + if (I.second.Instr == nullptr)
|
| + continue;
|
| + Str << "Found foldable producer:\n ";
|
| + I.second.Instr->dump(Func);
|
| + Str << "\n";
|
| + }
|
| +}
|
| +
|
| +void TargetX8632::initNodeForLowering(CfgNode *Node) {
|
| + FoldingInfo.init(Node);
|
| + FoldingInfo.dump(Func);
|
| +}
|
| +
|
| TargetX8632::TargetX8632(Cfg *Func)
|
| : TargetLowering(Func),
|
| InstructionSet(static_cast<X86InstructionSet>(
|
| @@ -1719,12 +1865,35 @@ void TargetX8632::lowerAssign(const InstAssign *Inst) {
|
| void TargetX8632::lowerBr(const InstBr *Inst) {
|
| if (Inst->isUnconditional()) {
|
| _br(Inst->getTargetUnconditional());
|
| - } else {
|
| - Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - _cmp(Src0, Zero);
|
| - _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
|
| + return;
|
| }
|
| + Operand *Cond = Inst->getCondition();
|
| +
|
| + // Handle folding opportunities.
|
| + if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
|
| + assert(Producer->isDeleted());
|
| + switch (BoolFolding::getProducerKind(Producer)) {
|
| + default:
|
| + break;
|
| + case BoolFolding::PK_Icmp32: {
|
| + // TODO(stichnot): Refactor similarities between this block and
|
| + // the corresponding code in lowerIcmp().
|
| + auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
|
| + Operand *Src0 = Producer->getSrc(0);
|
| + Operand *Src1 = legalize(Producer->getSrc(1));
|
| + Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
|
| + _cmp(Src0RM, Src1);
|
| + _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
|
| + Inst->getTargetFalse());
|
| + return;
|
| + }
|
| + }
|
| + }
|
| +
|
| + Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
|
| + Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| + _cmp(Src0, Zero);
|
| + _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
|
| }
|
|
|
| void TargetX8632::lowerCall(const InstCall *Instr) {
|
| @@ -2678,37 +2847,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
|
| return;
|
| }
|
|
|
| - // If Src1 is an immediate, or known to be a physical register, we can
|
| - // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
|
| - // a physical register. (Actually, either Src0 or Src1 can be chosen for
|
| - // the physical register, but unfortunately we have to commit to one or
|
| - // the other before register allocation.)
|
| - bool IsSrc1ImmOrReg = false;
|
| - if (llvm::isa<Constant>(Src1)) {
|
| - IsSrc1ImmOrReg = true;
|
| - } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
|
| - if (Var->hasReg())
|
| - IsSrc1ImmOrReg = true;
|
| - }
|
| -
|
| - // Try to fuse a compare immediately followed by a conditional branch. This
|
| - // is possible when the compare dest and the branch source operands are the
|
| - // same, and are their only uses. TODO: implement this optimization for i64.
|
| - if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
|
| - if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
|
| - Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
|
| - NextBr->setDeleted();
|
| - Operand *Src0RM =
|
| - legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
|
| - _cmp(Src0RM, Src1);
|
| - _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
|
| - NextBr->getTargetFalse());
|
| - // Skip over the following branch instruction.
|
| - Context.advanceNext();
|
| - return;
|
| - }
|
| - }
|
| -
|
| // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
|
| if (Src0->getType() == IceType_i64) {
|
| InstIcmp::ICond Condition = Inst->getCondition();
|
| @@ -2737,8 +2875,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
|
| }
|
|
|
| // cmp b, c
|
| - Operand *Src0RM =
|
| - legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
|
| + Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
|
| _cmp(Src0RM, Src1);
|
| _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
|
| }
|
| @@ -4001,7 +4138,51 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
|
| return;
|
| }
|
|
|
| - // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
|
| + // Handle folding opportunities.
|
| + if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
|
| + assert(Producer->isDeleted());
|
| + switch (BoolFolding::getProducerKind(Producer)) {
|
| + default:
|
| + break;
|
| + case BoolFolding::PK_Icmp32: {
|
| + // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
|
| + auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
|
| + InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| + Operand *Src0 = Producer->getSrc(0);
|
| + Operand *Src1 = legalize(Producer->getSrc(1));
|
| + Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
|
| + _cmp(Src0RM, Src1);
|
| + // This is the same code as below (for both i64 and non-i64),
|
| + // except without the _cmp instruction and with a different
|
| + // branch condition. TODO(stichnot): refactor.
|
| + if (Dest->getType() == IceType_i64) {
|
| + Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| + Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| + Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
|
| + Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
|
| + _mov(DestLo, SrcLoRI);
|
| + _mov(DestHi, SrcHiRI);
|
| + _br(getIcmp32Mapping(Cmp->getCondition()), Label);
|
| + Operand *SrcFLo = loOperand(SrcF);
|
| + Operand *SrcFHi = hiOperand(SrcF);
|
| + SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
|
| + SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
|
| + _mov_nonkillable(DestLo, SrcLoRI);
|
| + _mov_nonkillable(DestHi, SrcHiRI);
|
| + } else {
|
| + SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
|
| + _mov(Dest, SrcT);
|
| + _br(getIcmp32Mapping(Cmp->getCondition()), Label);
|
| + SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
|
| + _mov_nonkillable(Dest, SrcF);
|
| + }
|
| + Context.insert(Label);
|
| + return;
|
| + }
|
| + }
|
| + }
|
| +
|
| + // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1:
|
| Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
|
| Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| @@ -4534,6 +4715,23 @@ Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
|
| return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
|
| }
|
|
|
| +// For the cmp instruction, if Src1 is an immediate, or known to be a
|
| +// physical register, we can allow Src0 to be a memory operand.
|
| +// Otherwise, Src0 must be copied into a physical register.
|
| +// (Actually, either Src0 or Src1 can be chosen for the physical
|
| +// register, but unfortunately we have to commit to one or the other
|
| +// before register allocation.)
|
| +Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
|
| + bool IsSrc1ImmOrReg = false;
|
| + if (llvm::isa<Constant>(Src1)) {
|
| + IsSrc1ImmOrReg = true;
|
| + } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
|
| + if (Var->hasReg())
|
| + IsSrc1ImmOrReg = true;
|
| + }
|
| + return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
|
| +}
|
| +
|
| OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
|
| OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
|
| // It may be the case that address mode optimization already creates
|
|
|