Chromium Code Reviews| Index: src/IceTargetLoweringX8632.cpp |
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
| index 5f83baef11a503f2fd756d404148c5c86ec62bbc..73957805f29be61fe617f9a3439bd32867db521b 100644 |
| --- a/src/IceTargetLoweringX8632.cpp |
| +++ b/src/IceTargetLoweringX8632.cpp |
| @@ -260,6 +260,150 @@ ICETYPE_TABLE |
| } // end of anonymous namespace |
| +BoolFoldingEntry::BoolFoldingEntry(Inst *I) |
| + : Var(I->getDest()), Instr(I), |
| + IsComplex(BoolFolding::hasComplexLowering(I)), IsLiveOut(true), |
| + NumUses(0) {} |
| + |
| +BoolFoldingProducerKind BoolFolding::getProducerKind(const Inst *Instr) { |
| + if (llvm::isa<InstIcmp>(Instr)) { |
| + if (Instr->getSrc(0)->getType() != IceType_i64) |
| + return PK_Icmp32; |
| + return PK_None; // TODO(stichnot): actually PK_Icmp64; |
| + } |
| + return PK_None; // TODO(stichnot): remove this |
| + |
| + if (llvm::isa<InstFcmp>(Instr)) |
| + return PK_Fcmp; |
| + if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| + switch (Cast->getCastKind()) { |
| + default: |
| + return PK_None; |
| + case InstCast::Trunc: |
| + return PK_Trunc; |
| + } |
| + } |
| + return PK_None; |
| +} |
| + |
| +BoolFoldingConsumerKind BoolFolding::getConsumerKind(const Inst *Instr) { |
| + if (llvm::isa<InstBr>(Instr)) |
| + return CK_Br; |
| + if (llvm::isa<InstSelect>(Instr)) |
| + return CK_Select; |
| + return CK_None; // TODO(stichnot): remove this |
| + |
| + if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| + switch (Cast->getCastKind()) { |
| + default: |
| + return CK_None; |
| + case InstCast::Sext: |
| + return CK_Sext; |
| + case InstCast::Zext: |
| + return CK_Zext; |
| + } |
| + } |
| + return CK_None; |
| +} |
| + |
| +// Returns true if the producing instruction has a "complex" lowering |
| +// sequence. This generally that its lowering sequence requires more |
|
jvoung (off chromium)
2015/05/15 21:48:20
"generally that" -> "generally means that", or som
Jim Stichnoth
2015/05/16 17:32:50
Done.
|
| +// than one conditional branch, namely 64-bit integer compares and |
| +// some floating-point compares. When this is true, and there is more |
| +// than one consumer, we prefer to disable the folding optimization |
| +// because it minimizes branches. |
| +bool BoolFolding::hasComplexLowering(const Inst *Instr) { |
| + switch (getProducerKind(Instr)) { |
| + default: |
| + return false; |
| + case PK_Icmp64: |
| + return true; |
| + case PK_Fcmp: |
| + return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != |
| + CondX86::Br_None; |
| + } |
| +} |
| + |
| +void BoolFolding::init(CfgNode *Node) { |
| + Producers.clear(); |
| + for (Inst &Instr : Node->getInsts()) { |
| + // Check whether Instr is a valid producer. |
| + Variable *Var = Instr.getDest(); |
| + if (!Instr.isDeleted() // only consider non-deleted instructions |
| + && Var // only instructions with an actual dest var |
| + && Var->getType() == IceType_i1 // only bool-type dest vars |
| + && getProducerKind(&Instr) != PK_None) { // white-listed instructions |
| + Producers[Var->getIndex()] = BoolFoldingEntry(&Instr); |
| + } |
| + // Check each src variable against the map. |
| + for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { |
| + Operand *Src = Instr.getSrc(I); |
| + SizeT NumVars = Src->getNumVars(); |
| + for (SizeT J = 0; J < NumVars; ++J) { |
| + const Variable *Var = Src->getVar(J); |
| + SizeT VarNum = Var->getIndex(); |
| + if (containsValid(VarNum)) { |
| + if (I != 0 // All valid consumers use Var as the first source operand |
| + || getConsumerKind(&Instr) == CK_None // must be white-listed |
| + || (Producers[VarNum].IsComplex && // complex can't be multi-use |
| + Producers[VarNum].NumUses > 0)) { |
| + setInvalid(VarNum); |
| + continue; |
| + } |
| + ++Producers[VarNum].NumUses; |
| + if (Instr.isLastUse(Var)) { |
| + Producers[VarNum].IsLiveOut = false; |
| + } |
| + } |
| + } |
| + } |
| + } |
| + for (auto &I : Producers) { |
| + // Ignore entries previously marked invalid. |
| + if (I.second.Instr == nullptr) |
| + continue; |
| + // Disable the producer if its dest may be live beyond this block. |
| + if (I.second.IsLiveOut) { |
| + setInvalid(I.first); |
| + continue; |
| + } |
| + // Mark as "dead" rather than outright deleting. This is so that |
| + // other peephole style optimizations during or before lowering |
| + // have access to this instruction in undeleted form. See for |
| + // example tryOptimizedCmpxchgCmpBr(). |
| + I.second.Instr->setDead(); |
| + } |
| +} |
| + |
| +const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const { |
| + auto *Var = llvm::dyn_cast<const Variable>(Opnd); |
| + if (Var == nullptr) |
| + return nullptr; |
| + SizeT VarNum = Var->getIndex(); |
| + if (Producers.find(VarNum) == Producers.end()) |
|
jvoung (off chromium)
2015/05/15 21:48:20
similar -- if two lookups is a concern
Jim Stichnoth
2015/05/16 17:32:50
Done.
|
| + return nullptr; |
| + return Producers.at(VarNum).Instr; |
| +} |
| + |
| +void BoolFolding::dump(const Cfg *Func) const { |
| + if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) |
| + return; |
| + OstreamLocker L(Func->getContext()); |
| + Ostream &Str = Func->getContext()->getStrDump(); |
| + for (auto &I : Producers) { |
| + if (I.second.Instr == nullptr) |
| + continue; |
| + Str << "Found foldable producer:\n "; |
| + I.second.Instr->dump(Func); |
| + Str << "\n"; |
| + } |
| +} |
| + |
| +void TargetX8632::initNodeForLowering(CfgNode *Node) { |
| + FoldingInfo.init(Node); |
| + FoldingInfo.dump(Func); |
| +} |
| + |
| TargetX8632::TargetX8632(Cfg *Func) |
| : TargetLowering(Func), |
| InstructionSet(static_cast<X86InstructionSet>( |
| @@ -1719,12 +1863,43 @@ void TargetX8632::lowerAssign(const InstAssign *Inst) { |
| void TargetX8632::lowerBr(const InstBr *Inst) { |
| if (Inst->isUnconditional()) { |
| _br(Inst->getTargetUnconditional()); |
| - } else { |
| - Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem); |
| - Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| - _cmp(Src0, Zero); |
| - _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| + return; |
| + } |
| + Operand *Cond = Inst->getCondition(); |
| + |
| + // Handle folding opportunities. |
| + if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
| + assert(Producer->isDeleted()); |
| + switch (BoolFolding::getProducerKind(Producer)) { |
| + default: |
| + break; |
| + case PK_Icmp32: { |
| + // TODO(stichnot): Refactor similarities between this block and |
| + // the corresponding code in lowerIcmp(). |
|
jvoung (off chromium)
2015/05/15 21:48:19
As a suggestion, maybe factor out the IsSrc1ImmOrR
Jim Stichnoth
2015/05/16 17:32:50
This is good. I created a new legalize() routine
|
| + auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| + Operand *Src0 = legalize(Producer->getSrc(0)); |
| + Operand *Src1 = legalize(Producer->getSrc(1)); |
| + bool IsSrc1ImmOrReg = false; |
| + if (llvm::isa<Constant>(Src1)) { |
| + IsSrc1ImmOrReg = true; |
| + } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| + if (Var->hasReg()) |
| + IsSrc1ImmOrReg = true; |
| + } |
| + Operand *Src0RM = |
| + legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
| + _cmp(Src0RM, Src1); |
| + _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), |
| + Inst->getTargetFalse()); |
| + return; |
| + } break; |
|
jvoung (off chromium)
2015/05/15 21:48:19
nit: no need for break; after return;? (okay if t
Jim Stichnoth
2015/05/16 17:32:50
You're right - the "break" was left over from my s
|
| + } |
| } |
| + |
| + Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| + Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| + _cmp(Src0, Zero); |
| + _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| } |
| void TargetX8632::lowerCall(const InstCall *Instr) { |
| @@ -2691,24 +2866,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
| IsSrc1ImmOrReg = true; |
| } |
| - // Try to fuse a compare immediately followed by a conditional branch. This |
| - // is possible when the compare dest and the branch source operands are the |
| - // same, and are their only uses. TODO: implement this optimization for i64. |
| - if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
| - if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
| - Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
| - NextBr->setDeleted(); |
| - Operand *Src0RM = |
| - legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
| - _cmp(Src0RM, Src1); |
| - _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
| - NextBr->getTargetFalse()); |
| - // Skip over the following branch instruction. |
| - Context.advanceNext(); |
| - return; |
| - } |
| - } |
| - |
| // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| if (Src0->getType() == IceType_i64) { |
| InstIcmp::ICond Condition = Inst->getCondition(); |
| @@ -4001,7 +4158,59 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
| return; |
| } |
| - // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: |
| + // Handle folding opportunities. |
| + if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
| + assert(Producer->isDeleted()); |
| + switch (BoolFolding::getProducerKind(Producer)) { |
| + default: |
| + break; |
| + case PK_Icmp32: { |
| + // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
| + auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| + InstX8632Label *Label = InstX8632Label::create(Func, this); |
| + Operand *Src0 = legalize(Producer->getSrc(0)); |
| + Operand *Src1 = legalize(Producer->getSrc(1)); |
| + bool IsSrc1ImmOrReg = false; |
| + if (llvm::isa<Constant>(Src1)) { |
| + IsSrc1ImmOrReg = true; |
| + } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| + if (Var->hasReg()) |
| + IsSrc1ImmOrReg = true; |
| + } |
| + Operand *Src0RM = |
| + legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
| + _cmp(Src0RM, Src1); |
| + // This is the same code as below (for both i64 and non-i64), |
| + // except without the _cmp instruction and with a different |
| + // branch condition. TODO(stichnot): refactor. |
| + if (Dest->getType() == IceType_i64) { |
| + Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| + Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| + Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm); |
| + Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm); |
| + _mov(DestLo, SrcLoRI); |
| + _mov(DestHi, SrcHiRI); |
| + _br(getIcmp32Mapping(Cmp->getCondition()), Label); |
| + Operand *SrcFLo = loOperand(SrcF); |
| + Operand *SrcFHi = hiOperand(SrcF); |
| + SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm); |
| + SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm); |
| + _mov_nonkillable(DestLo, SrcLoRI); |
| + _mov_nonkillable(DestHi, SrcHiRI); |
| + } else { |
| + SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
| + _mov(Dest, SrcT); |
| + _br(getIcmp32Mapping(Cmp->getCondition()), Label); |
| + SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
| + _mov_nonkillable(Dest, SrcF); |
| + } |
| + Context.insert(Label); |
| + return; |
| + } break; |
| + } |
| + } |
| + |
| + // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1: |
| Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| InstX8632Label *Label = InstX8632Label::create(Func, this); |