Chromium Code Reviews| Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 1818f023d07449cdc6e85c09cb6facb68857fa1b..0feed94a526744d8e310acc7bc548fa330153a75 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -99,6 +99,8 @@ public: |
| static BoolFoldingProducerKind getProducerKind(const Inst *Instr); |
| static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); |
| static bool hasComplexLowering(const Inst *Instr); |
| + static bool isValidFolding(BoolFoldingProducerKind ProducerKind, |
| + BoolFoldingConsumerKind ConsumerKind); |
| void init(CfgNode *Node); |
| const Inst *getProducerFor(const Operand *Opnd) const; |
| void dump(const Cfg *Func) const; |
| @@ -193,6 +195,22 @@ bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
| } |
| template <class MachineTraits> |
| +bool BoolFolding<MachineTraits>::isValidFolding( |
| + typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, |
| + typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { |
| + switch (ProducerKind) { |
| + default: |
| + return false; |
| + case PK_Icmp32: |
| + case PK_Icmp64: |
| + case PK_Fcmp: |
| + return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select); |
| + case PK_Arith: |
| + return ConsumerKind == CK_Br; |
| + } |
| +} |
| + |
| +template <class MachineTraits> |
| void BoolFolding<MachineTraits>::init(CfgNode *Node) { |
| Producers.clear(); |
| for (Inst &Instr : Node->getInsts()) { |
| @@ -207,23 +225,32 @@ void BoolFolding<MachineTraits>::init(CfgNode *Node) { |
| // Check each src variable against the map. |
| FOREACH_VAR_IN_INST(Var, Instr) { |
| SizeT VarNum = Var->getIndex(); |
| - if (containsValid(VarNum)) { |
| - if (IndexOfVarOperandInInst(Var) != |
| - 0 // All valid consumers use Var as the first source operand |
| - || |
| - getConsumerKind(&Instr) == CK_None // must be white-listed |
| - || |
| - (getConsumerKind(&Instr) != CK_Br && // Icmp64 only folds in branch |
| - getProducerKind(Producers[VarNum].Instr) != PK_Icmp32) || |
| - (Producers[VarNum].IsComplex && // complex can't be multi-use |
| - Producers[VarNum].NumUses > 0)) { |
| - setInvalid(VarNum); |
| - continue; |
| - } |
| - ++Producers[VarNum].NumUses; |
| - if (Instr.isLastUse(Var)) { |
| - Producers[VarNum].IsLiveOut = false; |
| - } |
| + if (!containsValid(VarNum)) |
| + continue; |
| + // All valid consumers use Var as the first source operand |
| + if (IndexOfVarOperandInInst(Var) != 0) { |
| + setInvalid(VarNum); |
| + continue; |
| + } |
| + // Consumer instructions must be white-listed |
| + auto ConsumerKind = getConsumerKind(&Instr); |
|
John
2015/12/07 13:07:44
I personally like using auto **everywhere** but un...
Jim Stichnoth
2015/12/08 18:55:59
I don't think auto should be used here.
sehr
2015/12/15 20:45:44
Jim shot you down :-).
sehr
2015/12/15 20:45:44
Done.
|
| + if (ConsumerKind == CK_None) { |
| + setInvalid(VarNum); |
| + continue; |
| + } |
| + auto ProducerKind = getProducerKind(Producers[VarNum].Instr); |
| + if (!isValidFolding(ProducerKind, ConsumerKind)) { |
| + setInvalid(VarNum); |
| + continue; |
| + } |
| + // Avoid creating multiple copies of complex producer instructions. |
| + if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) { |
| + setInvalid(VarNum); |
| + continue; |
| + } |
| + ++Producers[VarNum].NumUses; |
| + if (Instr.isLastUse(Var)) { |
| + Producers[VarNum].IsLiveOut = false; |
| } |
| } |
| } |
| @@ -1885,63 +1912,36 @@ void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { |
| Context.insert(InstFakeDef::create(Func, Dest)); |
| return; |
| } |
| - Operand *Src0 = Inst->getSrc(0); |
| - assert(Dest->getType() == Src0->getType()); |
| - if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| - Src0 = legalize(Src0); |
| - Operand *Src0Lo = loOperand(Src0); |
| - Operand *Src0Hi = hiOperand(Src0); |
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| - Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| - _mov(T_Lo, Src0Lo); |
| - _mov(DestLo, T_Lo); |
| - _mov(T_Hi, Src0Hi); |
| - _mov(DestHi, T_Hi); |
| - } else { |
| - Operand *Src0Legal; |
| - if (Dest->hasReg()) { |
| - // If Dest already has a physical register, then only basic legalization |
| - // is needed, as the source operand can be a register, immediate, or |
| - // memory. |
| - Src0Legal = legalize(Src0, Legal_Reg, Dest->getRegNum()); |
| - } else { |
| - // If Dest could be a stack operand, then RI must be a physical register |
| - // or a scalar integer immediate. |
| - Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm); |
| - } |
| - if (isVectorType(Dest->getType())) |
| - _movp(Dest, Src0Legal); |
| - else |
| - _mov(Dest, Src0Legal); |
| - } |
| + Operand *Src = Inst->getSrc(0); |
| + assert(Dest->getType() == Src->getType()); |
| + lowerMove(Dest, Src, false); |
| } |
| template <class Machine> |
| -void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { |
| - if (Inst->isUnconditional()) { |
| - _br(Inst->getTargetUnconditional()); |
| +void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { |
| + if (Br->isUnconditional()) { |
| + _br(Br->getTargetUnconditional()); |
| return; |
| } |
| - Operand *Cond = Inst->getCondition(); |
| + Operand *Cond = Br->getCondition(); |
| // Handle folding opportunities. |
| - if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
| + if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
| assert(Producer->isDeleted()); |
| switch (BoolFolding::getProducerKind(Producer)) { |
| default: |
| break; |
| case BoolFolding::PK_Icmp32: |
| case BoolFolding::PK_Icmp64: { |
| - lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst); |
| + lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Br); |
| return; |
| } |
| case BoolFolding::PK_Fcmp: { |
| - lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst); |
| + lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Br); |
| return; |
| } |
| case BoolFolding::PK_Arith: { |
| - lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst); |
| + lowerArithAndConsumer(llvm::dyn_cast<InstArithmetic>(Producer), Br); |
| return; |
| } |
| } |
| @@ -1949,7 +1949,7 @@ void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { |
| Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| _cmp(Src0, Zero); |
| - _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| + _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| } |
| template <class Machine> |
| @@ -2483,76 +2483,32 @@ void TargetX86Base<Machine>::lowerExtractElement( |
| } |
| template <class Machine> |
| -void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { |
| - constexpr InstBr *Br = nullptr; |
| - lowerFcmpAndBr(Inst, Br); |
| -} |
| - |
| -template <class Machine> |
| -void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, |
| - const InstBr *Br) { |
| - Operand *Src0 = Inst->getSrc(0); |
| - Operand *Src1 = Inst->getSrc(1); |
| - Variable *Dest = Inst->getDest(); |
| +void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) { |
| + Variable *Dest = Fcmp->getDest(); |
| if (isVectorType(Dest->getType())) { |
| - if (Br) |
| - llvm::report_fatal_error("vector compare/branch cannot be folded"); |
| - InstFcmp::FCond Condition = Inst->getCondition(); |
| - size_t Index = static_cast<size_t>(Condition); |
| - assert(Index < Traits::TableFcmpSize); |
| - |
| - if (Traits::TableFcmp[Index].SwapVectorOperands) |
| - std::swap(Src0, Src1); |
| - |
| - Variable *T = nullptr; |
| + lowerFcmpVector(Fcmp); |
| + } else { |
| + constexpr Inst *Consumer = nullptr; |
| + lowerFcmpAndConsumer(Fcmp, Consumer); |
| + } |
| +} |
| - if (Condition == InstFcmp::True) { |
| - // makeVectorOfOnes() requires an integer vector type. |
| - T = makeVectorOfMinusOnes(IceType_v4i32); |
| - } else if (Condition == InstFcmp::False) { |
| - T = makeVectorOfZeros(Dest->getType()); |
| - } else { |
| - Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| - Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| - if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| - Src1RM = legalizeToReg(Src1RM); |
| - |
| - switch (Condition) { |
| - default: { |
| - typename Traits::Cond::CmppsCond Predicate = |
| - Traits::TableFcmp[Index].Predicate; |
| - assert(Predicate != Traits::Cond::Cmpps_Invalid); |
| - T = makeReg(Src0RM->getType()); |
| - _movp(T, Src0RM); |
| - _cmpps(T, Src1RM, Predicate); |
| - } break; |
| - case InstFcmp::One: { |
| - // Check both unequal and ordered. |
| - T = makeReg(Src0RM->getType()); |
| - Variable *T2 = makeReg(Src0RM->getType()); |
| - _movp(T, Src0RM); |
| - _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq); |
| - _movp(T2, Src0RM); |
| - _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord); |
| - _pand(T, T2); |
| - } break; |
| - case InstFcmp::Ueq: { |
| - // Check both equal or unordered. |
| - T = makeReg(Src0RM->getType()); |
| - Variable *T2 = makeReg(Src0RM->getType()); |
| - _movp(T, Src0RM); |
| - _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq); |
| - _movp(T2, Src0RM); |
| - _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord); |
| - _por(T, T2); |
| - } break; |
| - } |
| +template <class Machine> |
| +void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, |
| + const Inst *Consumer) { |
| + Operand *Src0 = Fcmp->getSrc(0); |
| + Operand *Src1 = Fcmp->getSrc(1); |
| + Variable *Dest = Fcmp->getDest(); |
| + |
| + if (isVectorType(Dest->getType())) |
| + llvm::report_fatal_error("Vector compare/branch cannot be folded"); |
| + |
| + if (Consumer != nullptr) { |
|
John
2015/12/07 13:07:44
Consider moving this block down to where InstSelec...
sehr
2015/12/15 20:45:44
It's up here as a short-circuit before any lowerin...
|
| + if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| + if (lowerOptimizeFcmpSelect(Fcmp, Select)) |
| + return; |
| } |
| - |
| - _movp(Dest, T); |
| - eliminateNextVectorSextInstruction(Dest); |
| - return; |
| } |
| // Lowering a = fcmp cond, b, c |
| @@ -2568,7 +2524,7 @@ void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, |
| // setcc lowering when C1 != Br_None && C2 == Br_None: |
| // ucomiss b, c /* but swap b,c order if SwapOperands==true */ |
| // setcc a, C1 |
| - InstFcmp::FCond Condition = Inst->getCondition(); |
| + InstFcmp::FCond Condition = Fcmp->getCondition(); |
| size_t Index = static_cast<size_t>(Condition); |
| assert(Index < Traits::TableFcmpSize); |
| if (Traits::TableFcmp[Index].SwapScalarOperands) |
| @@ -2583,12 +2539,12 @@ void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, |
| _ucomiss(T, Src1RM); |
| if (!HasC2) { |
| assert(Traits::TableFcmp[Index].Default); |
| - setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br); |
| + setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer); |
| return; |
| } |
| } |
| int32_t IntDefault = Traits::TableFcmp[Index].Default; |
| - if (Br == nullptr) { |
| + if (Consumer == nullptr) { |
| Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); |
| _mov(Dest, Default); |
| if (HasC1) { |
| @@ -2602,7 +2558,7 @@ void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, |
| _mov_redefined(Dest, NonDefault); |
| Context.insert(Label); |
| } |
|
Jim Stichnoth
2015/12/08 18:55:59
May be an opportunity to put an early return in ea...
sehr
2015/12/15 20:45:44
Done, here and everywhere.
|
| - } else { |
| + } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| CfgNode *TrueSucc = Br->getTargetTrue(); |
| CfgNode *FalseSucc = Br->getTargetFalse(); |
| if (IntDefault != 0) |
| @@ -2616,143 +2572,114 @@ void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, |
| return; |
| } |
| _br(FalseSucc); |
| + } else if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| + Operand *SrcT = Select->getTrueOperand(); |
| + Operand *SrcF = Select->getFalseOperand(); |
| + Variable *SelectDest = Select->getDest(); |
| + if (IntDefault != 0) |
| + std::swap(SrcT, SrcF); |
| + lowerMove(SelectDest, SrcF, false); |
| + if (HasC1) { |
| + typename Traits::Insts::Label *Label = |
| + Traits::Insts::Label::create(Func, this); |
| + _br(Traits::TableFcmp[Index].C1, Label); |
| + if (HasC2) { |
| + _br(Traits::TableFcmp[Index].C2, Label); |
| + } |
| + static constexpr bool IsRedefinition = true; |
| + lowerMove(SelectDest, SrcT, IsRedefinition); |
| + Context.insert(Label); |
| + } |
| + } else { |
| + llvm::report_fatal_error("Unexpected consumer type"); |
| } |
| } |
| -inline bool isZero(const Operand *Opnd) { |
| - if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) |
| - return C64->getValue() == 0; |
| - if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| - return C32->getValue() == 0; |
| - return false; |
| -} |
| - |
| template <class Machine> |
| -void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) { |
| - constexpr InstBr *Br = nullptr; |
| - lowerIcmpAndBr(Inst, Br); |
| -} |
| +void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) { |
| + Operand *Src0 = Fcmp->getSrc(0); |
| + Operand *Src1 = Fcmp->getSrc(1); |
| + Variable *Dest = Fcmp->getDest(); |
| -template <class Machine> |
| -void TargetX86Base<Machine>::lowerIcmpAndBr(const InstIcmp *Icmp, |
| - const InstBr *Br) { |
| - Operand *Src0 = legalize(Icmp->getSrc(0)); |
| - Operand *Src1 = legalize(Icmp->getSrc(1)); |
| - Variable *Dest = Icmp->getDest(); |
| + if (!isVectorType(Dest->getType())) |
| + llvm::report_fatal_error("Expected vector compare"); |
| - if (isVectorType(Dest->getType())) { |
| - if (Br) |
| - llvm::report_fatal_error("vector compare/branch cannot be folded"); |
| - Type Ty = Src0->getType(); |
| - // Promote i1 vectors to 128 bit integer vector types. |
| - if (typeElementType(Ty) == IceType_i1) { |
| - Type NewTy = IceType_NUM; |
| - switch (Ty) { |
| - default: |
| - llvm_unreachable("unexpected type"); |
| - break; |
| - case IceType_v4i1: |
| - NewTy = IceType_v4i32; |
| - break; |
| - case IceType_v8i1: |
| - NewTy = IceType_v8i16; |
| - break; |
| - case IceType_v16i1: |
| - NewTy = IceType_v16i8; |
| - break; |
| - } |
| - Variable *NewSrc0 = Func->makeVariable(NewTy); |
| - Variable *NewSrc1 = Func->makeVariable(NewTy); |
| - lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); |
| - lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); |
| - Src0 = NewSrc0; |
| - Src1 = NewSrc1; |
| - Ty = NewTy; |
| - } |
| + InstFcmp::FCond Condition = Fcmp->getCondition(); |
| + size_t Index = static_cast<size_t>(Condition); |
| + assert(Index < Traits::TableFcmpSize); |
| + |
| + if (Traits::TableFcmp[Index].SwapVectorOperands) |
| + std::swap(Src0, Src1); |
| - InstIcmp::ICond Condition = Icmp->getCondition(); |
| + Variable *T = nullptr; |
| + if (Condition == InstFcmp::True) { |
| + // makeVectorOfOnes() requires an integer vector type. |
| + T = makeVectorOfMinusOnes(IceType_v4i32); |
| + } else if (Condition == InstFcmp::False) { |
| + T = makeVectorOfZeros(Dest->getType()); |
| + } else { |
| Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| + if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| + Src1RM = legalizeToReg(Src1RM); |
| - // SSE2 only has signed comparison operations. Transform unsigned inputs in |
| - // a manner that allows for the use of signed comparison operations by |
| - // flipping the high order bits. |
| - if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || |
| - Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { |
| - Variable *T0 = makeReg(Ty); |
| - Variable *T1 = makeReg(Ty); |
| - Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
| - _movp(T0, Src0RM); |
| - _pxor(T0, HighOrderBits); |
| - _movp(T1, Src1RM); |
| - _pxor(T1, HighOrderBits); |
| - Src0RM = T0; |
| - Src1RM = T1; |
| - } |
| - |
| - Variable *T = makeReg(Ty); |
| switch (Condition) { |
| - default: |
| - llvm_unreachable("unexpected condition"); |
| - break; |
| - case InstIcmp::Eq: { |
| - if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| - Src1RM = legalizeToReg(Src1RM); |
| + default: { |
| + typename Traits::Cond::CmppsCond Predicate = |
| + Traits::TableFcmp[Index].Predicate; |
| + assert(Predicate != Traits::Cond::Cmpps_Invalid); |
| + T = makeReg(Src0RM->getType()); |
| _movp(T, Src0RM); |
| - _pcmpeq(T, Src1RM); |
| + _cmpps(T, Src1RM, Predicate); |
| } break; |
| - case InstIcmp::Ne: { |
| - if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| - Src1RM = legalizeToReg(Src1RM); |
| + case InstFcmp::One: { |
| + // Check both unequal and ordered. |
| + T = makeReg(Src0RM->getType()); |
| + Variable *T2 = makeReg(Src0RM->getType()); |
| _movp(T, Src0RM); |
| - _pcmpeq(T, Src1RM); |
| - Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| - _pxor(T, MinusOne); |
| + _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq); |
| + _movp(T2, Src0RM); |
| + _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord); |
| + _pand(T, T2); |
| } break; |
| - case InstIcmp::Ugt: |
| - case InstIcmp::Sgt: { |
| - if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| - Src1RM = legalizeToReg(Src1RM); |
| + case InstFcmp::Ueq: { |
| + // Check both equal or unordered. |
| + T = makeReg(Src0RM->getType()); |
| + Variable *T2 = makeReg(Src0RM->getType()); |
| _movp(T, Src0RM); |
| - _pcmpgt(T, Src1RM); |
| - } break; |
| - case InstIcmp::Uge: |
| - case InstIcmp::Sge: { |
| - // !(Src1RM > Src0RM) |
| - if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| - Src0RM = legalizeToReg(Src0RM); |
| - _movp(T, Src1RM); |
| - _pcmpgt(T, Src0RM); |
| - Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| - _pxor(T, MinusOne); |
| - } break; |
| - case InstIcmp::Ult: |
| - case InstIcmp::Slt: { |
| - if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| - Src0RM = legalizeToReg(Src0RM); |
| - _movp(T, Src1RM); |
| - _pcmpgt(T, Src0RM); |
| - } break; |
| - case InstIcmp::Ule: |
| - case InstIcmp::Sle: { |
| - // !(Src0RM > Src1RM) |
| - if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| - Src1RM = legalizeToReg(Src1RM); |
| - _movp(T, Src0RM); |
| - _pcmpgt(T, Src1RM); |
| - Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| - _pxor(T, MinusOne); |
| + _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq); |
| + _movp(T2, Src0RM); |
| + _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord); |
| + _por(T, T2); |
| } break; |
| } |
| - |
| - _movp(Dest, T); |
| - eliminateNextVectorSextInstruction(Dest); |
| - return; |
| } |
|
John
2015/12/07 13:07:44
assert(T != nullptr), maybe?
sehr
2015/12/15 20:45:44
Agreed. Done.
|
| + _movp(Dest, T); |
| + eliminateNextVectorSextInstruction(Dest); |
| +} |
| + |
| +inline bool isZero(const Operand *Opnd) { |
| + if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) |
| + return C64->getValue() == 0; |
| + if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| + return C32->getValue() == 0; |
| + return false; |
| +} |
| + |
| +template <class Machine> |
| +void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp, |
| + const Inst *Consumer) { |
| + Operand *Src0 = legalize(Icmp->getSrc(0)); |
| + Operand *Src1 = legalize(Icmp->getSrc(1)); |
| + Variable *Dest = Icmp->getDest(); |
| + |
| + if (isVectorType(Dest->getType())) |
| + llvm::report_fatal_error("Vector compare/branch cannot be folded"); |
| + |
| if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
| - lowerIcmp64(Icmp, Br); |
| + lowerIcmp64(Icmp, Consumer); |
| return; |
| } |
| @@ -2762,22 +2689,140 @@ void TargetX86Base<Machine>::lowerIcmpAndBr(const InstIcmp *Icmp, |
| default: |
| break; |
| case InstIcmp::Uge: |
| - movOrBr(true, Dest, Br); |
| + movOrConsumer(true, Dest, Consumer); |
| return; |
| case InstIcmp::Ult: |
| - movOrBr(false, Dest, Br); |
| + movOrConsumer(false, Dest, Consumer); |
| return; |
| } |
| } |
| Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| _cmp(Src0RM, Src1); |
| - setccOrBr(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, Br); |
| + setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, |
| + Consumer); |
| +} |
| + |
| +template <class Machine> |
| +void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) { |
| + Operand *Src0 = legalize(Icmp->getSrc(0)); |
| + Operand *Src1 = legalize(Icmp->getSrc(1)); |
| + Variable *Dest = Icmp->getDest(); |
| + |
| + if (!isVectorType(Dest->getType())) |
| + llvm::report_fatal_error("Expected a vector compare"); |
| + |
| + Type Ty = Src0->getType(); |
| + // Promote i1 vectors to 128 bit integer vector types. |
| + if (typeElementType(Ty) == IceType_i1) { |
|
John
2015/12/07 13:07:44
optional: maybe create a helper function for conve...
sehr
2015/12/15 20:45:44
I think there's only one use (for now), but intend...
|
| + Type NewTy = IceType_NUM; |
| + switch (Ty) { |
| + default: |
| + llvm_unreachable("unexpected type"); |
|
Jim Stichnoth
2015/12/08 18:55:59
I think report_fatal_error would be better here.
sehr
2015/12/15 20:45:44
Done.
|
| + break; |
| + case IceType_v4i1: |
| + NewTy = IceType_v4i32; |
| + break; |
| + case IceType_v8i1: |
| + NewTy = IceType_v8i16; |
| + break; |
| + case IceType_v16i1: |
| + NewTy = IceType_v16i8; |
| + break; |
| + } |
| + Variable *NewSrc0 = Func->makeVariable(NewTy); |
| + Variable *NewSrc1 = Func->makeVariable(NewTy); |
| + lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); |
| + lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); |
| + Src0 = NewSrc0; |
| + Src1 = NewSrc1; |
| + Ty = NewTy; |
| + } |
| + |
| + InstIcmp::ICond Condition = Icmp->getCondition(); |
| + |
| + Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| + Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| + |
| + // SSE2 only has signed comparison operations. Transform unsigned inputs in |
| + // a manner that allows for the use of signed comparison operations by |
| + // flipping the high order bits. |
| + if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || |
| + Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { |
| + Variable *T0 = makeReg(Ty); |
| + Variable *T1 = makeReg(Ty); |
| + Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
| + _movp(T0, Src0RM); |
| + _pxor(T0, HighOrderBits); |
| + _movp(T1, Src1RM); |
| + _pxor(T1, HighOrderBits); |
| + Src0RM = T0; |
| + Src1RM = T1; |
| + } |
| + |
| + Variable *T = makeReg(Ty); |
| + switch (Condition) { |
| + default: |
| + llvm_unreachable("unexpected condition"); |
| + break; |
| + case InstIcmp::Eq: { |
| + if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| + Src1RM = legalizeToReg(Src1RM); |
| + _movp(T, Src0RM); |
| + _pcmpeq(T, Src1RM); |
| + } break; |
| + case InstIcmp::Ne: { |
| + if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| + Src1RM = legalizeToReg(Src1RM); |
| + _movp(T, Src0RM); |
| + _pcmpeq(T, Src1RM); |
| + Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| + _pxor(T, MinusOne); |
| + } break; |
| + case InstIcmp::Ugt: |
| + case InstIcmp::Sgt: { |
| + if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| + Src1RM = legalizeToReg(Src1RM); |
| + _movp(T, Src0RM); |
| + _pcmpgt(T, Src1RM); |
| + } break; |
| + case InstIcmp::Uge: |
| + case InstIcmp::Sge: { |
| + // !(Src1RM > Src0RM) |
| + if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| + Src0RM = legalizeToReg(Src0RM); |
| + _movp(T, Src1RM); |
| + _pcmpgt(T, Src0RM); |
| + Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| + _pxor(T, MinusOne); |
| + } break; |
| + case InstIcmp::Ult: |
| + case InstIcmp::Slt: { |
| + if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| + Src0RM = legalizeToReg(Src0RM); |
| + _movp(T, Src1RM); |
| + _pcmpgt(T, Src0RM); |
| + } break; |
| + case InstIcmp::Ule: |
| + case InstIcmp::Sle: { |
| + // !(Src0RM > Src1RM) |
| + if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| + Src1RM = legalizeToReg(Src1RM); |
| + _movp(T, Src0RM); |
| + _pcmpgt(T, Src1RM); |
| + Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| + _pxor(T, MinusOne); |
| + } break; |
| + } |
| + |
| + _movp(Dest, T); |
| + eliminateNextVectorSextInstruction(Dest); |
| } |
| template <typename Machine> |
| template <typename T> |
| typename std::enable_if<!T::Is64Bit, void>::type |
| -TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { |
| +TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, |
| + const Inst *Consumer) { |
| // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| Operand *Src0 = legalize(Icmp->getSrc(0)); |
| Operand *Src1 = legalize(Icmp->getSrc(1)); |
| @@ -2835,7 +2880,7 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { |
| _mov(Temp, Src0HiRM); |
| _or(Temp, Src0LoRM); |
| Context.insert(InstFakeUse::create(Func, Temp)); |
| - setccOrBr(Traits::Cond::Br_e, Dest, Br); |
| + setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer); |
| return; |
| case InstIcmp::Ne: |
| case InstIcmp::Ugt: |
| @@ -2844,23 +2889,23 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { |
| _mov(Temp, Src0HiRM); |
| _or(Temp, Src0LoRM); |
| Context.insert(InstFakeUse::create(Func, Temp)); |
| - setccOrBr(Traits::Cond::Br_ne, Dest, Br); |
| + setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer); |
| return; |
| case InstIcmp::Uge: |
| - movOrBr(true, Dest, Br); |
| + movOrConsumer(true, Dest, Consumer); |
| return; |
| case InstIcmp::Ult: |
| - movOrBr(false, Dest, Br); |
| + movOrConsumer(false, Dest, Consumer); |
| return; |
| case InstIcmp::Sgt: |
| break; |
| case InstIcmp::Sge: |
| _test(Src0HiRM, SignMask); |
| - setccOrBr(Traits::Cond::Br_e, Dest, Br); |
| + setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer); |
| return; |
| case InstIcmp::Slt: |
| _test(Src0HiRM, SignMask); |
| - setccOrBr(Traits::Cond::Br_ne, Dest, Br); |
| + setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer); |
| return; |
| case InstIcmp::Sle: |
| break; |
| @@ -2869,7 +2914,7 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { |
| // Handle general compares. |
| Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| - if (Br == nullptr) { |
| + if (Consumer == nullptr) { |
| Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); |
| Constant *One = Ctx->getConstantInt(Dest->getType(), 1); |
| typename Traits::Insts::Label *LabelFalse = |
| @@ -2887,7 +2932,7 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { |
| Context.insert(LabelFalse); |
| _mov_redefined(Dest, Zero); |
| Context.insert(LabelTrue); |
| - } else { |
| + } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| _cmp(Src0HiRM, Src1HiRI); |
| if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); |
| @@ -2896,37 +2941,78 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { |
| _cmp(Src0LoRM, Src1LoRI); |
| _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), |
| Br->getTargetFalse()); |
| + } else if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| + Operand *SrcT = Select->getTrueOperand(); |
| + Operand *SrcF = Select->getFalseOperand(); |
| + Variable *SelectDest = Select->getDest(); |
| + typename Traits::Insts::Label *LabelFalse = |
| + Traits::Insts::Label::create(Func, this); |
| + typename Traits::Insts::Label *LabelTrue = |
| + Traits::Insts::Label::create(Func, this); |
| + lowerMove(SelectDest, SrcT, false); |
| + _cmp(Src0HiRM, Src1HiRI); |
| + if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| + _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| + if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| + _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| + _cmp(Src0LoRM, Src1LoRI); |
| + _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| + Context.insert(LabelFalse); |
| + static constexpr bool IsRedefinition = true; |
| + lowerMove(SelectDest, SrcF, IsRedefinition); |
| + Context.insert(LabelTrue); |
| + } else { |
| + llvm::report_fatal_error("Unexpected consumer type"); |
| } |
| } |
| template <class Machine> |
| -void TargetX86Base<Machine>::setccOrBr(typename Traits::Cond::BrCond Condition, |
| - Variable *Dest, const InstBr *Br) { |
| - if (Br == nullptr) { |
| +void TargetX86Base<Machine>::setccOrConsumer( |
| + typename Traits::Cond::BrCond Condition, Variable *Dest, |
| + const Inst *Consumer) { |
| + if (Consumer == nullptr) { |
| _setcc(Dest, Condition); |
| - } else { |
| + } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); |
| + } else if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| + Operand *SrcT = Select->getTrueOperand(); |
| + Operand *SrcF = Select->getFalseOperand(); |
| + Variable *SelectDest = Select->getDest(); |
| + lowerSelectMove(SelectDest, Condition, SrcT, SrcF); |
| + } else { |
| + llvm::report_fatal_error("Unexpected consumer type"); |
| } |
| } |
| template <class Machine> |
| -void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest, |
| - const InstBr *Br) { |
| - if (Br == nullptr) { |
| +void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest, |
| + const Inst *Consumer) { |
| + if (Consumer == nullptr) { |
| _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
| - } else { |
| + } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| // TODO(sehr,stichnot): This could be done with a single unconditional |
| // branch instruction, but subzero doesn't know how to handle the resulting |
| // control flow graph changes now. Make it do so to eliminate mov and cmp. |
| _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
| _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); |
| _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| + } else if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| + Operand *Src = nullptr; |
| + if (IcmpResult) { |
| + Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm); |
| + } else { |
| + Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm); |
| + } |
| + Variable *SelectDest = Select->getDest(); |
| + lowerMove(SelectDest, Src, false); |
| + } else { |
| + llvm::report_fatal_error("Unexpected consumer type"); |
| } |
| } |
| template <class Machine> |
| -void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith, |
| - const InstBr *Br) { |
| +void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith, |
| + const Inst *Consumer) { |
| Variable *T = nullptr; |
| Operand *Src0 = legalize(Arith->getSrc(0)); |
| Operand *Src1 = legalize(Arith->getSrc(1)); |
| @@ -2950,9 +3036,16 @@ void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith, |
| _or(T, Src1); |
| break; |
| } |
| - Context.insert(InstFakeUse::create(Func, T)); |
| - Context.insert(InstFakeDef::create(Func, Dest)); |
| - _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| + |
| + if (Consumer == nullptr) { |
| + llvm::report_fatal_error("Expected a consumer instruction"); |
| + } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + Context.insert(InstFakeDef::create(Func, Dest)); |
| + _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| + } else { |
| + llvm::report_fatal_error("Unexpected consumer type"); |
| + } |
| } |
| template <class Machine> |
| @@ -4617,96 +4710,47 @@ void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { |
| } |
| template <class Machine> |
| -void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { |
| - Variable *Dest = Inst->getDest(); |
| - Type DestTy = Dest->getType(); |
| - Operand *SrcT = Inst->getTrueOperand(); |
| - Operand *SrcF = Inst->getFalseOperand(); |
| - Operand *Condition = Inst->getCondition(); |
| - |
| - if (isVectorType(DestTy)) { |
| - Type SrcTy = SrcT->getType(); |
| - Variable *T = makeReg(SrcTy); |
| - Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| - Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| - if (InstructionSet >= Traits::SSE4_1) { |
| - // TODO(wala): If the condition operand is a constant, use blendps or |
| - // pblendw. |
| - // |
| - // Use blendvps or pblendvb to implement select. |
| - if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| - SrcTy == IceType_v4f32) { |
| - Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| - Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0); |
| - _movp(xmm0, ConditionRM); |
| - _psll(xmm0, Ctx->getConstantInt8(31)); |
| - _movp(T, SrcFRM); |
| - _blendvps(T, SrcTRM, xmm0); |
| - _movp(Dest, T); |
| - } else { |
| - assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| - Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
| - : IceType_v16i8; |
| - Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0); |
| - lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| - _movp(T, SrcFRM); |
| - _pblendvb(T, SrcTRM, xmm0); |
| - _movp(Dest, T); |
| - } |
| - return; |
| - } |
| - // Lower select without Traits::SSE4.1: |
| - // a=d?b:c ==> |
| - // if elementtype(d) != i1: |
| - // d=sext(d); |
| - // a=(b&d)|(c&~d); |
| - Variable *T2 = makeReg(SrcTy); |
| - // Sign extend the condition operand if applicable. |
| - if (SrcTy == IceType_v4f32) { |
| - // The sext operation takes only integer arguments. |
| - Variable *T3 = Func->makeVariable(IceType_v4i32); |
| - lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); |
| - _movp(T, T3); |
| - } else if (typeElementType(SrcTy) != IceType_i1) { |
| - lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); |
| - } else { |
| - Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| - _movp(T, ConditionRM); |
| - } |
| - _movp(T2, T); |
| - _pand(T, SrcTRM); |
| - _pandn(T2, SrcFRM); |
| - _por(T, T2); |
| - _movp(Dest, T); |
| +void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) { |
| + Variable *Dest = Select->getDest(); |
| + if (isVectorType(Dest->getType())) { |
| + lowerSelectVector(Select); |
| return; |
| } |
| - typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; |
| - Operand *CmpOpnd0 = nullptr; |
| - Operand *CmpOpnd1 = nullptr; |
| + Operand *Condition = Select->getCondition(); |
| // Handle folding opportunities. |
| - if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
| + if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
| assert(Producer->isDeleted()); |
| switch (BoolFolding::getProducerKind(Producer)) { |
| default: |
| break; |
| - case BoolFolding::PK_Icmp32: { |
| - auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| - Cond = Traits::getIcmp32Mapping(Cmp->getCondition()); |
| - CmpOpnd1 = legalize(Producer->getSrc(1)); |
| - CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); |
| - } break; |
| + case BoolFolding::PK_Icmp32: |
| + case BoolFolding::PK_Icmp64: { |
| + lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Select); |
| + return; |
| + } |
| + case BoolFolding::PK_Fcmp: { |
| + lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Select); |
| + return; |
| + } |
| } |
| } |
| - if (CmpOpnd0 == nullptr) { |
| - CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); |
| - CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
| - } |
| - assert(CmpOpnd0); |
| - assert(CmpOpnd1); |
| - _cmp(CmpOpnd0, CmpOpnd1); |
| + Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); |
| + Operand *Zero = Ctx->getConstantZero(IceType_i32); |
| + _cmp(CmpResult, Zero); |
| + Operand *SrcT = Select->getTrueOperand(); |
| + Operand *SrcF = Select->getFalseOperand(); |
| + const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; |
| + lowerSelectMove(Dest, Cond, SrcT, SrcF); |
| +} |
| + |
| +template <class Machine> |
| +void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, |
| + typename Traits::Cond::BrCond Cond, |
| + Operand *SrcT, Operand *SrcF) { |
| + Type DestTy = Dest->getType(); |
| if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
| // The cmov instruction doesn't allow 8-bit or FP operands, so we need |
| // explicit control flow. |
| @@ -4734,25 +4778,22 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { |
| SrcF = legalizeUndef(SrcF); |
| // Set the low portion. |
| Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| - Variable *TLo = nullptr; |
| - Operand *SrcFLo = legalize(loOperand(SrcF)); |
| - _mov(TLo, SrcFLo); |
| - Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); |
| - _cmov(TLo, SrcTLo, Cond); |
| - _mov(DestLo, TLo); |
| + lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF)); |
| // Set the high portion. |
| Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| - Variable *THi = nullptr; |
| - Operand *SrcFHi = legalize(hiOperand(SrcF)); |
| - _mov(THi, SrcFHi); |
| - Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); |
| - _cmov(THi, SrcTHi, Cond); |
| - _mov(DestHi, THi); |
| + lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF)); |
| return; |
| } |
| assert(DestTy == IceType_i16 || DestTy == IceType_i32 || |
| (Traits::Is64Bit && DestTy == IceType_i64)); |
| + lowerSelectIntMove(Dest, Cond, SrcT, SrcF); |
| +} |
| + |
| +template <class Machine> |
| +void TargetX86Base<Machine>::lowerSelectIntMove( |
| + Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT, |
| + Operand *SrcF) { |
| Variable *T = nullptr; |
| SrcF = legalize(SrcF); |
| _mov(T, SrcF); |
| @@ -4762,6 +4803,165 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { |
| } |
| template <class Machine> |
| +void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src, |
|
John
2015/12/07 13:07:44
What do you think about asserting !Src->isRemateri
sehr
2015/12/15 20:45:44
Done.
|
| + bool IsRedefinition) { |
| + assert(Dest->getType() == Src->getType()); |
| + if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| + Src = legalize(Src); |
| + Operand *SrcLo = loOperand(Src); |
| + Operand *SrcHi = hiOperand(Src); |
| + Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| + Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| + Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| + _mov(T_Lo, SrcLo); |
| + if (IsRedefinition) { |
|
John
2015/12/07 13:07:44
I would personally do
_mov(DestLo, T_Lo);
if (IsR
Jim Stichnoth
2015/12/08 18:55:59
I agree with John's preference on this.
Thinking
sehr
2015/12/15 20:45:44
Followed Jim's extension of this suggestion.
sehr
2015/12/15 20:45:44
Done.
|
| + _mov_redefined(DestLo, T_Lo); |
| + } else { |
| + _mov(DestLo, T_Lo); |
| + } |
| + _mov(T_Hi, SrcHi); |
| + if (IsRedefinition) { |
| + _mov_redefined(DestHi, T_Hi); |
| + } else { |
| + _mov(DestHi, T_Hi); |
| + } |
| + } else { |
| + Operand *SrcLegal; |
| + if (Dest->hasReg()) { |
| + // If Dest already has a physical register, then only basic legalization |
| + // is needed, as the source operand can be a register, immediate, or |
| + // memory. |
| + SrcLegal = legalize(Src, Legal_Reg, Dest->getRegNum()); |
| + } else { |
| + // If Dest could be a stack operand, then SrcLegal must be a physical |
| + // register or a scalar integer immediate. |
| + SrcLegal = legalize(Src, Legal_Reg | Legal_Imm); |
| + } |
| + if (isVectorType(Dest->getType())) { |
| + if (IsRedefinition) { |
| + _movp_redefined(Dest, SrcLegal); |
| + } else { |
| + _movp(Dest, SrcLegal); |
| + } |
| + } else { |
| + if (IsRedefinition) { |
| + _mov_redefined(Dest, SrcLegal); |
| + } else { |
| + _mov(Dest, SrcLegal); |
| + } |
| + } |
| + } |
| +} |
| + |
| +template <class Machine> |
| +bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, |
| + const InstSelect *Select) { |
| + Operand *CmpSrc0 = Fcmp->getSrc(0); |
| + Operand *CmpSrc1 = Fcmp->getSrc(1); |
| + Operand *SelectSrcT = Select->getTrueOperand(); |
| + Operand *SelectSrcF = Select->getFalseOperand(); |
| + |
| + if (CmpSrc0->getType() != SelectSrcT->getType()) |
| + return false; |
| + |
| + // TODO(sehr, stichnot): fcmp/select patterns (e.g., minsd/maxss) go here. |
| + InstFcmp::FCond Condition = Fcmp->getCondition(); |
| + switch (Condition) { |
| + default: |
| + return false; |
| + case InstFcmp::True: |
| + case InstFcmp::False: |
| + case InstFcmp::Ogt: |
| + case InstFcmp::Olt: |
| + (void)CmpSrc0; |
| + (void)CmpSrc1; |
| + (void)SelectSrcT; |
| + (void)SelectSrcF; |
| + break; |
| + } |
| + return false; |
| +} |
| + |
| +template <class Machine> |
| +void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) { |
| + Variable *Dest = Icmp->getDest(); |
| + if (isVectorType(Dest->getType())) { |
| + lowerIcmpVector(Icmp); |
| + } else { |
| + constexpr Inst *Consumer = nullptr; |
| + lowerIcmpAndConsumer(Icmp, Consumer); |
| + } |
| +} |
| + |
| +template <class Machine> |
| +void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) { |
| + Variable *Dest = Inst->getDest(); |
| + Type DestTy = Dest->getType(); |
| + Operand *SrcT = Inst->getTrueOperand(); |
| + Operand *SrcF = Inst->getFalseOperand(); |
| + Operand *Condition = Inst->getCondition(); |
| + |
| + if (!isVectorType(DestTy)) |
| + llvm::report_fatal_error("Expected a vector select"); |
| + |
| + Type SrcTy = SrcT->getType(); |
| + Variable *T = makeReg(SrcTy); |
| + Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| + Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| + if (InstructionSet >= Traits::SSE4_1) { |
| + // TODO(wala): If the condition operand is a constant, use blendps or |
| + // pblendw. |
| + // |
| + // Use blendvps or pblendvb to implement select. |
| + if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| + SrcTy == IceType_v4f32) { |
| + Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| + Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0); |
| + _movp(xmm0, ConditionRM); |
| + _psll(xmm0, Ctx->getConstantInt8(31)); |
| + _movp(T, SrcFRM); |
| + _blendvps(T, SrcTRM, xmm0); |
| + _movp(Dest, T); |
| + } else { |
| + assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| + Type SignExtTy = |
| + Condition->getType() == IceType_v8i1 ? IceType_v8i16 : IceType_v16i8; |
| + Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0); |
| + lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| + _movp(T, SrcFRM); |
| + _pblendvb(T, SrcTRM, xmm0); |
| + _movp(Dest, T); |
| + } |
| + return; |
| + } |
| + // Lower select without Traits::SSE4.1: |
| + // a=d?b:c ==> |
| + // if elementtype(d) != i1: |
| + // d=sext(d); |
| + // a=(b&d)|(c&~d); |
| + Variable *T2 = makeReg(SrcTy); |
| + // Sign extend the condition operand if applicable. |
| + if (SrcTy == IceType_v4f32) { |
| + // The sext operation takes only integer arguments. |
| + Variable *T3 = Func->makeVariable(IceType_v4i32); |
| + lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); |
| + _movp(T, T3); |
| + } else if (typeElementType(SrcTy) != IceType_i1) { |
| + lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); |
| + } else { |
| + Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| + _movp(T, ConditionRM); |
| + } |
| + _movp(T2, T); |
| + _pand(T, SrcTRM); |
| + _pandn(T2, SrcFRM); |
| + _por(T, T2); |
| + _movp(Dest, T); |
| + |
| + return; |
| +} |
| + |
| +template <class Machine> |
| void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { |
| Operand *Value = Inst->getData(); |
| Operand *Addr = Inst->getAddr(); |
| @@ -5521,7 +5721,7 @@ Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) { |
| case IceType_f64: |
| Context.insert(InstFakeDef::create(Func, Reg)); |
| // TODO(stichnot): Use xorps/xorpd instead of pxor. |
|
Jim Stichnoth
2015/12/08 18:55:59
TODONE?
sehr
2015/12/15 20:45:44
TODONEDONE.
|
| - _pxor(Reg, Reg); |
| + _xorps(Reg, Reg); |
| break; |
| default: |
| // All vector types use the same pxor instruction. |