Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(76)

Unified Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1497033002: Fuse icmp/fcmp with select (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: unittests work Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringX86BaseImpl.h
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 1818f023d07449cdc6e85c09cb6facb68857fa1b..0feed94a526744d8e310acc7bc548fa330153a75 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -99,6 +99,8 @@ public:
static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
static bool hasComplexLowering(const Inst *Instr);
+ static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
+ BoolFoldingConsumerKind ConsumerKind);
void init(CfgNode *Node);
const Inst *getProducerFor(const Operand *Opnd) const;
void dump(const Cfg *Func) const;
@@ -193,6 +195,22 @@ bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
}
template <class MachineTraits>
+bool BoolFolding<MachineTraits>::isValidFolding(
+ typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind,
+ typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) {
+ switch (ProducerKind) {
+ default:
+ return false;
+ case PK_Icmp32:
+ case PK_Icmp64:
+ case PK_Fcmp:
+ return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select);
+ case PK_Arith:
+ return ConsumerKind == CK_Br;
+ }
+}
+
+template <class MachineTraits>
void BoolFolding<MachineTraits>::init(CfgNode *Node) {
Producers.clear();
for (Inst &Instr : Node->getInsts()) {
@@ -207,23 +225,32 @@ void BoolFolding<MachineTraits>::init(CfgNode *Node) {
// Check each src variable against the map.
FOREACH_VAR_IN_INST(Var, Instr) {
SizeT VarNum = Var->getIndex();
- if (containsValid(VarNum)) {
- if (IndexOfVarOperandInInst(Var) !=
- 0 // All valid consumers use Var as the first source operand
- ||
- getConsumerKind(&Instr) == CK_None // must be white-listed
- ||
- (getConsumerKind(&Instr) != CK_Br && // Icmp64 only folds in branch
- getProducerKind(Producers[VarNum].Instr) != PK_Icmp32) ||
- (Producers[VarNum].IsComplex && // complex can't be multi-use
- Producers[VarNum].NumUses > 0)) {
- setInvalid(VarNum);
- continue;
- }
- ++Producers[VarNum].NumUses;
- if (Instr.isLastUse(Var)) {
- Producers[VarNum].IsLiveOut = false;
- }
+ if (!containsValid(VarNum))
+ continue;
+ // All valid consumers use Var as the first source operand
+ if (IndexOfVarOperandInInst(Var) != 0) {
+ setInvalid(VarNum);
+ continue;
+ }
+ // Consumer instructions must be white-listed
+ auto ConsumerKind = getConsumerKind(&Instr);
John 2015/12/07 13:07:44 I personally like using auto **everywhere** but un…
Jim Stichnoth 2015/12/08 18:55:59 I don't think auto should be used here.
sehr 2015/12/15 20:45:44 Jim shot you down :-).
sehr 2015/12/15 20:45:44 Done.
+ if (ConsumerKind == CK_None) {
+ setInvalid(VarNum);
+ continue;
+ }
+ auto ProducerKind = getProducerKind(Producers[VarNum].Instr);
+ if (!isValidFolding(ProducerKind, ConsumerKind)) {
+ setInvalid(VarNum);
+ continue;
+ }
+ // Avoid creating multiple copies of complex producer instructions.
+ if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) {
+ setInvalid(VarNum);
+ continue;
+ }
+ ++Producers[VarNum].NumUses;
+ if (Instr.isLastUse(Var)) {
+ Producers[VarNum].IsLiveOut = false;
}
}
}
@@ -1885,63 +1912,36 @@ void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
- Operand *Src0 = Inst->getSrc(0);
- assert(Dest->getType() == Src0->getType());
- if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
- Src0 = legalize(Src0);
- Operand *Src0Lo = loOperand(Src0);
- Operand *Src0Hi = hiOperand(Src0);
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = nullptr, *T_Hi = nullptr;
- _mov(T_Lo, Src0Lo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, Src0Hi);
- _mov(DestHi, T_Hi);
- } else {
- Operand *Src0Legal;
- if (Dest->hasReg()) {
- // If Dest already has a physical register, then only basic legalization
- // is needed, as the source operand can be a register, immediate, or
- // memory.
- Src0Legal = legalize(Src0, Legal_Reg, Dest->getRegNum());
- } else {
- // If Dest could be a stack operand, then RI must be a physical register
- // or a scalar integer immediate.
- Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
- }
- if (isVectorType(Dest->getType()))
- _movp(Dest, Src0Legal);
- else
- _mov(Dest, Src0Legal);
- }
+ Operand *Src = Inst->getSrc(0);
+ assert(Dest->getType() == Src->getType());
+ lowerMove(Dest, Src, false);
}
template <class Machine>
-void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
- if (Inst->isUnconditional()) {
- _br(Inst->getTargetUnconditional());
+void TargetX86Base<Machine>::lowerBr(const InstBr *Br) {
+ if (Br->isUnconditional()) {
+ _br(Br->getTargetUnconditional());
return;
}
- Operand *Cond = Inst->getCondition();
+ Operand *Cond = Br->getCondition();
// Handle folding opportunities.
- if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
+ if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
assert(Producer->isDeleted());
switch (BoolFolding::getProducerKind(Producer)) {
default:
break;
case BoolFolding::PK_Icmp32:
case BoolFolding::PK_Icmp64: {
- lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst);
+ lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Br);
return;
}
case BoolFolding::PK_Fcmp: {
- lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst);
+ lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Br);
return;
}
case BoolFolding::PK_Arith: {
- lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst);
+ lowerArithAndConsumer(llvm::dyn_cast<InstArithmetic>(Producer), Br);
return;
}
}
@@ -1949,7 +1949,7 @@ void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_cmp(Src0, Zero);
- _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
+ _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
}
template <class Machine>
@@ -2483,76 +2483,32 @@ void TargetX86Base<Machine>::lowerExtractElement(
}
template <class Machine>
-void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
- constexpr InstBr *Br = nullptr;
- lowerFcmpAndBr(Inst, Br);
-}
-
-template <class Machine>
-void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst,
- const InstBr *Br) {
- Operand *Src0 = Inst->getSrc(0);
- Operand *Src1 = Inst->getSrc(1);
- Variable *Dest = Inst->getDest();
+void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) {
+ Variable *Dest = Fcmp->getDest();
if (isVectorType(Dest->getType())) {
- if (Br)
- llvm::report_fatal_error("vector compare/branch cannot be folded");
- InstFcmp::FCond Condition = Inst->getCondition();
- size_t Index = static_cast<size_t>(Condition);
- assert(Index < Traits::TableFcmpSize);
-
- if (Traits::TableFcmp[Index].SwapVectorOperands)
- std::swap(Src0, Src1);
-
- Variable *T = nullptr;
+ lowerFcmpVector(Fcmp);
+ } else {
+ constexpr Inst *Consumer = nullptr;
+ lowerFcmpAndConsumer(Fcmp, Consumer);
+ }
+}
- if (Condition == InstFcmp::True) {
- // makeVectorOfOnes() requires an integer vector type.
- T = makeVectorOfMinusOnes(IceType_v4i32);
- } else if (Condition == InstFcmp::False) {
- T = makeVectorOfZeros(Dest->getType());
- } else {
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
- if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
- Src1RM = legalizeToReg(Src1RM);
-
- switch (Condition) {
- default: {
- typename Traits::Cond::CmppsCond Predicate =
- Traits::TableFcmp[Index].Predicate;
- assert(Predicate != Traits::Cond::Cmpps_Invalid);
- T = makeReg(Src0RM->getType());
- _movp(T, Src0RM);
- _cmpps(T, Src1RM, Predicate);
- } break;
- case InstFcmp::One: {
- // Check both unequal and ordered.
- T = makeReg(Src0RM->getType());
- Variable *T2 = makeReg(Src0RM->getType());
- _movp(T, Src0RM);
- _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
- _movp(T2, Src0RM);
- _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
- _pand(T, T2);
- } break;
- case InstFcmp::Ueq: {
- // Check both equal or unordered.
- T = makeReg(Src0RM->getType());
- Variable *T2 = makeReg(Src0RM->getType());
- _movp(T, Src0RM);
- _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
- _movp(T2, Src0RM);
- _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
- _por(T, T2);
- } break;
- }
+template <class Machine>
+void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
+ const Inst *Consumer) {
+ Operand *Src0 = Fcmp->getSrc(0);
+ Operand *Src1 = Fcmp->getSrc(1);
+ Variable *Dest = Fcmp->getDest();
+
+ if (isVectorType(Dest->getType()))
+ llvm::report_fatal_error("Vector compare/branch cannot be folded");
+
+ if (Consumer != nullptr) {
John 2015/12/07 13:07:44 Consider moving this block down to where InstSelec…
sehr 2015/12/15 20:45:44 It's up here as a short-circuit before any lowerin…
+ if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
+ if (lowerOptimizeFcmpSelect(Fcmp, Select))
+ return;
}
-
- _movp(Dest, T);
- eliminateNextVectorSextInstruction(Dest);
- return;
}
// Lowering a = fcmp cond, b, c
@@ -2568,7 +2524,7 @@ void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst,
// setcc lowering when C1 != Br_None && C2 == Br_None:
// ucomiss b, c /* but swap b,c order if SwapOperands==true */
// setcc a, C1
- InstFcmp::FCond Condition = Inst->getCondition();
+ InstFcmp::FCond Condition = Fcmp->getCondition();
size_t Index = static_cast<size_t>(Condition);
assert(Index < Traits::TableFcmpSize);
if (Traits::TableFcmp[Index].SwapScalarOperands)
@@ -2583,12 +2539,12 @@ void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst,
_ucomiss(T, Src1RM);
if (!HasC2) {
assert(Traits::TableFcmp[Index].Default);
- setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br);
+ setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer);
return;
}
}
int32_t IntDefault = Traits::TableFcmp[Index].Default;
- if (Br == nullptr) {
+ if (Consumer == nullptr) {
Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
_mov(Dest, Default);
if (HasC1) {
@@ -2602,7 +2558,7 @@ void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst,
_mov_redefined(Dest, NonDefault);
Context.insert(Label);
}
Jim Stichnoth 2015/12/08 18:55:59 May be an opportunity to put an early return in ea…
sehr 2015/12/15 20:45:44 Done, here and everywhere.
- } else {
+ } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
CfgNode *TrueSucc = Br->getTargetTrue();
CfgNode *FalseSucc = Br->getTargetFalse();
if (IntDefault != 0)
@@ -2616,143 +2572,114 @@ void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst,
return;
}
_br(FalseSucc);
+ } else if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
+ Operand *SrcT = Select->getTrueOperand();
+ Operand *SrcF = Select->getFalseOperand();
+ Variable *SelectDest = Select->getDest();
+ if (IntDefault != 0)
+ std::swap(SrcT, SrcF);
+ lowerMove(SelectDest, SrcF, false);
+ if (HasC1) {
+ typename Traits::Insts::Label *Label =
+ Traits::Insts::Label::create(Func, this);
+ _br(Traits::TableFcmp[Index].C1, Label);
+ if (HasC2) {
+ _br(Traits::TableFcmp[Index].C2, Label);
+ }
+ static constexpr bool IsRedefinition = true;
+ lowerMove(SelectDest, SrcT, IsRedefinition);
+ Context.insert(Label);
+ }
+ } else {
+ llvm::report_fatal_error("Unexpected consumer type");
}
}
-inline bool isZero(const Operand *Opnd) {
- if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
- return C64->getValue() == 0;
- if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
- return C32->getValue() == 0;
- return false;
-}
-
template <class Machine>
-void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
- constexpr InstBr *Br = nullptr;
- lowerIcmpAndBr(Inst, Br);
-}
+void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) {
+ Operand *Src0 = Fcmp->getSrc(0);
+ Operand *Src1 = Fcmp->getSrc(1);
+ Variable *Dest = Fcmp->getDest();
-template <class Machine>
-void TargetX86Base<Machine>::lowerIcmpAndBr(const InstIcmp *Icmp,
- const InstBr *Br) {
- Operand *Src0 = legalize(Icmp->getSrc(0));
- Operand *Src1 = legalize(Icmp->getSrc(1));
- Variable *Dest = Icmp->getDest();
+ if (!isVectorType(Dest->getType()))
+ llvm::report_fatal_error("Expected vector compare");
- if (isVectorType(Dest->getType())) {
- if (Br)
- llvm::report_fatal_error("vector compare/branch cannot be folded");
- Type Ty = Src0->getType();
- // Promote i1 vectors to 128 bit integer vector types.
- if (typeElementType(Ty) == IceType_i1) {
- Type NewTy = IceType_NUM;
- switch (Ty) {
- default:
- llvm_unreachable("unexpected type");
- break;
- case IceType_v4i1:
- NewTy = IceType_v4i32;
- break;
- case IceType_v8i1:
- NewTy = IceType_v8i16;
- break;
- case IceType_v16i1:
- NewTy = IceType_v16i8;
- break;
- }
- Variable *NewSrc0 = Func->makeVariable(NewTy);
- Variable *NewSrc1 = Func->makeVariable(NewTy);
- lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
- lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
- Src0 = NewSrc0;
- Src1 = NewSrc1;
- Ty = NewTy;
- }
+ InstFcmp::FCond Condition = Fcmp->getCondition();
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < Traits::TableFcmpSize);
+
+ if (Traits::TableFcmp[Index].SwapVectorOperands)
+ std::swap(Src0, Src1);
- InstIcmp::ICond Condition = Icmp->getCondition();
+ Variable *T = nullptr;
+ if (Condition == InstFcmp::True) {
+ // makeVectorOfMinusOnes() requires an integer vector type.
+ T = makeVectorOfMinusOnes(IceType_v4i32);
+ } else if (Condition == InstFcmp::False) {
+ T = makeVectorOfZeros(Dest->getType());
+ } else {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+ Src1RM = legalizeToReg(Src1RM);
- // SSE2 only has signed comparison operations. Transform unsigned inputs in
- // a manner that allows for the use of signed comparison operations by
- // flipping the high order bits.
- if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
- Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
- Variable *T0 = makeReg(Ty);
- Variable *T1 = makeReg(Ty);
- Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
- _movp(T0, Src0RM);
- _pxor(T0, HighOrderBits);
- _movp(T1, Src1RM);
- _pxor(T1, HighOrderBits);
- Src0RM = T0;
- Src1RM = T1;
- }
-
- Variable *T = makeReg(Ty);
switch (Condition) {
- default:
- llvm_unreachable("unexpected condition");
- break;
- case InstIcmp::Eq: {
- if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
- Src1RM = legalizeToReg(Src1RM);
+ default: {
+ typename Traits::Cond::CmppsCond Predicate =
+ Traits::TableFcmp[Index].Predicate;
+ assert(Predicate != Traits::Cond::Cmpps_Invalid);
+ T = makeReg(Src0RM->getType());
_movp(T, Src0RM);
- _pcmpeq(T, Src1RM);
+ _cmpps(T, Src1RM, Predicate);
} break;
- case InstIcmp::Ne: {
- if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
- Src1RM = legalizeToReg(Src1RM);
+ case InstFcmp::One: {
+ // Check both unequal and ordered.
+ T = makeReg(Src0RM->getType());
+ Variable *T2 = makeReg(Src0RM->getType());
_movp(T, Src0RM);
- _pcmpeq(T, Src1RM);
- Variable *MinusOne = makeVectorOfMinusOnes(Ty);
- _pxor(T, MinusOne);
+ _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
+ _movp(T2, Src0RM);
+ _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
+ _pand(T, T2);
} break;
- case InstIcmp::Ugt:
- case InstIcmp::Sgt: {
- if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
- Src1RM = legalizeToReg(Src1RM);
+ case InstFcmp::Ueq: {
+ // Check both equal or unordered.
+ T = makeReg(Src0RM->getType());
+ Variable *T2 = makeReg(Src0RM->getType());
_movp(T, Src0RM);
- _pcmpgt(T, Src1RM);
- } break;
- case InstIcmp::Uge:
- case InstIcmp::Sge: {
- // !(Src1RM > Src0RM)
- if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
- Src0RM = legalizeToReg(Src0RM);
- _movp(T, Src1RM);
- _pcmpgt(T, Src0RM);
- Variable *MinusOne = makeVectorOfMinusOnes(Ty);
- _pxor(T, MinusOne);
- } break;
- case InstIcmp::Ult:
- case InstIcmp::Slt: {
- if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
- Src0RM = legalizeToReg(Src0RM);
- _movp(T, Src1RM);
- _pcmpgt(T, Src0RM);
- } break;
- case InstIcmp::Ule:
- case InstIcmp::Sle: {
- // !(Src0RM > Src1RM)
- if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
- Src1RM = legalizeToReg(Src1RM);
- _movp(T, Src0RM);
- _pcmpgt(T, Src1RM);
- Variable *MinusOne = makeVectorOfMinusOnes(Ty);
- _pxor(T, MinusOne);
+ _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
+ _movp(T2, Src0RM);
+ _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
+ _por(T, T2);
} break;
}
-
- _movp(Dest, T);
- eliminateNextVectorSextInstruction(Dest);
- return;
}
John 2015/12/07 13:07:44 assert(T != nullptr), maybe?
sehr 2015/12/15 20:45:44 Agreed. Done.
+ _movp(Dest, T);
+ eliminateNextVectorSextInstruction(Dest);
+}
+
+inline bool isZero(const Operand *Opnd) {
+ if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
+ return C64->getValue() == 0;
+ if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
+ return C32->getValue() == 0;
+ return false;
+}
+
+template <class Machine>
+void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
+ const Inst *Consumer) {
+ Operand *Src0 = legalize(Icmp->getSrc(0));
+ Operand *Src1 = legalize(Icmp->getSrc(1));
+ Variable *Dest = Icmp->getDest();
+
+ if (isVectorType(Dest->getType()))
+ llvm::report_fatal_error("Vector compare/branch cannot be folded");
+
if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
- lowerIcmp64(Icmp, Br);
+ lowerIcmp64(Icmp, Consumer);
return;
}
@@ -2762,22 +2689,140 @@ void TargetX86Base<Machine>::lowerIcmpAndBr(const InstIcmp *Icmp,
default:
break;
case InstIcmp::Uge:
- movOrBr(true, Dest, Br);
+ movOrConsumer(true, Dest, Consumer);
return;
case InstIcmp::Ult:
- movOrBr(false, Dest, Br);
+ movOrConsumer(false, Dest, Consumer);
return;
}
}
Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
_cmp(Src0RM, Src1);
- setccOrBr(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, Br);
+ setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest,
+ Consumer);
+}
+
+template <class Machine>
+void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) {
+ Operand *Src0 = legalize(Icmp->getSrc(0));
+ Operand *Src1 = legalize(Icmp->getSrc(1));
+ Variable *Dest = Icmp->getDest();
+
+ if (!isVectorType(Dest->getType()))
+ llvm::report_fatal_error("Expected a vector compare");
+
+ Type Ty = Src0->getType();
+ // Promote i1 vectors to 128 bit integer vector types.
+ if (typeElementType(Ty) == IceType_i1) {
John 2015/12/07 13:07:44 optional: maybe create a helper function for conve…
sehr 2015/12/15 20:45:44 I think there's only one use (for now), but intend…
+ Type NewTy = IceType_NUM;
+ switch (Ty) {
+ default:
+ llvm_unreachable("unexpected type");
Jim Stichnoth 2015/12/08 18:55:59 I think report_fatal_error would be better here.
sehr 2015/12/15 20:45:44 Done.
+ break;
+ case IceType_v4i1:
+ NewTy = IceType_v4i32;
+ break;
+ case IceType_v8i1:
+ NewTy = IceType_v8i16;
+ break;
+ case IceType_v16i1:
+ NewTy = IceType_v16i8;
+ break;
+ }
+ Variable *NewSrc0 = Func->makeVariable(NewTy);
+ Variable *NewSrc1 = Func->makeVariable(NewTy);
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
+ Src0 = NewSrc0;
+ Src1 = NewSrc1;
+ Ty = NewTy;
+ }
+
+ InstIcmp::ICond Condition = Icmp->getCondition();
+
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+
+ // SSE2 only has signed comparison operations. Transform unsigned inputs in
+ // a manner that allows for the use of signed comparison operations by
+ // flipping the high order bits.
+ if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
+ Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
+ Variable *T0 = makeReg(Ty);
+ Variable *T1 = makeReg(Ty);
+ Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
+ _movp(T0, Src0RM);
+ _pxor(T0, HighOrderBits);
+ _movp(T1, Src1RM);
+ _pxor(T1, HighOrderBits);
+ Src0RM = T0;
+ Src1RM = T1;
+ }
+
+ Variable *T = makeReg(Ty);
+ switch (Condition) {
+ default:
+ llvm_unreachable("unexpected condition");
+ break;
+ case InstIcmp::Eq: {
+ if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+ Src1RM = legalizeToReg(Src1RM);
+ _movp(T, Src0RM);
+ _pcmpeq(T, Src1RM);
+ } break;
+ case InstIcmp::Ne: {
+ if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+ Src1RM = legalizeToReg(Src1RM);
+ _movp(T, Src0RM);
+ _pcmpeq(T, Src1RM);
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ case InstIcmp::Ugt:
+ case InstIcmp::Sgt: {
+ if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+ Src1RM = legalizeToReg(Src1RM);
+ _movp(T, Src0RM);
+ _pcmpgt(T, Src1RM);
+ } break;
+ case InstIcmp::Uge:
+ case InstIcmp::Sge: {
+ // !(Src1RM > Src0RM)
+ if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
+ Src0RM = legalizeToReg(Src0RM);
+ _movp(T, Src1RM);
+ _pcmpgt(T, Src0RM);
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ case InstIcmp::Ult:
+ case InstIcmp::Slt: {
+ if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
+ Src0RM = legalizeToReg(Src0RM);
+ _movp(T, Src1RM);
+ _pcmpgt(T, Src0RM);
+ } break;
+ case InstIcmp::Ule:
+ case InstIcmp::Sle: {
+ // !(Src0RM > Src1RM)
+ if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+ Src1RM = legalizeToReg(Src1RM);
+ _movp(T, Src0RM);
+ _pcmpgt(T, Src1RM);
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ }
+
+ _movp(Dest, T);
+ eliminateNextVectorSextInstruction(Dest);
}
template <typename Machine>
template <typename T>
typename std::enable_if<!T::Is64Bit, void>::type
-TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) {
+TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp,
+ const Inst *Consumer) {
// a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
Operand *Src0 = legalize(Icmp->getSrc(0));
Operand *Src1 = legalize(Icmp->getSrc(1));
@@ -2835,7 +2880,7 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) {
_mov(Temp, Src0HiRM);
_or(Temp, Src0LoRM);
Context.insert(InstFakeUse::create(Func, Temp));
- setccOrBr(Traits::Cond::Br_e, Dest, Br);
+ setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
return;
case InstIcmp::Ne:
case InstIcmp::Ugt:
@@ -2844,23 +2889,23 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) {
_mov(Temp, Src0HiRM);
_or(Temp, Src0LoRM);
Context.insert(InstFakeUse::create(Func, Temp));
- setccOrBr(Traits::Cond::Br_ne, Dest, Br);
+ setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
return;
case InstIcmp::Uge:
- movOrBr(true, Dest, Br);
+ movOrConsumer(true, Dest, Consumer);
return;
case InstIcmp::Ult:
- movOrBr(false, Dest, Br);
+ movOrConsumer(false, Dest, Consumer);
return;
case InstIcmp::Sgt:
break;
case InstIcmp::Sge:
_test(Src0HiRM, SignMask);
- setccOrBr(Traits::Cond::Br_e, Dest, Br);
+ setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
return;
case InstIcmp::Slt:
_test(Src0HiRM, SignMask);
- setccOrBr(Traits::Cond::Br_ne, Dest, Br);
+ setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
return;
case InstIcmp::Sle:
break;
@@ -2869,7 +2914,7 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) {
// Handle general compares.
Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
- if (Br == nullptr) {
+ if (Consumer == nullptr) {
Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
typename Traits::Insts::Label *LabelFalse =
@@ -2887,7 +2932,7 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) {
Context.insert(LabelFalse);
_mov_redefined(Dest, Zero);
Context.insert(LabelTrue);
- } else {
+ } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
_cmp(Src0HiRM, Src1HiRI);
if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
_br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue());
@@ -2896,37 +2941,78 @@ TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) {
_cmp(Src0LoRM, Src1LoRI);
_br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(),
Br->getTargetFalse());
+ } else if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
+ Operand *SrcT = Select->getTrueOperand();
+ Operand *SrcF = Select->getFalseOperand();
+ Variable *SelectDest = Select->getDest();
+ typename Traits::Insts::Label *LabelFalse =
+ Traits::Insts::Label::create(Func, this);
+ typename Traits::Insts::Label *LabelTrue =
+ Traits::Insts::Label::create(Func, this);
+ lowerMove(SelectDest, SrcT, false);
+ _cmp(Src0HiRM, Src1HiRI);
+ if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
+ _br(Traits::TableIcmp64[Index].C1, LabelTrue);
+ if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
+ _br(Traits::TableIcmp64[Index].C2, LabelFalse);
+ _cmp(Src0LoRM, Src1LoRI);
+ _br(Traits::TableIcmp64[Index].C3, LabelTrue);
+ Context.insert(LabelFalse);
+ static constexpr bool IsRedefinition = true;
+ lowerMove(SelectDest, SrcF, IsRedefinition);
+ Context.insert(LabelTrue);
+ } else {
+ llvm::report_fatal_error("Unexpected consumer type");
}
}
template <class Machine>
-void TargetX86Base<Machine>::setccOrBr(typename Traits::Cond::BrCond Condition,
- Variable *Dest, const InstBr *Br) {
- if (Br == nullptr) {
+void TargetX86Base<Machine>::setccOrConsumer(
+ typename Traits::Cond::BrCond Condition, Variable *Dest,
+ const Inst *Consumer) {
+ if (Consumer == nullptr) {
_setcc(Dest, Condition);
- } else {
+ } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
_br(Condition, Br->getTargetTrue(), Br->getTargetFalse());
+ } else if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
+ Operand *SrcT = Select->getTrueOperand();
+ Operand *SrcF = Select->getFalseOperand();
+ Variable *SelectDest = Select->getDest();
+ lowerSelectMove(SelectDest, Condition, SrcT, SrcF);
+ } else {
+ llvm::report_fatal_error("Unexpected consumer type");
}
}
template <class Machine>
-void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest,
- const InstBr *Br) {
- if (Br == nullptr) {
+void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest,
+ const Inst *Consumer) {
+ if (Consumer == nullptr) {
_mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
- } else {
+ } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
// TODO(sehr,stichnot): This could be done with a single unconditional
// branch instruction, but subzero doesn't know how to handle the resulting
// control flow graph changes now. Make it do so to eliminate mov and cmp.
_mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
_cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
_br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
+ } else if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
+ Operand *Src = nullptr;
+ if (IcmpResult) {
+ Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm);
+ } else {
+ Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm);
+ }
+ Variable *SelectDest = Select->getDest();
+ lowerMove(SelectDest, Src, false);
+ } else {
+ llvm::report_fatal_error("Unexpected consumer type");
}
}
template <class Machine>
-void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith,
- const InstBr *Br) {
+void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith,
+ const Inst *Consumer) {
Variable *T = nullptr;
Operand *Src0 = legalize(Arith->getSrc(0));
Operand *Src1 = legalize(Arith->getSrc(1));
@@ -2950,9 +3036,16 @@ void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith,
_or(T, Src1);
break;
}
- Context.insert(InstFakeUse::create(Func, T));
- Context.insert(InstFakeDef::create(Func, Dest));
- _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
+
+ if (Consumer == nullptr) {
+ llvm::report_fatal_error("Expected a consumer instruction");
+ } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
+ Context.insert(InstFakeUse::create(Func, T));
+ Context.insert(InstFakeDef::create(Func, Dest));
+ _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
+ } else {
+ llvm::report_fatal_error("Unexpected consumer type");
+ }
}
template <class Machine>
@@ -4617,96 +4710,47 @@ void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
}
template <class Machine>
-void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
- Variable *Dest = Inst->getDest();
- Type DestTy = Dest->getType();
- Operand *SrcT = Inst->getTrueOperand();
- Operand *SrcF = Inst->getFalseOperand();
- Operand *Condition = Inst->getCondition();
-
- if (isVectorType(DestTy)) {
- Type SrcTy = SrcT->getType();
- Variable *T = makeReg(SrcTy);
- Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
- Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
- if (InstructionSet >= Traits::SSE4_1) {
- // TODO(wala): If the condition operand is a constant, use blendps or
- // pblendw.
- //
- // Use blendvps or pblendvb to implement select.
- if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
- SrcTy == IceType_v4f32) {
- Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
- Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
- _movp(xmm0, ConditionRM);
- _psll(xmm0, Ctx->getConstantInt8(31));
- _movp(T, SrcFRM);
- _blendvps(T, SrcTRM, xmm0);
- _movp(Dest, T);
- } else {
- assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
- Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
- : IceType_v16i8;
- Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
- lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
- _movp(T, SrcFRM);
- _pblendvb(T, SrcTRM, xmm0);
- _movp(Dest, T);
- }
- return;
- }
- // Lower select without Traits::SSE4.1:
- // a=d?b:c ==>
- // if elementtype(d) != i1:
- // d=sext(d);
- // a=(b&d)|(c&~d);
- Variable *T2 = makeReg(SrcTy);
- // Sign extend the condition operand if applicable.
- if (SrcTy == IceType_v4f32) {
- // The sext operation takes only integer arguments.
- Variable *T3 = Func->makeVariable(IceType_v4i32);
- lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
- _movp(T, T3);
- } else if (typeElementType(SrcTy) != IceType_i1) {
- lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
- } else {
- Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
- _movp(T, ConditionRM);
- }
- _movp(T2, T);
- _pand(T, SrcTRM);
- _pandn(T2, SrcFRM);
- _por(T, T2);
- _movp(Dest, T);
+void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) {
+ Variable *Dest = Select->getDest();
+ if (isVectorType(Dest->getType())) {
+ lowerSelectVector(Select);
return;
}
- typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
- Operand *CmpOpnd0 = nullptr;
- Operand *CmpOpnd1 = nullptr;
+ Operand *Condition = Select->getCondition();
// Handle folding opportunities.
- if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
+ if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
assert(Producer->isDeleted());
switch (BoolFolding::getProducerKind(Producer)) {
default:
break;
- case BoolFolding::PK_Icmp32: {
- auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
- Cond = Traits::getIcmp32Mapping(Cmp->getCondition());
- CmpOpnd1 = legalize(Producer->getSrc(1));
- CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
- } break;
+ case BoolFolding::PK_Icmp32:
+ case BoolFolding::PK_Icmp64: {
+ lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Select);
+ return;
+ }
+ case BoolFolding::PK_Fcmp: {
+ lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Select);
+ return;
+ }
}
}
- if (CmpOpnd0 == nullptr) {
- CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
- CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
- }
- assert(CmpOpnd0);
- assert(CmpOpnd1);
- _cmp(CmpOpnd0, CmpOpnd1);
+ Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
+ Operand *Zero = Ctx->getConstantZero(IceType_i32);
+ _cmp(CmpResult, Zero);
+ Operand *SrcT = Select->getTrueOperand();
+ Operand *SrcF = Select->getFalseOperand();
+ const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
+ lowerSelectMove(Dest, Cond, SrcT, SrcF);
+}
+
+template <class Machine>
+void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest,
+ typename Traits::Cond::BrCond Cond,
+ Operand *SrcT, Operand *SrcF) {
+ Type DestTy = Dest->getType();
if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
// The cmov instruction doesn't allow 8-bit or FP operands, so we need
// explicit control flow.
@@ -4734,25 +4778,22 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
SrcF = legalizeUndef(SrcF);
// Set the low portion.
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *TLo = nullptr;
- Operand *SrcFLo = legalize(loOperand(SrcF));
- _mov(TLo, SrcFLo);
- Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
- _cmov(TLo, SrcTLo, Cond);
- _mov(DestLo, TLo);
+ lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF));
// Set the high portion.
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *THi = nullptr;
- Operand *SrcFHi = legalize(hiOperand(SrcF));
- _mov(THi, SrcFHi);
- Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
- _cmov(THi, SrcTHi, Cond);
- _mov(DestHi, THi);
+ lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF));
return;
}
assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
(Traits::Is64Bit && DestTy == IceType_i64));
+ lowerSelectIntMove(Dest, Cond, SrcT, SrcF);
+}
+
+template <class Machine>
+void TargetX86Base<Machine>::lowerSelectIntMove(
+ Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT,
+ Operand *SrcF) {
Variable *T = nullptr;
SrcF = legalize(SrcF);
_mov(T, SrcF);
@@ -4762,6 +4803,165 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
}
template <class Machine>
+void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src,
John 2015/12/07 13:07:44 What do you think about asserting !Src->isRemateri
sehr 2015/12/15 20:45:44 Done.
+ bool IsRedefinition) {
+ assert(Dest->getType() == Src->getType());
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ Src = legalize(Src);
+ Operand *SrcLo = loOperand(Src);
+ Operand *SrcHi = hiOperand(Src);
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Variable *T_Lo = nullptr, *T_Hi = nullptr;
+ _mov(T_Lo, SrcLo);
+ if (IsRedefinition) {
John 2015/12/07 13:07:44 I would personally do _mov(DestLo, T_Lo); if (IsR
Jim Stichnoth 2015/12/08 18:55:59 I agree with John's preference on this. Thinking
sehr 2015/12/15 20:45:44 Followed Jim's extension of this suggestion.
sehr 2015/12/15 20:45:44 Done.
+ _mov_redefined(DestLo, T_Lo);
+ } else {
+ _mov(DestLo, T_Lo);
+ }
+ _mov(T_Hi, SrcHi);
+ if (IsRedefinition) {
+ _mov_redefined(DestHi, T_Hi);
+ } else {
+ _mov(DestHi, T_Hi);
+ }
+ } else {
+ Operand *SrcLegal;
+ if (Dest->hasReg()) {
+ // If Dest already has a physical register, then only basic legalization
+ // is needed, as the source operand can be a register, immediate, or
+ // memory.
+ SrcLegal = legalize(Src, Legal_Reg, Dest->getRegNum());
+ } else {
+ // If Dest could be a stack operand, then SrcLegal must be a physical
+ // register or a scalar integer immediate.
+ SrcLegal = legalize(Src, Legal_Reg | Legal_Imm);
+ }
+ if (isVectorType(Dest->getType())) {
+ if (IsRedefinition) {
+ _movp_redefined(Dest, SrcLegal);
+ } else {
+ _movp(Dest, SrcLegal);
+ }
+ } else {
+ if (IsRedefinition) {
+ _mov_redefined(Dest, SrcLegal);
+ } else {
+ _mov(Dest, SrcLegal);
+ }
+ }
+ }
+}
+
+template <class Machine>
+bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp,
+ const InstSelect *Select) {
+ Operand *CmpSrc0 = Fcmp->getSrc(0);
+ Operand *CmpSrc1 = Fcmp->getSrc(1);
+ Operand *SelectSrcT = Select->getTrueOperand();
+ Operand *SelectSrcF = Select->getFalseOperand();
+
+ if (CmpSrc0->getType() != SelectSrcT->getType())
+ return false;
+
+ // TODO(sehr, stichnot): fcmp/select patterns (e.g., minsd/maxss) go here.
+ InstFcmp::FCond Condition = Fcmp->getCondition();
+ switch (Condition) {
+ default:
+ return false;
+ case InstFcmp::True:
+ case InstFcmp::False:
+ case InstFcmp::Ogt:
+ case InstFcmp::Olt:
+ (void)CmpSrc0;
+ (void)CmpSrc1;
+ (void)SelectSrcT;
+ (void)SelectSrcF;
+ break;
+ }
+ return false;
+}
+
+template <class Machine>
+void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) {
+ Variable *Dest = Icmp->getDest();
+ if (isVectorType(Dest->getType())) {
+ lowerIcmpVector(Icmp);
+ } else {
+ constexpr Inst *Consumer = nullptr;
+ lowerIcmpAndConsumer(Icmp, Consumer);
+ }
+}
+
+template <class Machine>
+void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) {
+ Variable *Dest = Inst->getDest();
+ Type DestTy = Dest->getType();
+ Operand *SrcT = Inst->getTrueOperand();
+ Operand *SrcF = Inst->getFalseOperand();
+ Operand *Condition = Inst->getCondition();
+
+ if (!isVectorType(DestTy))
+ llvm::report_fatal_error("Expected a vector select");
+
+ Type SrcTy = SrcT->getType();
+ Variable *T = makeReg(SrcTy);
+ Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
+ Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
+ if (InstructionSet >= Traits::SSE4_1) {
+ // TODO(wala): If the condition operand is a constant, use blendps or
+ // pblendw.
+ //
+ // Use blendvps or pblendvb to implement select.
+ if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
+ SrcTy == IceType_v4f32) {
+ Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
+ Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
+ _movp(xmm0, ConditionRM);
+ _psll(xmm0, Ctx->getConstantInt8(31));
+ _movp(T, SrcFRM);
+ _blendvps(T, SrcTRM, xmm0);
+ _movp(Dest, T);
+ } else {
+ assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
+ Type SignExtTy =
+ Condition->getType() == IceType_v8i1 ? IceType_v8i16 : IceType_v16i8;
+ Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
+ lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
+ _movp(T, SrcFRM);
+ _pblendvb(T, SrcTRM, xmm0);
+ _movp(Dest, T);
+ }
+ return;
+ }
+ // Lower select without Traits::SSE4.1:
+ // a=d?b:c ==>
+ // if elementtype(d) != i1:
+ // d=sext(d);
+ // a=(b&d)|(c&~d);
+ Variable *T2 = makeReg(SrcTy);
+ // Sign extend the condition operand if applicable.
+ if (SrcTy == IceType_v4f32) {
+ // The sext operation takes only integer arguments.
+ Variable *T3 = Func->makeVariable(IceType_v4i32);
+ lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
+ _movp(T, T3);
+ } else if (typeElementType(SrcTy) != IceType_i1) {
+ lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
+ } else {
+ Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
+ _movp(T, ConditionRM);
+ }
+ _movp(T2, T);
+ _pand(T, SrcTRM);
+ _pandn(T2, SrcFRM);
+ _por(T, T2);
+ _movp(Dest, T);
+
+ return;
+}
+
+template <class Machine>
void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
Operand *Value = Inst->getData();
Operand *Addr = Inst->getAddr();
@@ -5521,7 +5721,7 @@ Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) {
case IceType_f64:
Context.insert(InstFakeDef::create(Func, Reg));
// TODO(stichnot): Use xorps/xorpd instead of pxor.
Jim Stichnoth 2015/12/08 18:55:59 TODONE?
sehr 2015/12/15 20:45:44 TODONEDONE.
- _pxor(Reg, Reg);
+ _xorps(Reg, Reg);
break;
default:
// All vector types use the same pxor instruction.

Powered by Google App Engine
This is Rietveld 408576698