Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index 00db25a5a29823b3411b6b030f5fd174184aeb55..cc6f2226a96984d3eb4e61c04dc33fa32b780a93 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -22,6 +22,7 @@ |
#include "IceOperand.h" |
#include "IceTargetLoweringX8632.def" |
#include "IceTargetLoweringX8632.h" |
+#include "llvm/Support/CommandLine.h" |
namespace Ice { |
@@ -123,6 +124,17 @@ const unsigned X86_MAX_XMM_ARGS = 4; |
// The number of bits in a byte |
const unsigned X86_CHAR_BIT = 8; |
+// Instruction set options: the "mattr" flag selects the X86 instruction set ("sse2", the default, or "sse4.1"); the TargetX8632 constructor reads it. |
+namespace cl = ::llvm::cl; |
+cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( |
+ "mattr", cl::desc("X86 target attributes"), |
+ cl::init(TargetX8632::SSE2), |
+ cl::values( |
+ clEnumValN(TargetX8632::SSE2, "sse2", |
+ "Enable SSE2 instructions (default)"), |
+ clEnumValN(TargetX8632::SSE4_1, "sse4.1", |
+ "Enable SSE 4.1 instructions"), clEnumValEnd)); |
+ |
// Return a string representation of the type that is suitable for use |
// in an identifier. |
IceString typeIdentString(const Type Ty) { |
@@ -234,8 +246,9 @@ void __attribute__((unused)) xMacroIntegrityCheck() { |
} // end of anonymous namespace |
TargetX8632::TargetX8632(Cfg *Func) |
- : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0), |
- LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), |
+ : TargetLowering(Func), InstructionSet(CLInstructionSet), |
+ IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0), |
+ NextLabelNumber(0), ComputedLiveRanges(false), |
PhysicalRegisters(VarList(Reg_NUM)) { |
// TODO: Don't initialize IntegerRegisters and friends every time. |
// Instead, initialize in some sort of static initializer for the |
@@ -1228,7 +1241,16 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
_movp(Dest, T); |
} break; |
case InstArithmetic::Mul: { |
- if (Dest->getType() == IceType_v4i32) { |
+ bool TypesAreValidForPmull = |
+ Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; |
+ bool InstructionSetIsValidForPmull = |
+ Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; |
+ if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
+ Variable *T = makeReg(Dest->getType()); |
+ _movp(T, Src0); |
+ _pmull(T, legalizeToVar(Src1)); |
+ _movp(Dest, T); |
+ } else if (Dest->getType() == IceType_v4i32) { |
// Lowering sequence: |
// Note: The mask arguments have index 0 on the left. |
// |
@@ -1243,8 +1265,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
// shufps T1, T2, {0,2,0,2} |
// pshufd T4, T1, {0,2,1,3} |
// movups Dest, T4 |
- // |
- // TODO(wala): SSE4.1 has pmulld. |
// Mask that directs pshufd to create a vector with entries |
// Src[1, 0, 3, 0] |
@@ -1273,11 +1293,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
_shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
_pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
_movp(Dest, T4); |
- } else if (Dest->getType() == IceType_v8i16) { |
- Variable *T = makeReg(IceType_v8i16); |
- _movp(T, Src0); |
- _pmullw(T, legalizeToVar(Src1)); |
- _movp(Dest, T); |
} else { |
assert(Dest->getType() == IceType_v16i8); |
// Sz_mul_v16i8 |
@@ -2155,10 +2170,15 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { |
Variable *ExtractedElement = makeReg(InVectorElementTy); |
// TODO(wala): Determine the best lowering sequences for each type. |
- if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
- // Lower extractelement operations where the element is 32 bits |
- // wide with pshufd. |
- // TODO(wala): SSE4.1 has extractps and pextrd |
+ bool CanUsePextr = |
+ Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; |
+ if (CanUsePextr && Ty != IceType_v4f32) { |
+ // Use pextrb, pextrw, or pextrd. |
+ Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
+ Variable *SourceVectR = legalizeToVar(SourceVectOperand); |
+ _pextr(ExtractedElement, SourceVectR, Mask); |
+ } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
+ // Use pshufd and movd/movss. |
// |
// ALIGNHACK: Force vector operands to registers in instructions that |
// require aligned memory operands until support for stack alignment |
@@ -2187,13 +2207,9 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { |
_movss(ExtractedElement, T); |
} |
#undef ALIGN_HACK |
- } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { |
- Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
- _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask); |
} else { |
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
// Spill the value to a stack slot and do the extraction in memory. |
- // TODO(wala): SSE4.1 has pextrb. |
// |
// TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when |
// support for legalizing to mem is implemented. |
@@ -2539,10 +2555,18 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
ElementToInsert = Expanded; |
} |
- if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
- // Lower insertelement with 32-bit wide elements using shufps or |
- // movss. |
- // TODO(wala): SSE4.1 has pinsrd and insertps. |
+ if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { |
+ // Use insertps, pinsrb, pinsrw, or pinsrd. |
+ Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg); |
+ Variable *T = makeReg(Ty); |
+ _movp(T, SourceVectOperand); |
+ if (Ty == IceType_v4f32) |
+ _insertps(T, Element, Ctx->getConstantInt(IceType_i8, Index << 4)); |
+ else |
+ _pinsr(T, Element, Ctx->getConstantInt(IceType_i8, Index)); |
+ _movp(Inst->getDest(), T); |
+ } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
+ // Use shufps or movss. |
Variable *Element = NULL; |
if (InVectorElementTy == IceType_f32) { |
// Element will be in an XMM register since it is floating point. |
@@ -2607,17 +2631,10 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
_movp(Inst->getDest(), T); |
} |
#undef ALIGN_HACK |
- } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { |
- Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg); |
- Variable *T = makeReg(Ty); |
- _movp(T, SourceVectOperand); |
- _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index)); |
- _movp(Inst->getDest(), T); |
} else { |
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
// Spill the value to a stack slot and perform the insertion in |
// memory. |
- // TODO(wala): SSE4.1 has pinsrb. |
// |
// TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when |
// support for legalizing to mem is implemented. |
@@ -3551,11 +3568,42 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
Operand *Condition = Inst->getCondition(); |
if (isVectorType(Dest->getType())) { |
- // a=d?b:c ==> d=sext(d); a=(b&d)|(c&~d) |
- // TODO(wala): SSE4.1 has blendvps and pblendvb. SSE4.1 also has |
- // blendps and pblendw for constant condition operands. |
Type SrcTy = SrcT->getType(); |
Variable *T = makeReg(SrcTy); |
+ // ALIGNHACK: Until stack alignment support is implemented, vector |
+ // instructions need to have vector operands in registers. Once |
+ // there is support for stack alignment, LEGAL_HACK can be removed. |
+#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
+ if (InstructionSet >= SSE4_1) { |
+ // TODO(wala): If the condition operand is a constant, use blendps |
+ // or pblendw. |
+ // |
+ // Use blendvps or pblendvb to implement select. |
+ if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
+ SrcTy == IceType_v4f32) { |
+ Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); |
+ _movp(xmm0, Condition); |
+ _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); |
+ _movp(T, SrcF); |
+ _blendvps(T, LEGAL_HACK(SrcT), xmm0); |
+ _movp(Dest, T); |
+ } else { |
+ assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
+ Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
+ : IceType_v16i8; |
+ Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); |
+ lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
+ _movp(T, SrcF); |
+ _pblendvb(T, LEGAL_HACK(SrcT), xmm0); |
+ _movp(Dest, T); |
+ } |
+ return; |
+ } |
+ // Lower select without SSE4.1: |
+ // a=d?b:c ==> |
+ // if elementtype(b) != i1: |
+ // d=sext(d); |
+ // a=(b&d)|(c&~d); |
Variable *T2 = makeReg(SrcTy); |
// Sign extend the condition operand if applicable. |
if (SrcTy == IceType_v4f32) { |
@@ -3568,11 +3616,6 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
} else { |
_movp(T, Condition); |
} |
- // ALIGNHACK: Until stack alignment support is implemented, the |
- // bitwise vector instructions need to have both operands in |
- // registers. Once there is support for stack alignment, LEGAL_HACK |
- // can be removed. |
-#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
_movp(T2, T); |
_pand(T, LEGAL_HACK(SrcT)); |
_pandn(T2, LEGAL_HACK(SrcF)); |