Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 427843002: Subzero: Add support for SSE4.1 instructions. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Fix an empty line that was deleted Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-arith.ll » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 00db25a5a29823b3411b6b030f5fd174184aeb55..cc6f2226a96984d3eb4e61c04dc33fa32b780a93 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -22,6 +22,7 @@
#include "IceOperand.h"
#include "IceTargetLoweringX8632.def"
#include "IceTargetLoweringX8632.h"
+#include "llvm/Support/CommandLine.h"
namespace Ice {
@@ -123,6 +124,17 @@ const unsigned X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const unsigned X86_CHAR_BIT = 8;
+// Instruction set options: the "mattr" command-line flag selects the X86 instruction set (SSE2 by default, or SSE4.1); TargetX8632's constructor copies the parsed value into its InstructionSet member.
+namespace cl = ::llvm::cl;
+cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(
+ "mattr", cl::desc("X86 target attributes"),
+ cl::init(TargetX8632::SSE2),
+ cl::values(
+ clEnumValN(TargetX8632::SSE2, "sse2",
+ "Enable SSE2 instructions (default)"),
+ clEnumValN(TargetX8632::SSE4_1, "sse4.1",
+ "Enable SSE 4.1 instructions"), clEnumValEnd));
+
// Return a string representation of the type that is suitable for use
// in an identifier.
IceString typeIdentString(const Type Ty) {
@@ -234,8 +246,9 @@ void __attribute__((unused)) xMacroIntegrityCheck() {
} // end of anonymous namespace
TargetX8632::TargetX8632(Cfg *Func)
- : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
- LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
+ : TargetLowering(Func), InstructionSet(CLInstructionSet),
+ IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0),
+ NextLabelNumber(0), ComputedLiveRanges(false),
PhysicalRegisters(VarList(Reg_NUM)) {
// TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the
@@ -1228,7 +1241,16 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
_movp(Dest, T);
} break;
case InstArithmetic::Mul: {
- if (Dest->getType() == IceType_v4i32) {
+ bool TypesAreValidForPmull =
+ Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
+ bool InstructionSetIsValidForPmull =
+ Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
+ if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
+ Variable *T = makeReg(Dest->getType());
+ _movp(T, Src0);
+ _pmull(T, legalizeToVar(Src1));
+ _movp(Dest, T);
+ } else if (Dest->getType() == IceType_v4i32) {
// Lowering sequence:
// Note: The mask arguments have index 0 on the left.
//
@@ -1243,8 +1265,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
// shufps T1, T2, {0,2,0,2}
// pshufd T4, T1, {0,2,1,3}
// movups Dest, T4
- //
- // TODO(wala): SSE4.1 has pmulld.
// Mask that directs pshufd to create a vector with entries
// Src[1, 0, 3, 0]
@@ -1273,11 +1293,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
_shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
_pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
_movp(Dest, T4);
- } else if (Dest->getType() == IceType_v8i16) {
- Variable *T = makeReg(IceType_v8i16);
- _movp(T, Src0);
- _pmullw(T, legalizeToVar(Src1));
- _movp(Dest, T);
} else {
assert(Dest->getType() == IceType_v16i8);
// Sz_mul_v16i8
@@ -2155,10 +2170,15 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
Variable *ExtractedElement = makeReg(InVectorElementTy);
// TODO(wala): Determine the best lowering sequences for each type.
- if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
- // Lower extractelement operations where the element is 32 bits
- // wide with pshufd.
- // TODO(wala): SSE4.1 has extractps and pextrd
+ bool CanUsePextr =
+ Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
+ if (CanUsePextr && Ty != IceType_v4f32) {
+ // Use pextrb, pextrw, or pextrd.
+ Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
+ Variable *SourceVectR = legalizeToVar(SourceVectOperand);
+ _pextr(ExtractedElement, SourceVectR, Mask);
+ } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
+ // Use pshufd and movd/movss.
//
// ALIGNHACK: Force vector operands to registers in instructions that
// require aligned memory operands until support for stack alignment
@@ -2187,13 +2207,9 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
_movss(ExtractedElement, T);
}
#undef ALIGN_HACK
- } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
- Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
- _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
} else {
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
// Spill the value to a stack slot and do the extraction in memory.
- // TODO(wala): SSE4.1 has pextrb.
//
// TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
// support for legalizing to mem is implemented.
@@ -2539,10 +2555,18 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
ElementToInsert = Expanded;
}
- if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
- // Lower insertelement with 32-bit wide elements using shufps or
- // movss.
- // TODO(wala): SSE4.1 has pinsrd and insertps.
+ if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
+ // Use insertps, pinsrb, pinsrw, or pinsrd.
+ Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
+ Variable *T = makeReg(Ty);
+ _movp(T, SourceVectOperand);
+ if (Ty == IceType_v4f32)
+ _insertps(T, Element, Ctx->getConstantInt(IceType_i8, Index << 4));
+ else
+ _pinsr(T, Element, Ctx->getConstantInt(IceType_i8, Index));
+ _movp(Inst->getDest(), T);
+ } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
+ // Use shufps or movss.
Variable *Element = NULL;
if (InVectorElementTy == IceType_f32) {
// Element will be in an XMM register since it is floating point.
@@ -2607,17 +2631,10 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
_movp(Inst->getDest(), T);
}
#undef ALIGN_HACK
- } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
- Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
- Variable *T = makeReg(Ty);
- _movp(T, SourceVectOperand);
- _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index));
- _movp(Inst->getDest(), T);
} else {
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
// Spill the value to a stack slot and perform the insertion in
// memory.
- // TODO(wala): SSE4.1 has pinsrb.
//
// TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
// support for legalizing to mem is implemented.
@@ -3551,11 +3568,42 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
Operand *Condition = Inst->getCondition();
if (isVectorType(Dest->getType())) {
- // a=d?b:c ==> d=sext(d); a=(b&d)|(c&~d)
- // TODO(wala): SSE4.1 has blendvps and pblendvb. SSE4.1 also has
- // blendps and pblendw for constant condition operands.
Type SrcTy = SrcT->getType();
Variable *T = makeReg(SrcTy);
+ // ALIGNHACK: Until stack alignment support is implemented, vector
+ // instructions need to have vector operands in registers. Once
+ // there is support for stack alignment, LEGAL_HACK can be removed.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+ if (InstructionSet >= SSE4_1) {
+ // TODO(wala): If the condition operand is a constant, use blendps
+ // or pblendw.
+ //
+ // Use blendvps or pblendvb to implement select.
+ if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
+ SrcTy == IceType_v4f32) {
+ Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
+ _movp(xmm0, Condition);
+ _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));
+ _movp(T, SrcF);
+ _blendvps(T, LEGAL_HACK(SrcT), xmm0);
+ _movp(Dest, T);
+ } else {
+ assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
+ Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
+ : IceType_v16i8;
+ Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
+ lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
+ _movp(T, SrcF);
+ _pblendvb(T, LEGAL_HACK(SrcT), xmm0);
+ _movp(Dest, T);
+ }
+ return;
+ }
+ // Lower select without SSE4.1:
+ // a=d?b:c ==>
+ // if elementtype(d) != i1:
+ // d=sext(d);
+ // a=(b&d)|(c&~d);
Variable *T2 = makeReg(SrcTy);
// Sign extend the condition operand if applicable.
if (SrcTy == IceType_v4f32) {
@@ -3568,11 +3616,6 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
} else {
_movp(T, Condition);
}
- // ALIGNHACK: Until stack alignment support is implemented, the
- // bitwise vector instructions need to have both operands in
- // registers. Once there is support for stack alignment, LEGAL_HACK
- // can be removed.
-#define LEGAL_HACK(Vect) legalizeToVar((Vect))
_movp(T2, T);
_pand(T, LEGAL_HACK(SrcT));
_pandn(T2, LEGAL_HACK(SrcF));
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-arith.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698