Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index 45c31514e0eff1b61f8f5a97e3a96ca09307f93a..19a1256ea38df0a24b6041707338a4394a1308dd 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -87,6 +87,8 @@ InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
// The maximum number of arguments to pass in XMM registers |
const unsigned X86_MAX_XMM_ARGS = 4; |
+// The number of bits in a byte (scales a type's byte width to bits) |
+const unsigned X86_CHAR_BIT = 8; |
// In some cases, there are x-macros tables for both high-level and |
// low-level instructions/operands that use the same enum key value. |
@@ -157,7 +159,7 @@ void xMacroIntegrityCheck() { |
// Define a temporary set of enum values based on low-level |
// table entries. |
enum _tmp_enum { |
-#define X(tag, cvt, sdss, width) _tmp_##tag, |
+#define X(tag, cvt, sdss, pack, width) _tmp_##tag, |
ICETYPEX8632_TABLE |
#undef X |
_num |
@@ -169,7 +171,7 @@ void xMacroIntegrityCheck() { |
#undef X |
// Define a set of constants based on low-level table entries, |
// and ensure the table entry keys are consistent. |
-#define X(tag, cvt, sdss, width) \ |
+#define X(tag, cvt, sdss, pack, width) \ |
static const int _table2_##tag = _tmp_##tag; \ |
STATIC_ASSERT(_table1_##tag == _table2_##tag); |
ICETYPEX8632_TABLE; |
@@ -1573,6 +1575,28 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
_mov(T_Hi, T_Lo); |
_sar(T_Hi, Shift); |
_mov(DestHi, T_Hi); |
+ } else if (isVectorType(Dest->getType())) { |
+ Type DestTy = Dest->getType(); |
+ if (DestTy == IceType_v16i8) { |
+ // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
+ Variable *OneMask = makeVectorOfOnes(Dest->getType()); |
+ Variable *T = makeReg(DestTy); |
+ _movp(T, Src0RM); |
+ _pand(T, OneMask); |
+ Variable *Zeros = makeVectorOfZeros(Dest->getType()); |
+ _pcmpgt(T, Zeros); |
+ _movp(Dest, T); |
+ } else { |
+ // width = width(elty) - 1; dest = (src << width) >> width |
+ SizeT ShiftAmount = |
+ X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; |
+ Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount); |
+ Variable *T = makeReg(DestTy); |
+ _movp(T, Src0RM); |
+ _psll(T, ShiftConstant); |
+ _psra(T, ShiftConstant); |
+ _movp(Dest, T); |
+ } |
} else { |
// TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and |
// also copy to the high operand of a 64-bit variable. |
@@ -1604,6 +1628,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
_movzx(T, Src0RM); |
_and(T, One); |
_mov(Dest, T); |
+ } else if (isVectorType(Dest->getType())) { |
+ // onemask = materialize(1,1,...); dest = onemask & src |
+ Type DestTy = Dest->getType(); |
+ Variable *OneMask = makeVectorOfOnes(DestTy); |
+ Variable *T = makeReg(DestTy); |
+ _movp(T, Src0RM); |
+ _pand(T, OneMask); |
+ _movp(Dest, T); |
} else { |
// t1 = movzx src; dst = t1 |
Variable *T = makeReg(Dest->getType()); |
@@ -1613,14 +1645,25 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
break; |
} |
case InstCast::Trunc: { |
- Operand *Src0 = Inst->getSrc(0); |
- if (Src0->getType() == IceType_i64) |
- Src0 = loOperand(Src0); |
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
- // t1 = trunc Src0RM; Dest = t1 |
- Variable *T = NULL; |
- _mov(T, Src0RM); |
- _mov(Dest, T); |
+ if (isVectorType(Dest->getType())) { |
+ // onemask = materialize(1,1,...); dst = src & onemask |
+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
+ Type Src0Ty = Src0RM->getType(); |
+ Variable *OneMask = makeVectorOfOnes(Src0Ty); |
+ Variable *T = makeReg(Dest->getType()); |
+ _movp(T, Src0RM); |
+ _pand(T, OneMask); |
+ _movp(Dest, T); |
+ } else { |
+ Operand *Src0 = Inst->getSrc(0); |
+ if (Src0->getType() == IceType_i64) |
+ Src0 = loOperand(Src0); |
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
+ // t1 = trunc Src0RM; Dest = t1 |
+ Variable *T = NULL; |
+ _mov(T, Src0RM); |
+ _mov(Dest, T); |
+ } |
break; |
} |
case InstCast::Fptrunc: |
@@ -1633,7 +1676,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
break; |
} |
case InstCast::Fptosi: |
- if (Dest->getType() == IceType_i64) { |
+ if (isVectorType(Dest->getType())) { |
+ assert(Dest->getType() == IceType_v4i32 && |
+ Inst->getSrc(0)->getType() == IceType_v4f32); |
+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
+ Variable *T = makeReg(Dest->getType()); |
+ _cvt(T, Src0RM); |
+ _movp(Dest, T); |
+ } else if (Dest->getType() == IceType_i64) { |
// Use a helper for converting floating-point values to 64-bit |
// integers. SSE2 appears to have no way to convert from xmm |
// registers to something like the edx:eax register pair, and |
@@ -1660,7 +1710,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
} |
break; |
case InstCast::Fptoui: |
- if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) { |
+ if (isVectorType(Dest->getType())) { |
+ assert(Dest->getType() == IceType_v4i32 && |
+ Inst->getSrc(0)->getType() == IceType_v4f32); |
+ const SizeT MaxSrcs = 1; |
+ InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs); |
+ Call->addArg(Inst->getSrc(0)); |
+ lowerCall(Call); |
+ } else if (Dest->getType() == IceType_i64 || |
+ Dest->getType() == IceType_i32) { |
// Use a helper for both x86-32 and x86-64. |
split64(Dest); |
const SizeT MaxSrcs = 1; |
@@ -1687,7 +1745,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
} |
break; |
case InstCast::Sitofp: |
- if (Inst->getSrc(0)->getType() == IceType_i64) { |
+ if (isVectorType(Dest->getType())) { |
+ assert(Dest->getType() == IceType_v4f32 && |
+ Inst->getSrc(0)->getType() == IceType_v4i32); |
+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
+ Variable *T = makeReg(Dest->getType()); |
+ _cvt(T, Src0RM); |
+ _movp(Dest, T); |
+ } else if (Inst->getSrc(0)->getType() == IceType_i64) { |
// Use a helper for x86-32. |
const SizeT MaxSrcs = 1; |
Type DestType = Dest->getType(); |
@@ -1713,7 +1778,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
break; |
case InstCast::Uitofp: { |
Operand *Src0 = Inst->getSrc(0); |
- if (Src0->getType() == IceType_i64 || Src0->getType() == IceType_i32) { |
+ if (isVectorType(Src0->getType())) { |
+ assert(Dest->getType() == IceType_v4f32 && |
+ Src0->getType() == IceType_v4i32); |
+ const SizeT MaxSrcs = 1; |
+ InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs); |
+ Call->addArg(Src0); |
+ lowerCall(Call); |
+ } else if (Src0->getType() == IceType_i64 || |
+ Src0->getType() == IceType_i32) { |
// Use a helper for x86-32 and x86-64. Also use a helper for |
// i32 on x86-32. |
const SizeT MaxSrcs = 1; |
@@ -1752,6 +1825,18 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
switch (Dest->getType()) { |
default: |
llvm_unreachable("Unexpected Bitcast dest type"); |
+ case IceType_i8: { |
+ assert(Src0->getType() == IceType_v8i1); |
+ InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1); |
+ Call->addArg(Src0); |
+ lowerCall(Call); |
+ } break; |
+ case IceType_i16: { |
+ assert(Src0->getType() == IceType_v16i1); |
+ InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1); |
+ Call->addArg(Src0); |
+ lowerCall(Call); |
+ } break; |
case IceType_i32: |
case IceType_f32: { |
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
@@ -1830,6 +1915,30 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
_store(T_Hi, SpillHi); |
_movq(Dest, Spill); |
} break; |
+ case IceType_v8i1: { |
+ assert(Src0->getType() == IceType_i8); |
+ InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1); |
+ Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode()); |
+ // Arguments to functions are required to be at least 32 bits wide. |
+ lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); |
+ Call->addArg(Src0AsI32); |
+ lowerCall(Call); |
+ } break; |
+ case IceType_v16i1: { |
+ assert(Src0->getType() == IceType_i16); |
+ InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1); |
+ Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode()); |
+ // Arguments to functions are required to be at least 32 bits wide. |
+ lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); |
+ Call->addArg(Src0AsI32); |
+ lowerCall(Call); |
+ } break; |
+ case IceType_v8i16: |
+ case IceType_v16i8: |
+ case IceType_v4i32: |
+ case IceType_v4f32: { |
+ _movp(Dest, legalizeToVar(Src0)); |
+ } break; |
} |
break; |
} |
@@ -2875,6 +2984,29 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
lowerCall(Call); |
} |
+Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
+ // Returns a register of type Ty holding all zeros. Vector constants cannot |
+ // be loaded or emitted, so the value is built in-register via pxor below. |
+ Variable *Reg = makeReg(Ty, RegNum); |
+ // Reg is also passed as pxor's source before anything has defined it, so |
+ // insert a FakeDef first; otherwise its live range might be overestimated. |
+ Context.insert(InstFakeDef::create(Func, Reg)); |
+ _pxor(Reg, Reg); |
+ return Reg; |
+} |
+ |
+Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
+ // Returns a vector of type Ty whose elements are all 1. Vector constants |
+ // cannot be loaded or emitted, so it is computed in registers as 0 - (-1). |
+ Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
+ Variable *MinusOne = makeReg(Ty); |
+ // FakeDef keeps MinusOne's live range tight; pcmpeq x,x then sets all bits. |
+ Context.insert(InstFakeDef::create(Func, MinusOne)); |
+ _pcmpeq(MinusOne, MinusOne); |
+ _psub(Dest, MinusOne); |
+ return Dest; |
+} |
+ |
// Helper for legalize() to emit the right code to lower an operand to a |
// register of the appropriate type. |
Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { |
@@ -2937,19 +3069,9 @@ Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed, |
// overestimated. If the constant being lowered is a 64 bit value, |
// then the result should be split and the lo and hi components will |
// need to go in uninitialized registers. |
- |
- if (isVectorType(From->getType())) { |
- // There is no support for loading or emitting vector constants, so |
- // undef values are instead initialized in registers. |
- Variable *Reg = makeReg(From->getType(), RegNum); |
- // Insert a FakeDef, since otherwise the live range of Reg might |
- // be overestimated. |
- Context.insert(InstFakeDef::create(Func, Reg)); |
- _pxor(Reg, Reg); |
- return Reg; |
- } else { |
- From = Ctx->getConstantZero(From->getType()); |
- } |
+ if (isVectorType(From->getType())) |
+ return makeVectorOfZeros(From->getType()); |
+ From = Ctx->getConstantZero(From->getType()); |
} |
// There should be no constants of vector type (other than undef). |
assert(!isVectorType(From->getType())); |