Chromium Code Reviews | Index: src/IceTargetLoweringX8632.cpp |
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
| index 2652e8bfd0e761666e9872378ce626ea72874cfb..343ee3d3cd768db8808cd3df22726cca2f8f310c 100644 |
| --- a/src/IceTargetLoweringX8632.cpp |
| +++ b/src/IceTargetLoweringX8632.cpp |
| @@ -157,7 +157,7 @@ void xMacroIntegrityCheck() { |
| // Define a temporary set of enum values based on low-level |
| // table entries. |
| enum _tmp_enum { |
| -#define X(tag, cvt, sdss, width) _tmp_##tag, |
| +#define X(tag, cvt, sdss, pack, width) _tmp_##tag, |
| ICETYPEX8632_TABLE |
| #undef X |
| _num |
| @@ -169,7 +169,7 @@ void xMacroIntegrityCheck() { |
| #undef X |
| // Define a set of constants based on low-level table entries, |
| // and ensure the table entry keys are consistent. |
| -#define X(tag, cvt, sdss, width) \ |
| +#define X(tag, cvt, sdss, pack, width) \ |
| static const int _table2_##tag = _tmp_##tag; \ |
| STATIC_ASSERT(_table1_##tag == _table2_##tag); |
| ICETYPEX8632_TABLE; |
| @@ -1454,7 +1454,7 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
| Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| Context.insert(FakeUse); |
| } |
| - |
| + |
| if (!Dest) |
| return; |
| @@ -1520,6 +1520,35 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| _mov(T_Hi, T_Lo); |
| _sar(T_Hi, Shift); |
| _mov(DestHi, T_Hi); |
| + } else if (isVectorType(Dest->getType())) { |
| + Type DestTy = Dest->getType(); |
| + if (DestTy == IceType_v16i8) { |
| + // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
| + Variable *OneMask = makeReg(DestTy); |
|
Jim Stichnoth
2014/07/14 18:33:12
Seems to be substantial code duplication across th…
wala
2014/07/15 22:52:22
Done.
|
| + Context.insert(InstFakeDef::create(Func, OneMask)); |
| + _pxor(OneMask, OneMask); |
| + Variable *MinusOne = makeReg(DestTy); |
| + Context.insert(InstFakeDef::create(Func, MinusOne)); |
| + _pcmpeq(MinusOne, MinusOne); |
| + _psub(OneMask, MinusOne); |
| + Variable *T = makeReg(DestTy); |
| + _movp(T, Src0RM); |
| + _pand(T, OneMask); |
| + Variable *Zeros = makeReg(DestTy); |
| + Context.insert(InstFakeDef::create(Func, Zeros)); |
| + _pxor(Zeros, Zeros); |
| + _pcmpgt(T, Zeros); |
| + _movp(Dest, T); |
| + } else { |
| + // width = width(elty) - 1; dest = (src << width) >> width |
| + SizeT ShiftAmount = 8 * typeWidthInBytes(typeElementType(DestTy)) - 1; |
|
Jim Stichnoth
2014/07/14 18:33:12
Use CHAR_BIT instead of 8
wala
2014/07/15 22:52:22
Since CHAR_BIT depends on the machine the code is…
Jim Stichnoth
2014/07/15 23:15:44
Good point, thanks.
|
| + Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount); |
| + Variable *T = makeReg(DestTy); |
| + _movp(T, Src0RM); |
| + _psll(T, ShiftConstant); |
| + _psra(T, ShiftConstant); |
| + _movp(Dest, T); |
| + } |
| } else { |
| // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and |
| // also copy to the high operand of a 64-bit variable. |
| @@ -1551,6 +1580,20 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| _movzx(T, Src0RM); |
| _and(T, One); |
| _mov(Dest, T); |
| + } else if (isVectorType(Dest->getType())) { |
| + // onemask = materialize(1,1,...); dst = src & onemask |
| + Type DestTy = Dest->getType(); |
| + Variable *OneMask = makeReg(DestTy); |
| + Context.insert(InstFakeDef::create(Func, OneMask)); |
| + _pxor(OneMask, OneMask); |
| + Variable *MinusOne = makeReg(DestTy); |
| + Context.insert(InstFakeDef::create(Func, MinusOne)); |
| + _pcmpeq(MinusOne, MinusOne); |
| + _psub(OneMask, MinusOne); |
| + Variable *T = makeReg(DestTy); |
| + _movp(T, Src0RM); |
| + _pand(T, OneMask); |
| + _movp(Dest, T); |
| } else { |
| // t1 = movzx src; dst = t1 |
| Variable *T = makeReg(Dest->getType()); |
| @@ -1560,14 +1603,31 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| break; |
| } |
| case InstCast::Trunc: { |
| - Operand *Src0 = Inst->getSrc(0); |
| - if (Src0->getType() == IceType_i64) |
| - Src0 = loOperand(Src0); |
| - Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| - // t1 = trunc Src0RM; Dest = t1 |
| - Variable *T = NULL; |
| - _mov(T, Src0RM); |
| - _mov(Dest, T); |
| + if (isVectorType(Dest->getType())) { |
| + // onemask = materialize(1,1,...); dst = src & onemask |
| + Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| + Type Src0Ty = Src0RM->getType(); |
| + Variable *OneMask = makeReg(Src0Ty); |
| + Context.insert(InstFakeDef::create(Func, OneMask)); |
| + _pxor(OneMask, OneMask); |
| + Variable *MinusOne = makeReg(Src0Ty); |
| + Context.insert(InstFakeDef::create(Func, MinusOne)); |
| + _pcmpeq(MinusOne, MinusOne); |
| + _psub(OneMask, MinusOne); |
| + Variable *T = makeReg(Dest->getType()); |
| + _movp(T, Src0RM); |
| + _pand(T, OneMask); |
| + _movp(Dest, T); |
| + } else { |
| + Operand *Src0 = Inst->getSrc(0); |
| + if (Src0->getType() == IceType_i64) |
| + Src0 = loOperand(Src0); |
| + Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| + // t1 = trunc Src0RM; Dest = t1 |
| + Variable *T = NULL; |
| + _mov(T, Src0RM); |
| + _mov(Dest, T); |
| + } |
| break; |
| } |
| case InstCast::Fptrunc: |
| @@ -1580,7 +1640,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| break; |
| } |
| case InstCast::Fptosi: |
| - if (Dest->getType() == IceType_i64) { |
| + if (isVectorType(Dest->getType())) { |
| + assert(Dest->getType() == IceType_v4i32 && |
| + Inst->getSrc(0)->getType() == IceType_v4f32); |
| + Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| + Variable *T = makeReg(Dest->getType()); |
| + _cvt(T, Src0RM); |
|
Jim Stichnoth
2014/07/14 18:33:12
Looks plausible, I guess, but I'm looking forward…
wala
2014/07/14 20:52:22
This is what LLVM does.
|
| + _movp(Dest, T); |
| + } else if (Dest->getType() == IceType_i64) { |
| // Use a helper for converting floating-point values to 64-bit |
| // integers. SSE2 appears to have no way to convert from xmm |
| // registers to something like the edx:eax register pair, and |
| @@ -1607,7 +1674,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| } |
| break; |
| case InstCast::Fptoui: |
| - if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) { |
| + if (isVectorType(Dest->getType())) { |
| + assert(Dest->getType() == IceType_v4i32 && |
| + Inst->getSrc(0)->getType() == IceType_v4f32); |
| + const SizeT MaxSrcs = 1; |
| + InstCall *Call = makeHelperCall("__fptoui_v4f32", Dest, MaxSrcs); |
|
Jim Stichnoth
2014/07/14 18:33:12
Here and elsewhere, the helper function will be pa…
wala
2014/07/14 20:52:22
Such identifiers are reserved to the implementation…
Jim Stichnoth
2014/07/14 22:23:30
I wouldn't say Subzero will be the *complete* impl…
wala
2014/07/15 22:52:21
Done.
All helpers have a Sz_ prefix.
|
| + Call->addArg(Inst->getSrc(0)); |
| + lowerCall(Call); |
| + } else if (Dest->getType() == IceType_i64 || |
| + Dest->getType() == IceType_i32) { |
| // Use a helper for both x86-32 and x86-64. |
| split64(Dest); |
| const SizeT MaxSrcs = 1; |
| @@ -1634,7 +1709,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| } |
| break; |
| case InstCast::Sitofp: |
| - if (Inst->getSrc(0)->getType() == IceType_i64) { |
| + if (isVectorType(Dest->getType())) { |
| + assert(Dest->getType() == IceType_v4f32 && |
| + Inst->getSrc(0)->getType() == IceType_v4i32); |
| + Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| + Variable *T = makeReg(Dest->getType()); |
| + _cvt(T, Src0RM); |
| + _movp(Dest, T); |
| + } else if (Inst->getSrc(0)->getType() == IceType_i64) { |
| // Use a helper for x86-32. |
| const SizeT MaxSrcs = 1; |
| Type DestType = Dest->getType(); |
| @@ -1660,7 +1742,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| break; |
| case InstCast::Uitofp: { |
| Operand *Src0 = Inst->getSrc(0); |
| - if (Src0->getType() == IceType_i64 || Src0->getType() == IceType_i32) { |
| + if (isVectorType(Src0->getType())) { |
| + assert(Dest->getType() == IceType_v4f32 && |
| + Src0->getType() == IceType_v4i32); |
| + const SizeT MaxSrcs = 1; |
| + InstCall *Call = makeHelperCall("__uitofp_v4i32", Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + lowerCall(Call); |
| + } else if (Src0->getType() == IceType_i64 || |
| + Src0->getType() == IceType_i32) { |
| // Use a helper for x86-32 and x86-64. Also use a helper for |
| // i32 on x86-32. |
| const SizeT MaxSrcs = 1; |
| @@ -1699,6 +1789,18 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| switch (Dest->getType()) { |
| default: |
| llvm_unreachable("Unexpected Bitcast dest type"); |
| + case IceType_i8: { |
| + assert(Src0->getType() == IceType_v8i1); |
| + InstCall *Call = makeHelperCall("__bitcast_v8i1_to_i8", Dest, 1); |
| + Call->addArg(Src0); |
| + lowerCall(Call); |
| + } break; |
| + case IceType_i16: { |
| + assert(Src0->getType() == IceType_v16i1); |
| + InstCall *Call = makeHelperCall("__bitcast_v16i1_to_i16", Dest, 1); |
| + Call->addArg(Src0); |
| + lowerCall(Call); |
| + } break; |
| case IceType_i32: |
| case IceType_f32: { |
| Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| @@ -1777,6 +1879,30 @@ void TargetX8632::lowerCast(const InstCast *Inst) { |
| _store(T_Hi, SpillHi); |
| _movq(Dest, Spill); |
| } break; |
| + case IceType_v8i1: { |
| + assert(Src0->getType() == IceType_i8); |
| + InstCall *Call = makeHelperCall("__bitcast_i8_to_v8i1", Dest, 1); |
| + Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode()); |
| + // Arguments to functions are required to be at least 32 bits wide. |
| + lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); |
| + Call->addArg(Src0AsI32); |
| + lowerCall(Call); |
| + } break; |
| + case IceType_v16i1: { |
| + assert(Src0->getType() == IceType_i16); |
| + InstCall *Call = makeHelperCall("__bitcast_i16_to_v16i1", Dest, 1); |
| + Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode()); |
| + // Arguments to functions are required to be at least 32 bits wide. |
| + lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); |
| + Call->addArg(Src0AsI32); |
| + lowerCall(Call); |
| + } break; |
| + case IceType_v8i16: |
| + case IceType_v16i8: |
| + case IceType_v4i32: |
| + case IceType_v4f32: { |
| + _movp(Dest, legalizeToVar(Src0)); |
| + } break; |
| } |
| break; |
| } |