Chromium Code Reviews| Index: src/IceTargetLoweringX8632.cpp |
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
| index e459e2cc41456d103c0c9b36732a3fe019f8be9f..98db4203139492724f1c07f3c230b078f1758d8e 100644 |
| --- a/src/IceTargetLoweringX8632.cpp |
| +++ b/src/IceTargetLoweringX8632.cpp |
| @@ -88,6 +88,20 @@ InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
| // The maximum number of arguments to pass in XMM registers |
| const unsigned X86_MAX_XMM_ARGS = 4; |
| +// Return a string representation of the type that is suitable for use |
| +// in an identifier. |
| +IceString typeIdentString(const Type Ty) { |
|
Jim Stichnoth
2014/07/16 19:17:10
Put inside an anonymous namespace
wala
2014/07/17 01:34:53
This is already inside an anonymous namespace.
Jim Stichnoth
2014/07/17 13:03:13
D'oh! Sorry!
|
| + IceString Str; |
| + llvm::raw_string_ostream BaseOS(Str); |
| + Ostream OS(&BaseOS); |
| + if (isVectorType(Ty)) { |
| + OS << "v" << typeNumElements(Ty) << typeElementType(Ty); |
| + } else { |
| + OS << Ty; |
| + } |
| + return BaseOS.str(); |
| +} |
| + |
| // In some cases, there are x-macros tables for both high-level and |
| // low-level instructions/operands that use the same enum key value. |
| // The tables are kept separate to maintain a proper separation |
| @@ -157,7 +171,7 @@ void xMacroIntegrityCheck() { |
| // Define a temporary set of enum values based on low-level |
| // table entries. |
| enum _tmp_enum { |
| -#define X(tag, cvt, sdss, width) _tmp_##tag, |
| +#define X(tag, cvt, sdss, pack, width) _tmp_##tag, |
| ICETYPEX8632_TABLE |
| #undef X |
| _num |
| @@ -169,7 +183,7 @@ void xMacroIntegrityCheck() { |
| #undef X |
| // Define a set of constants based on low-level table entries, |
| // and ensure the table entry keys are consistent. |
| -#define X(tag, cvt, sdss, width) \ |
| +#define X(tag, cvt, sdss, pack, width) \ |
| static const int _table2_##tag = _tmp_##tag; \ |
| STATIC_ASSERT(_table1_##tag == _table2_##tag); |
| ICETYPEX8632_TABLE; |
| @@ -1137,53 +1151,197 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| break; |
| } |
| } else if (isVectorType(Dest->getType())) { |
| + // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in |
| + // registers. This is a workaround for the fact that there is no |
| + // support for aligning stack operands. Once alignment support is |
| + // implemented, replace legalizeToVar(Src1) with Src1. |
|
Jim Stichnoth
2014/07/16 19:17:10
I'm not adamant about this, but I think it might b
wala
2014/07/17 01:34:54
Good idea. Done.
|
| + // |
| + // TODO: Trap on divide and modulo by zero. |
|
Jim Stichnoth
2014/07/16 19:17:10
I believe this should only trap for integer div/mo
wala
2014/07/17 01:34:53
Done.
|
| + // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 |
| switch (Inst->getOp()) { |
| case InstArithmetic::_num: |
| llvm_unreachable("Unknown arithmetic operator"); |
| break; |
| - case InstArithmetic::Add: |
| - case InstArithmetic::And: |
| - case InstArithmetic::Or: |
| - case InstArithmetic::Xor: |
| - case InstArithmetic::Sub: |
| - case InstArithmetic::Mul: |
| - case InstArithmetic::Shl: |
| - case InstArithmetic::Lshr: |
| - case InstArithmetic::Ashr: |
| - case InstArithmetic::Udiv: |
| - case InstArithmetic::Sdiv: |
| - case InstArithmetic::Urem: |
| - case InstArithmetic::Srem: |
| - // TODO(wala): Handle these. |
| - Func->setError("Unhandled instruction"); |
| - break; |
| + case InstArithmetic::Add: { |
| + Variable *T = makeReg(Dest->getType()); |
| + _movp(T, Src0); |
| + _padd(T, legalizeToVar(Src1)); |
| + _movp(Dest, T); |
| + } break; |
| + case InstArithmetic::And: { |
| + Variable *T = makeReg(Dest->getType()); |
| + _movp(T, Src0); |
| + _pand(T, legalizeToVar(Src1)); |
| + _movp(Dest, T); |
| + } break; |
| + case InstArithmetic::Or: { |
| + Variable *T = makeReg(Dest->getType()); |
| + _movp(T, Src0); |
| + _por(T, legalizeToVar(Src1)); |
| + _movp(Dest, T); |
| + } break; |
| + case InstArithmetic::Xor: { |
| + Variable *T = makeReg(Dest->getType()); |
| + _movp(T, Src0); |
| + _pxor(T, legalizeToVar(Src1)); |
| + _movp(Dest, T); |
| + } break; |
| + case InstArithmetic::Sub: { |
| + Variable *T = makeReg(Dest->getType()); |
| + _movp(T, Src0); |
| + _psub(T, legalizeToVar(Src1)); |
| + _movp(Dest, T); |
| + } break; |
| + case InstArithmetic::Mul: { |
| + if (Dest->getType() == IceType_v4i32) { |
| + // Lowering sequence: |
| + // movups T1, Src0 |
| + // pshufd T2, Src0, [1,0,3,0] |
|
jvoung (off chromium)
2014/07/16 19:23:26
nit: The ordering of the vector ([1, 0, 3, 0]) is
wala
2014/07/17 01:34:53
I write vectors as if they were arrays in the comm
jvoung (off chromium)
2014/07/17 15:00:38
Okay, that helps some.
|
| + // pshufd T3, Src1, [1,0,3,0] |
| + // # T1 = { Src0[0] * Src1[0], Src0[2] * Src1[2] } |
| + // pmuludq T1, Src1 |
| + // # T2 = { Src0[1] * Src1[1], Src0[3] * Src1[3] } |
| + // pmuludq T2, T3 |
| + // # T1 = { lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2]) } |
| + // shufps T1, T2, [0,2,0,2] |
| + // pshufd T4, T1, [0,2,1,3] |
| + // movups Dest, T4 |
| + // |
| + // TODO(wala): SSE4.1 has pmulld. |
| + |
| + // Mask that directs pshufd to create a vector with entries |
| + // Src[1, 0, 3, 0] |
| + const unsigned Constant1030 = 0x31; |
| + Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030); |
| + // Mask that directs shufps to create a vector with entries |
| + // Dest[0, 2], Src[0, 2] |
| + const unsigned Mask0202 = 0x88; |
| + // Mask that directs pshufd to create a vector with entries |
| + // Src[0, 2, 1, 3] |
| + const unsigned Mask0213 = 0xd8; |
| + Variable *T1 = makeReg(IceType_v4i32); |
| + Variable *T2 = makeReg(IceType_v4i32); |
| + Variable *T3 = makeReg(IceType_v4i32); |
| + Variable *T4 = makeReg(IceType_v4i32); |
| + _movp(T1, Src0); |
| + // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R |
| + // with Src1 after stack operand alignment support is |
| + // implemented. |
| + Variable *Src0R = legalizeToVar(Src0); |
| + Variable *Src1R = legalizeToVar(Src1); |
| + _pshufd(T2, Src0R, Mask1030); |
| + _pshufd(T3, Src1R, Mask1030); |
| + _pmuludq(T1, Src1R); |
| + _pmuludq(T2, T3); |
| + _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
| + _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
| + _movp(Dest, T4); |
| + } else if (Dest->getType() == IceType_v8i16) { |
| + Variable *T = makeReg(IceType_v8i16); |
| + _movp(T, Src0); |
| + _pmullw(T, legalizeToVar(Src1)); |
| + _movp(Dest, T); |
| + } else { |
| + assert(Dest->getType() == IceType_v16i8); |
| + // Sz_mul_v16i8 |
| + const IceString Helper = "Sz_mul_v16i8"; |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + } |
| + } break; |
| + case InstArithmetic::Shl: { |
| + // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 |
| + const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + } break; |
| + case InstArithmetic::Lshr: { |
| + // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 |
| + const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + } break; |
| + case InstArithmetic::Ashr: { |
| + // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8 |
| + const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType()); |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + } break; |
| + case InstArithmetic::Udiv: { |
| + // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8 |
| + const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType()); |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + } break; |
| + case InstArithmetic::Sdiv: { |
| + // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8 |
| + const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType()); |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + } break; |
| + case InstArithmetic::Urem: { |
| + // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8 |
| + const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType()); |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + } break; |
| + case InstArithmetic::Srem: { |
| + // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8 |
| + const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType()); |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + } break; |
| case InstArithmetic::Fadd: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _addps(T, Src1); |
| + _addps(T, legalizeToVar(Src1)); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Fsub: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _subps(T, Src1); |
| + _subps(T, legalizeToVar(Src1)); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Fmul: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _mulps(T, Src1); |
| + _mulps(T, legalizeToVar(Src1)); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Fdiv: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _divps(T, Src1); |
| + _divps(T, legalizeToVar(Src1)); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Frem: { |
| const SizeT MaxSrcs = 2; |
| - InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs); |
| + InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs); |
| Call->addArg(Src0); |
| Call->addArg(Src1); |
| lowerCall(Call); |