src/IceTargetLoweringX8632.cpp - Issue 397833002: Lower the rest of the vector arithmetic operations.

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 397833002: Lower the rest of the vector arithmetic operations. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Format crosstest.py Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringX8632.cpp

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index e459e2cc41456d103c0c9b36732a3fe019f8be9f..98db4203139492724f1c07f3c230b078f1758d8e 100644

--- a/src/IceTargetLoweringX8632.cpp

+++ b/src/IceTargetLoweringX8632.cpp

@@ -88,6 +88,20 @@ InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {

// The maximum number of arguments to pass in XMM registers

const unsigned X86_MAX_XMM_ARGS = 4;

+// Return a string representation of the type that is suitable for use

+// in an identifier.

+IceString typeIdentString(const Type Ty) {

Jim Stichnoth 2014/07/16 19:17:10 Put inside an anonymous namespace

wala 2014/07/17 01:34:53 This is already inside an anonymous namespace.

Jim Stichnoth 2014/07/17 13:03:13 D'oh! Sorry!

+ IceString Str;

+ llvm::raw_string_ostream BaseOS(Str);

+ Ostream OS(&BaseOS);

+ if (isVectorType(Ty)) {

+ OS << "v" << typeNumElements(Ty) << typeElementType(Ty);

+ } else {

+ OS << Ty;

+ }

+ return BaseOS.str();

// In some cases, there are x-macro tables for both high-level and

// low-level instructions/operands that use the same enum key value.

// The tables are kept separate to maintain a proper separation

@@ -157,7 +171,7 @@ void xMacroIntegrityCheck() {

// Define a temporary set of enum values based on low-level

// table entries.

enum _tmp_enum {

-#define X(tag, cvt, sdss, width) _tmp_##tag,

+#define X(tag, cvt, sdss, pack, width) _tmp_##tag,

ICETYPEX8632_TABLE

#undef X

_num

@@ -169,7 +183,7 @@ void xMacroIntegrityCheck() {

#undef X

// Define a set of constants based on low-level table entries,

// and ensure the table entry keys are consistent.

-#define X(tag, cvt, sdss, width) \

+#define X(tag, cvt, sdss, pack, width) \

static const int _table2_##tag = _tmp_##tag; \

STATIC_ASSERT(_table1_##tag == _table2_##tag);

ICETYPEX8632_TABLE;

@@ -1137,53 +1151,197 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {

break;

}

} else if (isVectorType(Dest->getType())) {

+ // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in

+ // registers. This is a workaround of the fact that there is no

+ // support for aligning stack operands. Once alignment support is

+ // implemented, replace legalizeToVar(Src1) with Src1.

Jim Stichnoth 2014/07/16 19:17:10 I'm not adamant about this, but I think it might b

wala 2014/07/17 01:34:54 Good idea. Done.

+ //

+ // TODO: Trap on divide and modulo by zero.

Jim Stichnoth 2014/07/16 19:17:10 I believe this should only trap for integer div/mo

wala 2014/07/17 01:34:53 Done.

+ // See: https://code.google.com/p/nativeclient/issues/detail?id=3899

switch (Inst->getOp()) {

case InstArithmetic::_num:

llvm_unreachable("Unknown arithmetic operator");

break;

- case InstArithmetic::Add:

- case InstArithmetic::And:

- case InstArithmetic::Or:

- case InstArithmetic::Xor:

- case InstArithmetic::Sub:

- case InstArithmetic::Mul:

- case InstArithmetic::Shl:

- case InstArithmetic::Lshr:

- case InstArithmetic::Ashr:

- case InstArithmetic::Udiv:

- case InstArithmetic::Sdiv:

- case InstArithmetic::Urem:

- case InstArithmetic::Srem:

- // TODO(wala): Handle these.

- Func->setError("Unhandled instruction");

- break;

+ case InstArithmetic::Add: {

+ Variable *T = makeReg(Dest->getType());

+ _movp(T, Src0);

+ _padd(T, legalizeToVar(Src1));

+ _movp(Dest, T);

+ } break;

+ case InstArithmetic::And: {

+ Variable *T = makeReg(Dest->getType());

+ _movp(T, Src0);

+ _pand(T, legalizeToVar(Src1));

+ _movp(Dest, T);

+ } break;

+ case InstArithmetic::Or: {

+ Variable *T = makeReg(Dest->getType());

+ _movp(T, Src0);

+ _por(T, legalizeToVar(Src1));

+ _movp(Dest, T);

+ } break;

+ case InstArithmetic::Xor: {

+ Variable *T = makeReg(Dest->getType());

+ _movp(T, Src0);

+ _pxor(T, legalizeToVar(Src1));

+ _movp(Dest, T);

+ } break;

+ case InstArithmetic::Sub: {

+ Variable *T = makeReg(Dest->getType());

+ _movp(T, Src0);

+ _psub(T, legalizeToVar(Src1));

+ _movp(Dest, T);

+ } break;

+ case InstArithmetic::Mul: {

+ if (Dest->getType() == IceType_v4i32) {

+ // Lowering sequence:

+ // movups T1, Src0

+ // pshufd T2, Src0, [1,0,3,0]

jvoung (off chromium) 2014/07/16 19:23:26 nit: The ordering of the vector ([1, 0, 3, 0]) is

wala 2014/07/17 01:34:53 I write vectors as if they were arrays in the comm

jvoung (off chromium) 2014/07/17 15:00:38 Okay, that helps some.

+ // pshufd T3, Src1, [1,0,3,0]

+ // # T1 = { Src0[0] * Src1[0], Src0[2] * Src1[2] }

+ // pmuludq T1, Src1

+ // # T2 = { Src0[1] * Src1[1], Src0[3] * Src1[3] }

+ // pmuludq T2, T3

+ // # T1 = { lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2]) }

+ // shufps T1, T2, [0,2,0,2]

+ // pshufd T4, T1, [0,2,1,3]

+ // movups Dest, T4

+ //

+ // TODO(wala): SSE4.1 has pmulld.

+ // Mask that directs pshufd to create a vector with entries

+ // Src[1, 0, 3, 0]

+ const unsigned Constant1030 = 0x31;

+ Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);

+ // Mask that directs shufps to create a vector with entries

+ // Dest[0, 2], Src[0, 2]

+ const unsigned Mask0202 = 0x88;

+ // Mask that directs pshufd to create a vector with entries

+ // Src[0, 2, 1, 3]

+ const unsigned Mask0213 = 0xd8;

+ Variable *T1 = makeReg(IceType_v4i32);

+ Variable *T2 = makeReg(IceType_v4i32);

+ Variable *T3 = makeReg(IceType_v4i32);

+ Variable *T4 = makeReg(IceType_v4i32);

+ _movp(T1, Src0);

+ // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R

+ // with Src1 after stack operand alignment support is

+ // implemented.

+ Variable *Src0R = legalizeToVar(Src0);

+ Variable *Src1R = legalizeToVar(Src1);

+ _pshufd(T2, Src0R, Mask1030);

+ _pshufd(T3, Src1R, Mask1030);

+ _pmuludq(T1, Src1R);

+ _pmuludq(T2, T3);

+ _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));

+ _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));

+ _movp(Dest, T4);

+ } else if (Dest->getType() == IceType_v8i16) {

+ Variable *T = makeReg(IceType_v8i16);

+ _movp(T, Src0);

+ _pmullw(T, legalizeToVar(Src1));

+ _movp(Dest, T);

+ } else {

+ assert(Dest->getType() == IceType_v16i8);

+ // Sz_mul_v16i8

+ const IceString Helper = "Sz_mul_v16i8";

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ lowerCall(Call);

+ }

+ } break;

+ case InstArithmetic::Shl: {

+ // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8

+ const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Lshr: {

+ // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8

+ const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Ashr: {

+ // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8

+ const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Udiv: {

+ // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8

+ const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Sdiv: {

+ // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8

+ const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Urem: {

+ // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8

+ const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Srem: {

+ // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8

+ const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ lowerCall(Call);

+ } break;

case InstArithmetic::Fadd: {

Variable *T = makeReg(Dest->getType());

_movp(T, Src0);

- _addps(T, Src1);

+ _addps(T, legalizeToVar(Src1));

_movp(Dest, T);

} break;

case InstArithmetic::Fsub: {

Variable *T = makeReg(Dest->getType());

_movp(T, Src0);

- _subps(T, Src1);

+ _subps(T, legalizeToVar(Src1));

_movp(Dest, T);

} break;

case InstArithmetic::Fmul: {

Variable *T = makeReg(Dest->getType());

_movp(T, Src0);

- _mulps(T, Src1);

+ _mulps(T, legalizeToVar(Src1));

_movp(Dest, T);

} break;

case InstArithmetic::Fdiv: {

Variable *T = makeReg(Dest->getType());

_movp(T, Src0);

- _divps(T, Src1);

+ _divps(T, legalizeToVar(Src1));

_movp(Dest, T);

} break;

case InstArithmetic::Frem: {

const SizeT MaxSrcs = 2;

- InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs);

+ InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);

Call->addArg(Src0);

Call->addArg(Src1);

lowerCall(Call);

« crosstest/test_arith_main.cpp ('K') | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-arith.ll » ('j') | no next file with comments »