src/IceTargetLoweringX8632.cpp - Issue 383303003: Lower casting operations that involve vector types.

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 383303003: Lower casting operations that involve vector types. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Rebase and make bitcast changes Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringX8632.cpp

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index 45c31514e0eff1b61f8f5a97e3a96ca09307f93a..19a1256ea38df0a24b6041707338a4394a1308dd 100644

--- a/src/IceTargetLoweringX8632.cpp

+++ b/src/IceTargetLoweringX8632.cpp

@@ -87,6 +87,8 @@ InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {

// The maximum number of arguments to pass in XMM registers

const unsigned X86_MAX_XMM_ARGS = 4;

+// The number of bits in a byte

+const unsigned X86_CHAR_BIT = 8;

// In some cases, there are x-macros tables for both high-level and

// low-level instructions/operands that use the same enum key value.

@@ -157,7 +159,7 @@ void xMacroIntegrityCheck() {

// Define a temporary set of enum values based on low-level

// table entries.

enum _tmp_enum {

-#define X(tag, cvt, sdss, width) _tmp_##tag,

+#define X(tag, cvt, sdss, pack, width) _tmp_##tag,

ICETYPEX8632_TABLE

#undef X

_num

@@ -169,7 +171,7 @@ void xMacroIntegrityCheck() {

#undef X

// Define a set of constants based on low-level table entries,

// and ensure the table entry keys are consistent.

-#define X(tag, cvt, sdss, width) \

+#define X(tag, cvt, sdss, pack, width) \

static const int _table2_##tag = _tmp_##tag; \

STATIC_ASSERT(_table1_##tag == _table2_##tag);

ICETYPEX8632_TABLE;

@@ -1573,6 +1575,28 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

_mov(T_Hi, T_Lo);

_sar(T_Hi, Shift);

_mov(DestHi, T_Hi);

+ } else if (isVectorType(Dest->getType())) {

+ Type DestTy = Dest->getType();

+ if (DestTy == IceType_v16i8) {

+ // onemask = materialize(1,1,...); dst = (src & onemask) > 0

+ Variable *OneMask = makeVectorOfOnes(Dest->getType());

+ Variable *T = makeReg(DestTy);

+ _movp(T, Src0RM);

+ _pand(T, OneMask);

+ Variable *Zeros = makeVectorOfZeros(Dest->getType());

+ _pcmpgt(T, Zeros);

+ _movp(Dest, T);

+ } else {

+ // width = width(elty) - 1; dest = (src << width) >> width

+ SizeT ShiftAmount =

+ X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;

+ Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount);

+ Variable *T = makeReg(DestTy);

+ _movp(T, Src0RM);

+ _psll(T, ShiftConstant);

+ _psra(T, ShiftConstant);

+ _movp(Dest, T);

+ }

} else {

// TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and

// also copy to the high operand of a 64-bit variable.

@@ -1604,6 +1628,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

_movzx(T, Src0RM);

_and(T, One);

_mov(Dest, T);

+ } else if (isVectorType(Dest->getType())) {

+ // onemask = materialize(1,1,...); dest = onemask & src

+ Type DestTy = Dest->getType();

+ Variable *OneMask = makeVectorOfOnes(DestTy);

+ Variable *T = makeReg(DestTy);

+ _movp(T, Src0RM);

+ _pand(T, OneMask);

+ _movp(Dest, T);

} else {

// t1 = movzx src; dst = t1

Variable *T = makeReg(Dest->getType());

@@ -1613,14 +1645,25 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

break;

}

case InstCast::Trunc: {

- Operand *Src0 = Inst->getSrc(0);

- if (Src0->getType() == IceType_i64)

- Src0 = loOperand(Src0);

- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);

- // t1 = trunc Src0RM; Dest = t1

- Variable *T = NULL;

- _mov(T, Src0RM);

- _mov(Dest, T);

+ if (isVectorType(Dest->getType())) {

+ // onemask = materialize(1,1,...); dst = src & onemask

+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);

+ Type Src0Ty = Src0RM->getType();

+ Variable *OneMask = makeVectorOfOnes(Src0Ty);

+ Variable *T = makeReg(Dest->getType());

+ _movp(T, Src0RM);

+ _pand(T, OneMask);

+ _movp(Dest, T);

+ } else {

+ Operand *Src0 = Inst->getSrc(0);

+ if (Src0->getType() == IceType_i64)

+ Src0 = loOperand(Src0);

+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);

+ // t1 = trunc Src0RM; Dest = t1

+ Variable *T = NULL;

+ _mov(T, Src0RM);

+ _mov(Dest, T);

+ }

break;

}

case InstCast::Fptrunc:

@@ -1633,7 +1676,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

break;

}

case InstCast::Fptosi:

- if (Dest->getType() == IceType_i64) {

+ if (isVectorType(Dest->getType())) {

+ assert(Dest->getType() == IceType_v4i32 &&

+ Inst->getSrc(0)->getType() == IceType_v4f32);

+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);

+ Variable *T = makeReg(Dest->getType());

+ _cvt(T, Src0RM);

+ _movp(Dest, T);

+ } else if (Dest->getType() == IceType_i64) {

// Use a helper for converting floating-point values to 64-bit

// integers. SSE2 appears to have no way to convert from xmm

// registers to something like the edx:eax register pair, and

@@ -1660,7 +1710,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

}

break;

case InstCast::Fptoui:

- if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {

+ if (isVectorType(Dest->getType())) {

+ assert(Dest->getType() == IceType_v4i32 &&

+ Inst->getSrc(0)->getType() == IceType_v4f32);

+ const SizeT MaxSrcs = 1;

+ InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ lowerCall(Call);

+ } else if (Dest->getType() == IceType_i64 ||

+ Dest->getType() == IceType_i32) {

// Use a helper for both x86-32 and x86-64.

split64(Dest);

const SizeT MaxSrcs = 1;

@@ -1687,7 +1745,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

}

break;

case InstCast::Sitofp:

- if (Inst->getSrc(0)->getType() == IceType_i64) {

+ if (isVectorType(Dest->getType())) {

+ assert(Dest->getType() == IceType_v4f32 &&

+ Inst->getSrc(0)->getType() == IceType_v4i32);

+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);

+ Variable *T = makeReg(Dest->getType());

+ _cvt(T, Src0RM);

+ _movp(Dest, T);

+ } else if (Inst->getSrc(0)->getType() == IceType_i64) {

// Use a helper for x86-32.

const SizeT MaxSrcs = 1;

Type DestType = Dest->getType();

@@ -1713,7 +1778,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

break;

case InstCast::Uitofp: {

Operand *Src0 = Inst->getSrc(0);

- if (Src0->getType() == IceType_i64 || Src0->getType() == IceType_i32) {

+ if (isVectorType(Src0->getType())) {

+ assert(Dest->getType() == IceType_v4f32 &&

+ Src0->getType() == IceType_v4i32);

+ const SizeT MaxSrcs = 1;

+ InstCall *Call = makeHelperCall("Sz_uitofp_v4i32", Dest, MaxSrcs);

+ Call->addArg(Src0);

+ lowerCall(Call);

+ } else if (Src0->getType() == IceType_i64 ||

+ Src0->getType() == IceType_i32) {

// Use a helper for x86-32 and x86-64. Also use a helper for

// i32 on x86-32.

const SizeT MaxSrcs = 1;

@@ -1752,6 +1825,18 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

switch (Dest->getType()) {

default:

llvm_unreachable("Unexpected Bitcast dest type");

+ case IceType_i8: {

+ assert(Src0->getType() == IceType_v8i1);

+ InstCall *Call = makeHelperCall("Sz_bitcast_v8i1_to_i8", Dest, 1);

+ Call->addArg(Src0);

+ lowerCall(Call);

+ } break;

+ case IceType_i16: {

+ assert(Src0->getType() == IceType_v16i1);

+ InstCall *Call = makeHelperCall("Sz_bitcast_v16i1_to_i16", Dest, 1);

+ Call->addArg(Src0);

+ lowerCall(Call);

+ } break;

case IceType_i32:

case IceType_f32: {

Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);

@@ -1830,6 +1915,30 @@ void TargetX8632::lowerCast(const InstCast *Inst) {

_store(T_Hi, SpillHi);

_movq(Dest, Spill);

} break;

+ case IceType_v8i1: {

+ assert(Src0->getType() == IceType_i8);

+ InstCall *Call = makeHelperCall("Sz_bitcast_i8_to_v8i1", Dest, 1);

+ Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode());

+ // Arguments to functions are required to be at least 32 bits wide.

+ lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));

+ Call->addArg(Src0AsI32);

+ lowerCall(Call);

+ } break;

+ case IceType_v16i1: {

+ assert(Src0->getType() == IceType_i16);

+ InstCall *Call = makeHelperCall("Sz_bitcast_i16_to_v16i1", Dest, 1);

+ Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode());

+ // Arguments to functions are required to be at least 32 bits wide.

+ lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));

+ Call->addArg(Src0AsI32);

+ lowerCall(Call);

+ } break;

+ case IceType_v8i16:

+ case IceType_v16i8:

+ case IceType_v4i32:

+ case IceType_v4f32: {

+ _movp(Dest, legalizeToVar(Src0));

+ } break;

}

break;

}

@@ -2875,6 +2984,29 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {

lowerCall(Call);

}

+Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {

+ // There is no support for loading or emitting vector constants, so

+ // this value is initialized using register operations.

+ Variable *Reg = makeReg(Ty, RegNum);

+ // Insert a FakeDef, since otherwise the live range of Reg might

+ // be overestimated.

+ Context.insert(InstFakeDef::create(Func, Reg));

+ _pxor(Reg, Reg);

+ return Reg;

+Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {

+ // There is no support for loading or emitting vector constants, so

+ // this value is initialized using register operations.

+ Variable *Dest = makeVectorOfZeros(Ty, RegNum);

+ Variable *MinusOne = makeReg(Ty);

+ // Insert a FakeDef so the live range of MinusOne is not overestimated.

+ Context.insert(InstFakeDef::create(Func, MinusOne));

+ _pcmpeq(MinusOne, MinusOne);

+ _psub(Dest, MinusOne);

+ return Dest;

// Helper for legalize() to emit the right code to lower an operand to a

// register of the appropriate type.

Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {

@@ -2937,19 +3069,9 @@ Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,

// overestimated. If the constant being lowered is a 64 bit value,

// then the result should be split and the lo and hi components will

// need to go in uninitialized registers.

- if (isVectorType(From->getType())) {

- // There is no support for loading or emitting vector constants, so

- // undef values are instead initialized in registers.

- Variable *Reg = makeReg(From->getType(), RegNum);

- // Insert a FakeDef, since otherwise the live range of Reg might

- // be overestimated.

- Context.insert(InstFakeDef::create(Func, Reg));

- _pxor(Reg, Reg);

- return Reg;

- } else {

- From = Ctx->getConstantZero(From->getType());

- }

+ if (isVectorType(From->getType()))

+ return makeVectorOfZeros(From->getType());

+ From = Ctx->getConstantZero(From->getType());

}

// There should be no constants of vector type (other than undef).

assert(!isVectorType(From->getType()));

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-bitcast.ll » ('j') | no next file with comments »