Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 383303003: Lower casting operations that involve vector types. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Fix formatting changes Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 2652e8bfd0e761666e9872378ce626ea72874cfb..343ee3d3cd768db8808cd3df22726cca2f8f310c 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -157,7 +157,7 @@ void xMacroIntegrityCheck() {
// Define a temporary set of enum values based on low-level
// table entries.
enum _tmp_enum {
-#define X(tag, cvt, sdss, width) _tmp_##tag,
+#define X(tag, cvt, sdss, pack, width) _tmp_##tag,
ICETYPEX8632_TABLE
#undef X
_num
@@ -169,7 +169,7 @@ void xMacroIntegrityCheck() {
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
-#define X(tag, cvt, sdss, width) \
+#define X(tag, cvt, sdss, pack, width) \
static const int _table2_##tag = _tmp_##tag; \
STATIC_ASSERT(_table1_##tag == _table2_##tag);
ICETYPEX8632_TABLE;
@@ -1454,7 +1454,7 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Context.insert(FakeUse);
}
-
+
if (!Dest)
return;
@@ -1520,6 +1520,35 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
_mov(T_Hi, T_Lo);
_sar(T_Hi, Shift);
_mov(DestHi, T_Hi);
+ } else if (isVectorType(Dest->getType())) {
+ Type DestTy = Dest->getType();
+ if (DestTy == IceType_v16i8) {
+ // onemask = materialize(1,1,...); dst = (src & onemask) > 0
+ Variable *OneMask = makeReg(DestTy);
Jim Stichnoth 2014/07/14 18:33:12 Seems to be substantial code duplication across th[…]
wala 2014/07/15 22:52:22 Done.
+ Context.insert(InstFakeDef::create(Func, OneMask));
+ _pxor(OneMask, OneMask);
+ Variable *MinusOne = makeReg(DestTy);
+ Context.insert(InstFakeDef::create(Func, MinusOne));
+ _pcmpeq(MinusOne, MinusOne);
+ _psub(OneMask, MinusOne);
+ Variable *T = makeReg(DestTy);
+ _movp(T, Src0RM);
+ _pand(T, OneMask);
+ Variable *Zeros = makeReg(DestTy);
+ Context.insert(InstFakeDef::create(Func, Zeros));
+ _pxor(Zeros, Zeros);
+ _pcmpgt(T, Zeros);
+ _movp(Dest, T);
+ } else {
+ // width = width(elty) - 1; dest = (src << width) >> width
+ SizeT ShiftAmount = 8 * typeWidthInBytes(typeElementType(DestTy)) - 1;
Jim Stichnoth 2014/07/14 18:33:12 Use CHAR_BIT instead of 8
wala 2014/07/15 22:52:22 Since CHAR_BIT depends on the machine the code is […]
Jim Stichnoth 2014/07/15 23:15:44 Good point, thanks.
+ Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount);
+ Variable *T = makeReg(DestTy);
+ _movp(T, Src0RM);
+ _psll(T, ShiftConstant);
+ _psra(T, ShiftConstant);
+ _movp(Dest, T);
+ }
} else {
// TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
// also copy to the high operand of a 64-bit variable.
@@ -1551,6 +1580,20 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
_movzx(T, Src0RM);
_and(T, One);
_mov(Dest, T);
+ } else if (isVectorType(Dest->getType())) {
+ // onemask = materialize(1,1,...); dst = src & onemask
+ Type DestTy = Dest->getType();
+ Variable *OneMask = makeReg(DestTy);
+ Context.insert(InstFakeDef::create(Func, OneMask));
+ _pxor(OneMask, OneMask);
+ Variable *MinusOne = makeReg(DestTy);
+ Context.insert(InstFakeDef::create(Func, MinusOne));
+ _pcmpeq(MinusOne, MinusOne);
+ _psub(OneMask, MinusOne);
+ Variable *T = makeReg(DestTy);
+ _movp(T, Src0RM);
+ _pand(T, OneMask);
+ _movp(Dest, T);
} else {
// t1 = movzx src; dst = t1
Variable *T = makeReg(Dest->getType());
@@ -1560,14 +1603,31 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
break;
}
case InstCast::Trunc: {
- Operand *Src0 = Inst->getSrc(0);
- if (Src0->getType() == IceType_i64)
- Src0 = loOperand(Src0);
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- // t1 = trunc Src0RM; Dest = t1
- Variable *T = NULL;
- _mov(T, Src0RM);
- _mov(Dest, T);
+ if (isVectorType(Dest->getType())) {
+ // onemask = materialize(1,1,...); dst = src & onemask
+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
+ Type Src0Ty = Src0RM->getType();
+ Variable *OneMask = makeReg(Src0Ty);
+ Context.insert(InstFakeDef::create(Func, OneMask));
+ _pxor(OneMask, OneMask);
+ Variable *MinusOne = makeReg(Src0Ty);
+ Context.insert(InstFakeDef::create(Func, MinusOne));
+ _pcmpeq(MinusOne, MinusOne);
+ _psub(OneMask, MinusOne);
+ Variable *T = makeReg(Dest->getType());
+ _movp(T, Src0RM);
+ _pand(T, OneMask);
+ _movp(Dest, T);
+ } else {
+ Operand *Src0 = Inst->getSrc(0);
+ if (Src0->getType() == IceType_i64)
+ Src0 = loOperand(Src0);
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ // t1 = trunc Src0RM; Dest = t1
+ Variable *T = NULL;
+ _mov(T, Src0RM);
+ _mov(Dest, T);
+ }
break;
}
case InstCast::Fptrunc:
@@ -1580,7 +1640,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
break;
}
case InstCast::Fptosi:
- if (Dest->getType() == IceType_i64) {
+ if (isVectorType(Dest->getType())) {
+ assert(Dest->getType() == IceType_v4i32 &&
+ Inst->getSrc(0)->getType() == IceType_v4f32);
+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
+ Variable *T = makeReg(Dest->getType());
+ _cvt(T, Src0RM);
Jim Stichnoth 2014/07/14 18:33:12 Looks plausible, I guess, but I'm looking forward […]
wala 2014/07/14 20:52:22 This is what LLVM does.
+ _movp(Dest, T);
+ } else if (Dest->getType() == IceType_i64) {
// Use a helper for converting floating-point values to 64-bit
// integers. SSE2 appears to have no way to convert from xmm
// registers to something like the edx:eax register pair, and
@@ -1607,7 +1674,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
}
break;
case InstCast::Fptoui:
- if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
+ if (isVectorType(Dest->getType())) {
+ assert(Dest->getType() == IceType_v4i32 &&
+ Inst->getSrc(0)->getType() == IceType_v4f32);
+ const SizeT MaxSrcs = 1;
+ InstCall *Call = makeHelperCall("__fptoui_v4f32", Dest, MaxSrcs);
Jim Stichnoth 2014/07/14 18:33:12 Here and elsewhere, the helper function will be pa[…]
wala 2014/07/14 20:52:22 Such identifiers are reserved to the implementatio[…]
Jim Stichnoth 2014/07/14 22:23:30 I wouldn't say Subzero will be the *complete* impl[…]
wala 2014/07/15 22:52:21 Done. All helpers have a Sz_ prefix.
+ Call->addArg(Inst->getSrc(0));
+ lowerCall(Call);
+ } else if (Dest->getType() == IceType_i64 ||
+ Dest->getType() == IceType_i32) {
// Use a helper for both x86-32 and x86-64.
split64(Dest);
const SizeT MaxSrcs = 1;
@@ -1634,7 +1709,14 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
}
break;
case InstCast::Sitofp:
- if (Inst->getSrc(0)->getType() == IceType_i64) {
+ if (isVectorType(Dest->getType())) {
+ assert(Dest->getType() == IceType_v4f32 &&
+ Inst->getSrc(0)->getType() == IceType_v4i32);
+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
+ Variable *T = makeReg(Dest->getType());
+ _cvt(T, Src0RM);
+ _movp(Dest, T);
+ } else if (Inst->getSrc(0)->getType() == IceType_i64) {
// Use a helper for x86-32.
const SizeT MaxSrcs = 1;
Type DestType = Dest->getType();
@@ -1660,7 +1742,15 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
break;
case InstCast::Uitofp: {
Operand *Src0 = Inst->getSrc(0);
- if (Src0->getType() == IceType_i64 || Src0->getType() == IceType_i32) {
+ if (isVectorType(Src0->getType())) {
+ assert(Dest->getType() == IceType_v4f32 &&
+ Src0->getType() == IceType_v4i32);
+ const SizeT MaxSrcs = 1;
+ InstCall *Call = makeHelperCall("__uitofp_v4i32", Dest, MaxSrcs);
+ Call->addArg(Src0);
+ lowerCall(Call);
+ } else if (Src0->getType() == IceType_i64 ||
+ Src0->getType() == IceType_i32) {
// Use a helper for x86-32 and x86-64. Also use a helper for
// i32 on x86-32.
const SizeT MaxSrcs = 1;
@@ -1699,6 +1789,18 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
switch (Dest->getType()) {
default:
llvm_unreachable("Unexpected Bitcast dest type");
+ case IceType_i8: {
+ assert(Src0->getType() == IceType_v8i1);
+ InstCall *Call = makeHelperCall("__bitcast_v8i1_to_i8", Dest, 1);
+ Call->addArg(Src0);
+ lowerCall(Call);
+ } break;
+ case IceType_i16: {
+ assert(Src0->getType() == IceType_v16i1);
+ InstCall *Call = makeHelperCall("__bitcast_v16i1_to_i16", Dest, 1);
+ Call->addArg(Src0);
+ lowerCall(Call);
+ } break;
case IceType_i32:
case IceType_f32: {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
@@ -1777,6 +1879,30 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
_store(T_Hi, SpillHi);
_movq(Dest, Spill);
} break;
+ case IceType_v8i1: {
+ assert(Src0->getType() == IceType_i8);
+ InstCall *Call = makeHelperCall("__bitcast_i8_to_v8i1", Dest, 1);
+ Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode());
+ // Arguments to functions are required to be at least 32 bits wide.
+ lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
+ Call->addArg(Src0AsI32);
+ lowerCall(Call);
+ } break;
+ case IceType_v16i1: {
+ assert(Src0->getType() == IceType_i16);
+ InstCall *Call = makeHelperCall("__bitcast_i16_to_v16i1", Dest, 1);
+ Variable *Src0AsI32 = Func->makeVariable(IceType_i32, Context.getNode());
+ // Arguments to functions are required to be at least 32 bits wide.
+ lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
+ Call->addArg(Src0AsI32);
+ lowerCall(Call);
+ } break;
+ case IceType_v8i16:
+ case IceType_v16i8:
+ case IceType_v4i32:
+ case IceType_v4f32: {
+ _movp(Dest, legalizeToVar(Src0));
+ } break;
}
break;
}

Powered by Google App Engine
This is Rietveld 408576698