Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(18)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 412593002: Lower icmp operations between vector values. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Remove unused typedefs. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-icmp.ll » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 3808ecbb85cabb7ed570f2c82a6d8cabf93d07a6..71b4c17304486264ecc21d315e8b63f3c3797d5a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2261,6 +2261,124 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
Operand *Src1 = legalize(Inst->getSrc(1));
Variable *Dest = Inst->getDest();
+ if (isVectorType(Dest->getType())) {
+ Type Ty = Src0->getType();
+ // Promote i1 vectors to 128 bit integer vector types.
+ if (typeElementType(Ty) == IceType_i1) {
+ Type NewTy = IceType_NUM;
+ switch (Ty) {
+ default:
+ llvm_unreachable("unexpected type");
+ break;
+ case IceType_v4i1:
+ NewTy = IceType_v4i32;
+ break;
+ case IceType_v8i1:
+ NewTy = IceType_v8i16;
+ break;
+ case IceType_v16i1:
+ NewTy = IceType_v16i8;
+ break;
+ }
+ Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
+ Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
+ Src0 = NewSrc0;
+ Src1 = NewSrc1;
+ Ty = NewTy;
+ }
+
+ InstIcmp::ICond Condition = Inst->getCondition();
+
+ // SSE2 only has signed comparison operations. Transform unsigned
+ // inputs in a manner that allows for the use of signed comparison
+ // operations by flipping the high order bits.
+ if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
+ Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
+ Variable *T0 = makeReg(Ty);
+ Variable *T1 = makeReg(Ty);
+ Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
+ _movp(T0, Src0);
+ _pxor(T0, HighOrderBits);
+ _movp(T1, Src1);
+ _pxor(T1, HighOrderBits);
+ Src0 = T0;
+ Src1 = T1;
+ }
+
+ // TODO: ALIGNHACK: Both operands to compare instructions need to be
+ // in registers until stack alignment support is implemented. Once
+ // there is support for stack alignment, LEGAL_HACK can be removed.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+ Variable *T = makeReg(Ty);
+ switch (Condition) {
+ default:
+ llvm_unreachable("unexpected condition");
+ break;
+ case InstIcmp::Eq: {
+ _movp(T, Src0);
+ _pcmpeq(T, LEGAL_HACK(Src1));
+ } break;
+ case InstIcmp::Ne: {
+ _movp(T, Src0);
+ _pcmpeq(T, LEGAL_HACK(Src1));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ case InstIcmp::Ugt:
+ case InstIcmp::Sgt: {
+ _movp(T, Src0);
+ _pcmpgt(T, LEGAL_HACK(Src1));
+ } break;
+ case InstIcmp::Uge:
+ case InstIcmp::Sge: {
+ // !(Src1 > Src0)
+ _movp(T, Src1);
+ _pcmpgt(T, LEGAL_HACK(Src0));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ case InstIcmp::Ult:
+ case InstIcmp::Slt: {
+ _movp(T, Src1);
+ _pcmpgt(T, LEGAL_HACK(Src0));
+ } break;
+ case InstIcmp::Ule:
+ case InstIcmp::Sle: {
+ // !(Src0 > Src1)
+ _movp(T, Src0);
+ _pcmpgt(T, LEGAL_HACK(Src1));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ }
+#undef LEGAL_HACK
+
+ _movp(Dest, T);
+
+ // The following pattern occurs often in lowered C and C++ code:
+ //
+ // %cmp = icmp pred <n x ty> %src0, %src1
+ // %cmp.ext = sext <n x i1> %cmp to <n x ty>
+ //
+ // We can avoid the sext operation by copying the result of pcmpgt
+ // or pcmpeq, which is already sign extended, to the destination of
+ // the sext operation.
+ if (InstCast *NextCast =
+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
+ if (NextCast->getCastKind() == InstCast::Sext &&
+ NextCast->getSrc(0) == Dest) {
+ _movp(NextCast->getDest(), T);
+ // Skip over the instruction.
+ NextCast->setDeleted();
+ Context.advanceNext();
+ }
+ }
+
+ return;
+ }
+
// If Src1 is an immediate, or known to be a physical register, we can
// allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
// a physical register. (Actually, either Src0 or Src1 can be chosen for
@@ -3398,9 +3516,14 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
lowerCall(Call);
}
+// There is no support for loading or emitting vector constants, so the
+// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
+// etc. are initialized with register operations.
+//
+// TODO(wala): Add limited support for vector constants so that
+// complex initialization in registers is unnecessary.
+
Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
- // There is no support for loading or emitting vector constants, so
- // this value is initialized using register operations.
Variable *Reg = makeReg(Ty, RegNum);
// Insert a FakeDef, since otherwise the live range of Reg might
// be overestimated.
@@ -3409,18 +3532,41 @@ Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
return Reg;
}
+Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
+ Variable *MinusOnes = makeReg(Ty, RegNum);
+ // Insert a FakeDef so the live range of MinusOnes is not overestimated.
+ Context.insert(InstFakeDef::create(Func, MinusOnes));
+ _pcmpeq(MinusOnes, MinusOnes);
+ return MinusOnes;
+}
+
Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
- // There is no support for loading or emitting vector constants, so
- // this value is initialized using register operations.
Variable *Dest = makeVectorOfZeros(Ty, RegNum);
- Variable *MinusOne = makeReg(Ty);
- // Insert a FakeDef so the live range of MinusOne is not overestimated.
- Context.insert(InstFakeDef::create(Func, MinusOne));
- _pcmpeq(MinusOne, MinusOne);
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_psub(Dest, MinusOne);
return Dest;
}
+Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
+ assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
+ Ty == IceType_v16i8);
+ if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
+ Variable *Reg = makeVectorOfOnes(Ty, RegNum);
+ SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
+ _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
+ return Reg;
+ } else {
+ // SSE has no 8 bit vector left shift, so broadcast a 32 bit mask instead.
+ const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
+ Constant *ConstantMask =
+ Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
+ Variable *Reg = makeReg(Ty, RegNum);
+ _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
+ _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
+ return Reg;
+ }
+}
+
OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
Variable *Slot,
uint32_t Offset) {
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-icmp.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698