Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 3808ecbb85cabb7ed570f2c82a6d8cabf93d07a6..71b4c17304486264ecc21d315e8b63f3c3797d5a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2261,6 +2261,124 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
   Operand *Src1 = legalize(Inst->getSrc(1));
   Variable *Dest = Inst->getDest();
 
+  if (isVectorType(Dest->getType())) {
+    Type Ty = Src0->getType();
+    // Promote i1 vectors to 128-bit integer vector types.
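+    // The operands are sign-extended into a 128-bit vector type with the
+    // same number of elements so that the compare can use ordinary SSE2
+    // integer instructions.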
+    if (typeElementType(Ty) == IceType_i1) {
+      Type NewTy = IceType_NUM;
+      switch (Ty) {
+      default:
+        llvm_unreachable("unexpected type");
+        break;
+      case IceType_v4i1:
+        NewTy = IceType_v4i32;
+        break;
+      case IceType_v8i1:
+        NewTy = IceType_v8i16;
+        break;
+      case IceType_v16i1:
+        NewTy = IceType_v16i8;
+        break;
+      }
+      Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
+      Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
+      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
+      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
+      Src0 = NewSrc0;
+      Src1 = NewSrc1;
+      Ty = NewTy;
+    }
+
+    InstIcmp::ICond Condition = Inst->getCondition();
+
+    // SSE2 only has signed comparison operations. Transform unsigned
+    // inputs in a manner that allows for the use of signed comparison
+    // operations by flipping the high-order bits.
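+    // Xoring both operands with a mask that has only the sign bit of each
+    // element set flips the sign bits, which maps unsigned order onto
+    // signed order: x <u y iff (x ^ SignBit) <s (y ^ SignBit).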
+    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
+        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
+      Variable *T0 = makeReg(Ty);
+      Variable *T1 = makeReg(Ty);
+      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
+      _movp(T0, Src0);
+      _pxor(T0, HighOrderBits);
+      _movp(T1, Src1);
+      _pxor(T1, HighOrderBits);
+      Src0 = T0;
+      Src1 = T1;
+    }
+
+    // TODO: ALIGNHACK: Both operands to compare instructions need to be
+    // in registers until stack alignment support is implemented. Once
+    // there is support for stack alignment, LEGAL_HACK can be removed.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+    Variable *T = makeReg(Ty);
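+    // SSE2 provides only equality (pcmpeq) and signed greater-than (pcmpgt)
+    // vector compares, so the remaining predicates are derived below by
+    // swapping the operands and/or negating the result with an all-ones xor.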
+    switch (Condition) {
+    default:
+      llvm_unreachable("unexpected condition");
+      break;
+    case InstIcmp::Eq: {
+      _movp(T, Src0);
+      _pcmpeq(T, LEGAL_HACK(Src1));
+    } break;
+    case InstIcmp::Ne: {
+      _movp(T, Src0);
+      _pcmpeq(T, LEGAL_HACK(Src1));
+      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+      _pxor(T, MinusOne);
+    } break;
+    case InstIcmp::Ugt:
+    case InstIcmp::Sgt: {
+      _movp(T, Src0);
+      _pcmpgt(T, LEGAL_HACK(Src1));
+    } break;
+    case InstIcmp::Uge:
+    case InstIcmp::Sge: {
+      // !(Src1 > Src0)
+      _movp(T, Src1);
+      _pcmpgt(T, LEGAL_HACK(Src0));
+      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+      _pxor(T, MinusOne);
+    } break;
+    case InstIcmp::Ult:
+    case InstIcmp::Slt: {
+      _movp(T, Src1);
+      _pcmpgt(T, LEGAL_HACK(Src0));
+    } break;
+    case InstIcmp::Ule:
+    case InstIcmp::Sle: {
+      // !(Src0 > Src1)
+      _movp(T, Src0);
+      _pcmpgt(T, LEGAL_HACK(Src1));
+      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+      _pxor(T, MinusOne);
+    } break;
+    }
+#undef LEGAL_HACK
+
+    _movp(Dest, T);
+
+    // The following pattern occurs often in lowered C and C++ code:
+    //
+    //   %cmp = icmp pred <n x ty> %src0, %src1
+    //   %cmp.ext = sext <n x i1> %cmp to <n x ty>
+    //
+    // We can avoid the sext operation by copying the result from pcmpgt
+    // and pcmpeq, which is already sign-extended, to the result of the
+    // sext operation.
+    if (InstCast *NextCast =
+            llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
+      if (NextCast->getCastKind() == InstCast::Sext &&
+          NextCast->getSrc(0) == Dest) {
+        _movp(NextCast->getDest(), T);
+        // Skip over the instruction.
+        NextCast->setDeleted();
+        Context.advanceNext();
+      }
+    }
+
+    return;
+  }
+
   // If Src1 is an immediate, or known to be a physical register, we can
   // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
   // a physical register. (Actually, either Src0 or Src1 can be chosen for
@@ -3398,9 +3516,14 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
   lowerCall(Call);
 }
 
+// There is no support for loading or emitting vector constants, so the
+// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
+// etc. are initialized with register operations.
+//
+// TODO(wala): Add limited support for vector constants so that
+// complex initialization in registers is unnecessary.
+
 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
-  // There is no support for loading or emitting vector constants, so
-  // this value is initialized using register operations.
   Variable *Reg = makeReg(Ty, RegNum);
   // Insert a FakeDef, since otherwise the live range of Reg might
   // be overestimated.
@@ -3409,18 +3532,41 @@ Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
   return Reg;
 }
 
+Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
+  Variable *MinusOnes = makeReg(Ty, RegNum);
+  // Insert a FakeDef so the live range of MinusOnes is not overestimated.
+  Context.insert(InstFakeDef::create(Func, MinusOnes));
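+  // Comparing a register with itself sets every bit, leaving -1 in each
+  // element.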
+  _pcmpeq(MinusOnes, MinusOnes);
+  return MinusOnes;
+}
+
 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
-  // There is no support for loading or emitting vector constants, so
-  // this value is initialized using register operations.
   Variable *Dest = makeVectorOfZeros(Ty, RegNum);
-  Variable *MinusOne = makeReg(Ty);
-  // Insert a FakeDef so the live range of MinusOne is not overestimated.
-  Context.insert(InstFakeDef::create(Func, MinusOne));
-  _pcmpeq(MinusOne, MinusOne);
+  Variable *MinusOne = makeVectorOfMinusOnes(Ty);
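+  // Subtracting -1 from 0 leaves 1 in every element.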
   _psub(Dest, MinusOne);
   return Dest;
 }
 
+Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
+  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
+         Ty == IceType_v16i8);
+  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
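+    // Start with a vector whose elements are all 1 and shift each element
+    // left by (element width - 1) bits, leaving only the sign bit set.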
+    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
+    SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
+    _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
+    return Reg;
+  } else {
+    // SSE has no left shift operation for vectors of 8-bit integers.
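+    // Instead, broadcast a 32-bit constant whose bytes each have only
+    // their high-order bit set.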
+    const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
+    Constant *ConstantMask =
+        Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
+    Variable *Reg = makeReg(Ty, RegNum);
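+    // movd loads the 32-bit mask into the low element; pshufd with an
+    // all-zero shuffle immediate then replicates it across the register.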
+    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
+    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
+    return Reg;
+  }
+}
+
 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
                                                             Variable *Slot,
                                                             uint32_t Offset) {