Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 3808ecbb85cabb7ed570f2c82a6d8cabf93d07a6..71b4c17304486264ecc21d315e8b63f3c3797d5a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2261,6 +2261,124 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
   Operand *Src1 = legalize(Inst->getSrc(1));
   Variable *Dest = Inst->getDest();
 
+  if (isVectorType(Dest->getType())) {
+    Type Ty = Src0->getType();
+    // Promote i1 vectors to 128-bit integer vector types.
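+    // The operands are sign-extended into a 128-bit vector type with the
+    // same number of elements so that the compare can use ordinary SSE2
+    // integer instructions.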
+    if (typeElementType(Ty) == IceType_i1) {
+      Type NewTy = IceType_NUM;
+      switch (Ty) {
+      default:
+        llvm_unreachable("unexpected type");
+        break;
+      case IceType_v4i1:
+        NewTy = IceType_v4i32;
+        break;
+      case IceType_v8i1:
+        NewTy = IceType_v8i16;
+        break;
+      case IceType_v16i1:
+        NewTy = IceType_v16i8;
+        break;
+      }
+      Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
+      Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
+      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
+      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
+      Src0 = NewSrc0;
+      Src1 = NewSrc1;
+      Ty = NewTy;
+    }
+
+    InstIcmp::ICond Condition = Inst->getCondition();
+
+    // SSE2 only has signed comparison operations. Transform unsigned
+    // inputs in a manner that allows for the use of signed comparison
+    // operations by flipping the high-order bits.
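+    // Xoring both operands with a mask that has only the sign bit of each
+    // element set flips the sign bits, which maps unsigned order onto
+    // signed order: x <u y iff (x ^ SignBit) <s (y ^ SignBit).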
+    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
+        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
+      Variable *T0 = makeReg(Ty);
+      Variable *T1 = makeReg(Ty);
+      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
+      _movp(T0, Src0);
+      _pxor(T0, HighOrderBits);
+      _movp(T1, Src1);
+      _pxor(T1, HighOrderBits);
+      Src0 = T0;
+      Src1 = T1;
+    }
+
+    // TODO: ALIGNHACK: Both operands to compare instructions need to be
+    // in registers until stack alignment support is implemented. Once
+    // there is support for stack alignment, LEGAL_HACK can be removed.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+    Variable *T = makeReg(Ty);
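+    // SSE2 provides only equality (pcmpeq) and signed greater-than (pcmpgt)
+    // vector compares, so the remaining predicates are derived below by
+    // swapping the operands and/or negating the result with an all-ones xor.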
+    switch (Condition) {
+    default:
+      llvm_unreachable("unexpected condition");
+      break;
+    case InstIcmp::Eq: {
+      _movp(T, Src0);
+      _pcmpeq(T, LEGAL_HACK(Src1));
+    } break;
+    case InstIcmp::Ne: {
+      _movp(T, Src0);
+      _pcmpeq(T, LEGAL_HACK(Src1));
+      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+      _pxor(T, MinusOne);
+    } break;
+    case InstIcmp::Ugt:
+    case InstIcmp::Sgt: {
+      _movp(T, Src0);
+      _pcmpgt(T, LEGAL_HACK(Src1));
+    } break;
+    case InstIcmp::Uge:
+    case InstIcmp::Sge: {
+      // !(Src1 > Src0)
+      _movp(T, Src1);
+      _pcmpgt(T, LEGAL_HACK(Src0));
+      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+      _pxor(T, MinusOne);
+    } break;
+    case InstIcmp::Ult:
+    case InstIcmp::Slt: {
+      _movp(T, Src1);
+      _pcmpgt(T, LEGAL_HACK(Src0));
+    } break;
+    case InstIcmp::Ule:
+    case InstIcmp::Sle: {
+      // !(Src0 > Src1)
+      _movp(T, Src0);
+      _pcmpgt(T, LEGAL_HACK(Src1));
+      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+      _pxor(T, MinusOne);
+    } break;
+    }
+#undef LEGAL_HACK
+
+    _movp(Dest, T);
+
+    // The following pattern occurs often in lowered C and C++ code:
+    //
+    //   %cmp = icmp pred <n x ty> %src0, %src1
+    //   %cmp.ext = sext <n x i1> %cmp to <n x ty>
+    //
+    // We can avoid the sext operation by copying the result from pcmpgt
+    // and pcmpeq, which is already sign-extended, to the result of the
+    // sext operation.
+    if (InstCast *NextCast =
+            llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
+      if (NextCast->getCastKind() == InstCast::Sext &&
+          NextCast->getSrc(0) == Dest) {
+        _movp(NextCast->getDest(), T);
+        // Skip over the instruction.
+        NextCast->setDeleted();
+        Context.advanceNext();
+      }
+    }
+
+    return;
+  }
+
   // If Src1 is an immediate, or known to be a physical register, we can
   // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
   // a physical register. (Actually, either Src0 or Src1 can be chosen for
@@ -3398,9 +3516,14 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
   lowerCall(Call);
 }
 
+// There is no support for loading or emitting vector constants, so the
+// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
+// etc. are initialized with register operations.
+//
+// TODO(wala): Add limited support for vector constants so that
+// complex initialization in registers is unnecessary.
+
 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
-  // There is no support for loading or emitting vector constants, so
-  // this value is initialized using register operations.
   Variable *Reg = makeReg(Ty, RegNum);
   // Insert a FakeDef, since otherwise the live range of Reg might
   // be overestimated.
@@ -3409,18 +3532,41 @@ Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
   return Reg;
 }
 
+Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
+  Variable *MinusOnes = makeReg(Ty, RegNum);
+  // Insert a FakeDef so the live range of MinusOnes is not overestimated.
+  Context.insert(InstFakeDef::create(Func, MinusOnes));
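+  // Comparing a register with itself sets every bit, leaving -1 in each
+  // element.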
+  _pcmpeq(MinusOnes, MinusOnes);
+  return MinusOnes;
+}
+
 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
-  // There is no support for loading or emitting vector constants, so
-  // this value is initialized using register operations.
   Variable *Dest = makeVectorOfZeros(Ty, RegNum);
-  Variable *MinusOne = makeReg(Ty);
-  // Insert a FakeDef so the live range of MinusOne is not overestimated.
-  Context.insert(InstFakeDef::create(Func, MinusOne));
-  _pcmpeq(MinusOne, MinusOne);
+  Variable *MinusOne = makeVectorOfMinusOnes(Ty);
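+  // Subtracting -1 from 0 leaves 1 in every element.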
   _psub(Dest, MinusOne);
   return Dest;
 }
 
+Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
+  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
+         Ty == IceType_v16i8);
+  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
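+    // Start with a vector whose elements are all 1 and shift each element
+    // left by (element width - 1) bits, leaving only the sign bit set.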
+    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
+    SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
+    _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
+    return Reg;
+  } else {
+    // SSE has no left shift operation for vectors of 8-bit integers.
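+    // Instead, broadcast a 32-bit constant whose bytes each have only
+    // their high-order bit set.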
+    const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
+    Constant *ConstantMask =
+        Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
+    Variable *Reg = makeReg(Ty, RegNum);
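+    // movd loads the 32-bit mask into the low element; pshufd with an
+    // all-zero shuffle immediate then replicates it across the register.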
+    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
+    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
+    return Reg;
+  }
+}
+
 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
                                                             Variable *Slot,
                                                             uint32_t Offset) {