src/IceTargetLoweringX8632.cpp - Issue 412593002: Lower icmp operations between vector values.

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 412593002: Lower icmp operations between vector values. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Pass -filetype=obj to llvm-mc. Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringX8632.cpp

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index 3808ecbb85cabb7ed570f2c82a6d8cabf93d07a6..025e16b4e8ced7253e1531281d48bea6a9cb7d1b 100644

--- a/src/IceTargetLoweringX8632.cpp

+++ b/src/IceTargetLoweringX8632.cpp

@@ -172,7 +172,7 @@ void xMacroIntegrityCheck() {

_num

};

// Define a set of constants based on high-level table entries.

-#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;

+#define X(tag, str, isunsigned) static const int _table1_##tag = InstIcmp::tag;

ICEINSTICMP_TABLE;

#undef X

// Define a set of constants based on low-level table entries,

@@ -184,7 +184,7 @@ void xMacroIntegrityCheck() {

#undef X

// Repeat the static asserts with respect to the high-level

// table entries in case the high-level table has extra entries.

-#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

+#define X(tag, str, isunsigned) STATIC_ASSERT(_table1_##tag == _table2_##tag);

ICEINSTICMP_TABLE;

#undef X

}

@@ -2261,83 +2261,198 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {

Operand *Src1 = legalize(Inst->getSrc(1));

Variable *Dest = Inst->getDest();

- // If Src1 is an immediate, or known to be a physical register, we can

- // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into

- // a physical register. (Actually, either Src0 or Src1 can be chosen for

- // the physical register, but unfortunately we have to commit to one or

- // the other before register allocation.)

- bool IsSrc1ImmOrReg = false;

- if (llvm::isa<Constant>(Src1)) {

- IsSrc1ImmOrReg = true;

- } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {

- if (Var->hasReg())

+ if (isVectorType(Dest->getType())) {

+ Type Ty = Src0->getType();

+ // Promote i1 vectors to 128 bit integer vector types.

+ if (typeElementType(Ty) == IceType_i1) {

+ Type NewTy = IceType_NUM;

+ switch (Ty) {

+ default:

+ llvm_unreachable("unexpected type");

+ break;

+ case IceType_v4i1:

+ NewTy = IceType_v4i32;

+ break;

+ case IceType_v8i1:

+ NewTy = IceType_v8i16;

+ break;

+ case IceType_v16i1:

+ NewTy = IceType_v16i8;

+ break;

+ }

+ Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());

+ Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());

+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));

+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));

+ Src0 = NewSrc0;

+ Src1 = NewSrc1;

+ Ty = NewTy;

+ }

+ // SSE2 only has signed comparison operations. Transform unsigned

+ // inputs in a manner that allows for the use of signed comparison

+ // operations by flipping the high order bits.

+ if (Inst->isUnsigned()) {

Jim Stichnoth 2014/07/23 17:29:28 I have a minor problem and a major problem with is

wala 2014/07/23 20:40:36 Done. Removed the unsigned attribute.

+ Variable *T0 = makeReg(Ty);

+ Variable *T1 = makeReg(Ty);

+ Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);

+ _movp(T0, Src0);

+ _pxor(T0, HighOrderBits);

+ _movp(T1, Src1);

+ _pxor(T1, HighOrderBits);

+ Src0 = T0;

+ Src1 = T1;

+ }

+ // TODO: ALIGNHACK: Both operands to compare instructions need to be

+ // in registers until stack alignment support is implemented. Once

+ // there is support for stack alignment, LEGAL_HACK can be removed.

+#define LEGAL_HACK(Vect) legalizeToVar((Vect))

+ Variable *T = makeReg(Ty);

+ switch (Inst->getCondition()) {

+ default:

+ llvm_unreachable("unexpected condition");

+ break;

+ case InstIcmp::Eq: {

+ _movp(T, Src0);

+ _pcmpeq(T, LEGAL_HACK(Src1));

+ } break;

+ case InstIcmp::Ne: {

+ _movp(T, Src0);

+ _pcmpeq(T, LEGAL_HACK(Src1));

+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);

+ _pxor(T, MinusOne);

+ } break;

+ case InstIcmp::Ugt:

+ case InstIcmp::Sgt: {

+ _movp(T, Src0);

+ _pcmpgt(T, LEGAL_HACK(Src1));

+ } break;

+ case InstIcmp::Uge:

+ case InstIcmp::Sge: {

+ // !(Src1 > Src0)

+ _movp(T, Src1);

+ _pcmpgt(T, LEGAL_HACK(Src0));

+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);

+ _pxor(T, MinusOne);

+ } break;

+ case InstIcmp::Ult:

+ case InstIcmp::Slt: {

+ _movp(T, Src1);

+ _pcmpgt(T, LEGAL_HACK(Src0));

+ } break;

+ case InstIcmp::Ule:

+ case InstIcmp::Sle: {

+ // !(Src0 > Src1)

+ _movp(T, Src0);

+ _pcmpgt(T, LEGAL_HACK(Src1));

+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);

+ _pxor(T, MinusOne);

+ } break;

+ }

+#undef LEGAL_HACK

+ _movp(Dest, T);

+ // The following pattern occurs often in lowered C and C++ code:

+ //

+ // %cmp = icmp pred <n x ty> %src0, %src1

+ // %cmp.ext = sext <n x i1> %cmp to <n x ty>

+ //

+ // We can avoid the sext operation by copying the result from pcmpgt

+ // and pcmpeq, which is already sign extended, to the result of the

+ // sext operation

+ if (InstCast *NextCast =

+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {

+ if (NextCast->getCastKind() == InstCast::Sext &&

+ NextCast->getSrc(0) == Dest) {

+ _movp(NextCast->getDest(), T);

+ // Skip over the instruction.

+ NextCast->setDeleted();

+ Context.advanceNext();

+ }

+ }

jvoung (off chromium) 2014/07/23 18:54:55 could this just return; and then the scalar versi

wala 2014/07/23 20:40:36 Done.

+ } else {

+ // If Src1 is an immediate, or known to be a physical register, we can

+ // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into

+ // a physical register. (Actually, either Src0 or Src1 can be chosen for

+ // the physical register, but unfortunately we have to commit to one or

+ // the other before register allocation.)

+ bool IsSrc1ImmOrReg = false;

+ if (llvm::isa<Constant>(Src1)) {

IsSrc1ImmOrReg = true;

- }

+ } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {

+ if (Var->hasReg())

+ IsSrc1ImmOrReg = true;

+ }

+ // Try to fuse a compare immediately followed by a conditional branch. This

+ // is possible when the compare dest and the branch source operands are the

+ // same, and are their only uses. TODO: implement this optimization for

+ // i64.

+ if (InstBr *NextBr =

+ llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {

+ if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&

+ Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {

+ Operand *Src0New =

+ legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);

+ _cmp(Src0New, Src1);

+ _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),

+ NextBr->getTargetFalse());

+ // Skip over the following branch instruction.

+ NextBr->setDeleted();

+ Context.advanceNext();

+ return;

+ }

+ }

- // Try to fuse a compare immediately followed by a conditional branch. This

- // is possible when the compare dest and the branch source operands are the

- // same, and are their only uses. TODO: implement this optimization for i64.

- if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {

- if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&

- Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {

- Operand *Src0New =

- legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);

- _cmp(Src0New, Src1);

- _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),

- NextBr->getTargetFalse());

- // Skip over the following branch instruction.

- NextBr->setDeleted();

- Context.advanceNext();

+ // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:

+ Constant *Zero = Ctx->getConstantZero(IceType_i32);

+ Constant *One = Ctx->getConstantInt(IceType_i32, 1);

+ if (Src0->getType() == IceType_i64) {

+ InstIcmp::ICond Condition = Inst->getCondition();

+ size_t Index = static_cast<size_t>(Condition);

+ assert(Index < TableIcmp64Size);

+ Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);

+ Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);

+ if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));

+ _cmp(loOperand(Src0), Src1LoRI);

+ _br(InstX8632Br::Br_ne, Label);

+ _cmp(hiOperand(Src0), Src1HiRI);

+ _br(InstX8632Br::Br_ne, Label);

+ Context.insert(InstFakeUse::create(Func, Dest));

+ _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));

+ Context.insert(Label);

+ } else {

+ InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);

+ InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);

+ _mov(Dest, One);

+ _cmp(hiOperand(Src0), Src1HiRI);

+ _br(TableIcmp64[Index].C1, LabelTrue);

+ _br(TableIcmp64[Index].C2, LabelFalse);

+ _cmp(loOperand(Src0), Src1LoRI);

+ _br(TableIcmp64[Index].C3, LabelTrue);

+ Context.insert(LabelFalse);

+ Context.insert(InstFakeUse::create(Func, Dest));

+ _mov(Dest, Zero);

+ Context.insert(LabelTrue);

+ }

return;

}

- }

- // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:

- Constant *Zero = Ctx->getConstantZero(IceType_i32);

- Constant *One = Ctx->getConstantInt(IceType_i32, 1);

- if (Src0->getType() == IceType_i64) {

- InstIcmp::ICond Condition = Inst->getCondition();

- size_t Index = static_cast<size_t>(Condition);

- assert(Index < TableIcmp64Size);

- Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);

- Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);

- if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {

- InstX8632Label *Label = InstX8632Label::create(Func, this);

- _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));

- _cmp(loOperand(Src0), Src1LoRI);

- _br(InstX8632Br::Br_ne, Label);

- _cmp(hiOperand(Src0), Src1HiRI);

- _br(InstX8632Br::Br_ne, Label);

- Context.insert(InstFakeUse::create(Func, Dest));

- _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));

- Context.insert(Label);

- } else {

- InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);

- InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);

- _mov(Dest, One);

- _cmp(hiOperand(Src0), Src1HiRI);

- _br(TableIcmp64[Index].C1, LabelTrue);

- _br(TableIcmp64[Index].C2, LabelFalse);

- _cmp(loOperand(Src0), Src1LoRI);

- _br(TableIcmp64[Index].C3, LabelTrue);

- Context.insert(LabelFalse);

- Context.insert(InstFakeUse::create(Func, Dest));

- _mov(Dest, Zero);

- Context.insert(LabelTrue);

- }

- return;

+ // cmp b, c

+ Operand *Src0New =

+ legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ _cmp(Src0New, Src1);

+ _mov(Dest, One);

+ _br(getIcmp32Mapping(Inst->getCondition()), Label);

+ Context.insert(InstFakeUse::create(Func, Dest));

+ _mov(Dest, Zero);

+ Context.insert(Label);

}

- // cmp b, c

- Operand *Src0New =

- legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);

- InstX8632Label *Label = InstX8632Label::create(Func, this);

- _cmp(Src0New, Src1);

- _mov(Dest, One);

- _br(getIcmp32Mapping(Inst->getCondition()), Label);

- Context.insert(InstFakeUse::create(Func, Dest));

- _mov(Dest, Zero);

- Context.insert(Label);

}

void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {

@@ -3398,9 +3513,14 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {

lowerCall(Call);

}

+// There is no support for loading or emitting vector constants, so the

+// vector values returned from makeVectorOfZeros, makeVectorOfOnes,

+// etc. are initialized with register operations.

+//

+// TODO(wala): Add limited support for vector constants so that

+// complex initialization in registers is unnecessary.

Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {

- // There is no support for loading or emitting vector constants, so

- // this value is initialized using register operations.

Variable *Reg = makeReg(Ty, RegNum);

// Insert a FakeDef, since otherwise the live range of Reg might

// be overestimated.

@@ -3409,18 +3529,41 @@ Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {

return Reg;

}

+Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {

+ Variable *MinusOnes = makeReg(Ty, RegNum);

+ // Insert a FakeDef so the live range of MinusOnes is not overestimated.

+ Context.insert(InstFakeDef::create(Func, MinusOnes));

+ _pcmpeq(MinusOnes, MinusOnes);

+ return MinusOnes;

+}

Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {

- // There is no support for loading or emitting vector constants, so

- // this value is initialized using register operations.

Variable *Dest = makeVectorOfZeros(Ty, RegNum);

- Variable *MinusOne = makeReg(Ty);

- // Insert a FakeDef so the live range of MinusOne is not overestimated.

- Context.insert(InstFakeDef::create(Func, MinusOne));

- _pcmpeq(MinusOne, MinusOne);

+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);

_psub(Dest, MinusOne);

return Dest;

}

+Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {

+ assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||

+ Ty == IceType_v16i8);

+ if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {

+ Variable *Reg = makeVectorOfOnes(Ty, RegNum);

+ SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;

+ _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));

+ return Reg;

+ } else {

+ // SSE has no left shift operation for vectors of 8 bit integers.

+ const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;

+ Constant *ConstantMask =

+ Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);

+ Variable *Reg = makeReg(Ty, RegNum);

+ _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));

+ _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));

+ return Reg;

+ }

+}

OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,

Variable *Slot,

uint32_t Offset) {

« src/IceInst.def ('K') | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-icmp.ll » ('j') | tests_lit/llvm2ice_tests/vector-icmp.ll » ('J')