Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(908)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 412593002: Lower icmp operations between vector values. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Pass -filetype=obj to llvm-mc. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 3808ecbb85cabb7ed570f2c82a6d8cabf93d07a6..025e16b4e8ced7253e1531281d48bea6a9cb7d1b 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -172,7 +172,7 @@ void xMacroIntegrityCheck() {
_num
};
// Define a set of constants based on high-level table entries.
-#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
+#define X(tag, str, isunsigned) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE;
#undef X
// Define a set of constants based on low-level table entries,
@@ -184,7 +184,7 @@ void xMacroIntegrityCheck() {
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
-#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
+#define X(tag, str, isunsigned) STATIC_ASSERT(_table1_##tag == _table2_##tag);
ICEINSTICMP_TABLE;
#undef X
}
@@ -2261,83 +2261,198 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
Operand *Src1 = legalize(Inst->getSrc(1));
Variable *Dest = Inst->getDest();
- // If Src1 is an immediate, or known to be a physical register, we can
- // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
- // a physical register. (Actually, either Src0 or Src1 can be chosen for
- // the physical register, but unfortunately we have to commit to one or
- // the other before register allocation.)
- bool IsSrc1ImmOrReg = false;
- if (llvm::isa<Constant>(Src1)) {
- IsSrc1ImmOrReg = true;
- } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
- if (Var->hasReg())
+ if (isVectorType(Dest->getType())) {
+ Type Ty = Src0->getType();
+ // Promote i1 vectors to 128-bit integer vector types.
+ if (typeElementType(Ty) == IceType_i1) {
+ Type NewTy = IceType_NUM;
+ switch (Ty) {
+ default:
+ llvm_unreachable("unexpected type");
+ break;
+ case IceType_v4i1:
+ NewTy = IceType_v4i32;
+ break;
+ case IceType_v8i1:
+ NewTy = IceType_v8i16;
+ break;
+ case IceType_v16i1:
+ NewTy = IceType_v16i8;
+ break;
+ }
+ Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
+ Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
+ Src0 = NewSrc0;
+ Src1 = NewSrc1;
+ Ty = NewTy;
+ }
+
+ // SSE2 only has signed comparison operations. Transform unsigned
+ // inputs in a manner that allows for the use of signed comparison
+ // operations by flipping the high order bits.
+ if (Inst->isUnsigned()) {
Jim Stichnoth 2014/07/23 17:29:28 I have a minor problem and a major problem with is
wala 2014/07/23 20:40:36 Done. Removed the unsigned attribute.
+ Variable *T0 = makeReg(Ty);
+ Variable *T1 = makeReg(Ty);
+ Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
+ _movp(T0, Src0);
+ _pxor(T0, HighOrderBits);
+ _movp(T1, Src1);
+ _pxor(T1, HighOrderBits);
+ Src0 = T0;
+ Src1 = T1;
+ }
+
+ // TODO: ALIGNHACK: Both operands to compare instructions need to be
+ // in registers until stack alignment support is implemented. Once
+ // there is support for stack alignment, LEGAL_HACK can be removed.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+ Variable *T = makeReg(Ty);
+ switch (Inst->getCondition()) {
+ default:
+ llvm_unreachable("unexpected condition");
+ break;
+ case InstIcmp::Eq: {
+ _movp(T, Src0);
+ _pcmpeq(T, LEGAL_HACK(Src1));
+ } break;
+ case InstIcmp::Ne: {
+ _movp(T, Src0);
+ _pcmpeq(T, LEGAL_HACK(Src1));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ case InstIcmp::Ugt:
+ case InstIcmp::Sgt: {
+ _movp(T, Src0);
+ _pcmpgt(T, LEGAL_HACK(Src1));
+ } break;
+ case InstIcmp::Uge:
+ case InstIcmp::Sge: {
+ // !(Src1 > Src0)
+ _movp(T, Src1);
+ _pcmpgt(T, LEGAL_HACK(Src0));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ case InstIcmp::Ult:
+ case InstIcmp::Slt: {
+ _movp(T, Src1);
+ _pcmpgt(T, LEGAL_HACK(Src0));
+ } break;
+ case InstIcmp::Ule:
+ case InstIcmp::Sle: {
+ // !(Src0 > Src1)
+ _movp(T, Src0);
+ _pcmpgt(T, LEGAL_HACK(Src1));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ }
+#undef LEGAL_HACK
+
+ _movp(Dest, T);
+
+ // The following pattern occurs often in lowered C and C++ code:
+ //
+ // %cmp = icmp pred <n x ty> %src0, %src1
+ // %cmp.ext = sext <n x i1> %cmp to <n x ty>
+ //
+ // We can avoid the sext operation by copying the result from pcmpgt
+ // and pcmpeq, which is already sign-extended, to the result of the
+ // sext operation.
+ if (InstCast *NextCast =
+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
+ if (NextCast->getCastKind() == InstCast::Sext &&
+ NextCast->getSrc(0) == Dest) {
+ _movp(NextCast->getDest(), T);
+ // Skip over the instruction.
+ NextCast->setDeleted();
+ Context.advanceNext();
+ }
+ }
jvoung (off chromium) 2014/07/23 18:54:55 could this just return; and then the scalar versi
wala 2014/07/23 20:40:36 Done.
+ } else {
+ // If Src1 is an immediate, or known to be a physical register, we can
+ // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
+ // a physical register. (Actually, either Src0 or Src1 can be chosen for
+ // the physical register, but unfortunately we have to commit to one or
+ // the other before register allocation.)
+ bool IsSrc1ImmOrReg = false;
+ if (llvm::isa<Constant>(Src1)) {
IsSrc1ImmOrReg = true;
- }
+ } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
+ if (Var->hasReg())
+ IsSrc1ImmOrReg = true;
+ }
+
+ // Try to fuse a compare immediately followed by a conditional branch. This
+ // is possible when the compare dest and the branch source operands are the
+ // same, and are their only uses. TODO: implement this optimization for
+ // i64.
+ if (InstBr *NextBr =
+ llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
+ if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
+ Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
+ Operand *Src0New =
+ legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
+ _cmp(Src0New, Src1);
+ _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
+ NextBr->getTargetFalse());
+ // Skip over the following branch instruction.
+ NextBr->setDeleted();
+ Context.advanceNext();
+ return;
+ }
+ }
- // Try to fuse a compare immediately followed by a conditional branch. This
- // is possible when the compare dest and the branch source operands are the
- // same, and are their only uses. TODO: implement this optimization for i64.
- if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
- if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
- Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
- Operand *Src0New =
- legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
- _cmp(Src0New, Src1);
- _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
- NextBr->getTargetFalse());
- // Skip over the following branch instruction.
- NextBr->setDeleted();
- Context.advanceNext();
+ // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ Constant *One = Ctx->getConstantInt(IceType_i32, 1);
+ if (Src0->getType() == IceType_i64) {
+ InstIcmp::ICond Condition = Inst->getCondition();
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < TableIcmp64Size);
+ Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
+ Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
+ if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
+ InstX8632Label *Label = InstX8632Label::create(Func, this);
+ _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
+ _cmp(loOperand(Src0), Src1LoRI);
+ _br(InstX8632Br::Br_ne, Label);
+ _cmp(hiOperand(Src0), Src1HiRI);
+ _br(InstX8632Br::Br_ne, Label);
+ Context.insert(InstFakeUse::create(Func, Dest));
+ _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
+ Context.insert(Label);
+ } else {
+ InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
+ InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
+ _mov(Dest, One);
+ _cmp(hiOperand(Src0), Src1HiRI);
+ _br(TableIcmp64[Index].C1, LabelTrue);
+ _br(TableIcmp64[Index].C2, LabelFalse);
+ _cmp(loOperand(Src0), Src1LoRI);
+ _br(TableIcmp64[Index].C3, LabelTrue);
+ Context.insert(LabelFalse);
+ Context.insert(InstFakeUse::create(Func, Dest));
+ _mov(Dest, Zero);
+ Context.insert(LabelTrue);
+ }
return;
}
- }
- // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- Constant *One = Ctx->getConstantInt(IceType_i32, 1);
- if (Src0->getType() == IceType_i64) {
- InstIcmp::ICond Condition = Inst->getCondition();
- size_t Index = static_cast<size_t>(Condition);
- assert(Index < TableIcmp64Size);
- Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
- Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
- if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
- _cmp(loOperand(Src0), Src1LoRI);
- _br(InstX8632Br::Br_ne, Label);
- _cmp(hiOperand(Src0), Src1HiRI);
- _br(InstX8632Br::Br_ne, Label);
- Context.insert(InstFakeUse::create(Func, Dest));
- _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
- Context.insert(Label);
- } else {
- InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
- InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
- _mov(Dest, One);
- _cmp(hiOperand(Src0), Src1HiRI);
- _br(TableIcmp64[Index].C1, LabelTrue);
- _br(TableIcmp64[Index].C2, LabelFalse);
- _cmp(loOperand(Src0), Src1LoRI);
- _br(TableIcmp64[Index].C3, LabelTrue);
- Context.insert(LabelFalse);
- Context.insert(InstFakeUse::create(Func, Dest));
- _mov(Dest, Zero);
- Context.insert(LabelTrue);
- }
- return;
+ // cmp b, c
+ Operand *Src0New =
+ legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
+ InstX8632Label *Label = InstX8632Label::create(Func, this);
+ _cmp(Src0New, Src1);
+ _mov(Dest, One);
+ _br(getIcmp32Mapping(Inst->getCondition()), Label);
+ Context.insert(InstFakeUse::create(Func, Dest));
+ _mov(Dest, Zero);
+ Context.insert(Label);
}
-
- // cmp b, c
- Operand *Src0New =
- legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- _cmp(Src0New, Src1);
- _mov(Dest, One);
- _br(getIcmp32Mapping(Inst->getCondition()), Label);
- Context.insert(InstFakeUse::create(Func, Dest));
- _mov(Dest, Zero);
- Context.insert(Label);
}
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
@@ -3398,9 +3513,14 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
lowerCall(Call);
}
+// There is no support for loading or emitting vector constants, so the
+// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
+// etc. are initialized with register operations.
+//
+// TODO(wala): Add limited support for vector constants so that
+// complex initialization in registers is unnecessary.
+
Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
- // There is no support for loading or emitting vector constants, so
- // this value is initialized using register operations.
Variable *Reg = makeReg(Ty, RegNum);
// Insert a FakeDef, since otherwise the live range of Reg might
// be overestimated.
@@ -3409,18 +3529,41 @@ Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
return Reg;
}
+Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
+ Variable *MinusOnes = makeReg(Ty, RegNum);
+ // Insert a FakeDef so the live range of MinusOnes is not overestimated.
+ Context.insert(InstFakeDef::create(Func, MinusOnes));
+ _pcmpeq(MinusOnes, MinusOnes);
+ return MinusOnes;
+}
+
Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
- // There is no support for loading or emitting vector constants, so
- // this value is initialized using register operations.
Variable *Dest = makeVectorOfZeros(Ty, RegNum);
- Variable *MinusOne = makeReg(Ty);
- // Insert a FakeDef so the live range of MinusOne is not overestimated.
- Context.insert(InstFakeDef::create(Func, MinusOne));
- _pcmpeq(MinusOne, MinusOne);
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_psub(Dest, MinusOne);
return Dest;
}
+Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
+ assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
+ Ty == IceType_v16i8);
+ if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
+ Variable *Reg = makeVectorOfOnes(Ty, RegNum);
+ SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
+ _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
+ return Reg;
+ } else {
+ // SSE has no left-shift operation for vectors of 8-bit integers.
+ const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
+ Constant *ConstantMask =
+ Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
+ Variable *Reg = makeReg(Ty, RegNum);
+ _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
+ _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
+ return Reg;
+ }
+}
+
OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
Variable *Slot,
uint32_t Offset) {

Powered by Google App Engine
This is Rietveld 408576698