Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index 3808ecbb85cabb7ed570f2c82a6d8cabf93d07a6..025e16b4e8ced7253e1531281d48bea6a9cb7d1b 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -172,7 +172,7 @@ void xMacroIntegrityCheck() { |
_num |
}; |
// Define a set of constants based on high-level table entries. |
-#define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
+#define X(tag, str, isunsigned) static const int _table1_##tag = InstIcmp::tag; |
ICEINSTICMP_TABLE; |
#undef X |
// Define a set of constants based on low-level table entries, |
@@ -184,7 +184,7 @@ void xMacroIntegrityCheck() { |
#undef X |
// Repeat the static asserts with respect to the high-level |
// table entries in case the high-level table has extra entries. |
-#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
+#define X(tag, str, isunsigned) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
ICEINSTICMP_TABLE; |
#undef X |
} |
@@ -2261,83 +2261,198 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
Operand *Src1 = legalize(Inst->getSrc(1)); |
Variable *Dest = Inst->getDest(); |
- // If Src1 is an immediate, or known to be a physical register, we can |
- // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
- // a physical register. (Actually, either Src0 or Src1 can be chosen for |
- // the physical register, but unfortunately we have to commit to one or |
- // the other before register allocation.) |
- bool IsSrc1ImmOrReg = false; |
- if (llvm::isa<Constant>(Src1)) { |
- IsSrc1ImmOrReg = true; |
- } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
- if (Var->hasReg()) |
+ if (isVectorType(Dest->getType())) { |
+ Type Ty = Src0->getType(); |
+ // Promote i1 vectors to 128 bit integer vector types. |
+ if (typeElementType(Ty) == IceType_i1) { |
+ Type NewTy = IceType_NUM; |
+ switch (Ty) { |
+ default: |
+ llvm_unreachable("unexpected type"); |
+ break; |
+ case IceType_v4i1: |
+ NewTy = IceType_v4i32; |
+ break; |
+ case IceType_v8i1: |
+ NewTy = IceType_v8i16; |
+ break; |
+ case IceType_v16i1: |
+ NewTy = IceType_v16i8; |
+ break; |
+ } |
+ Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode()); |
+ Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode()); |
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); |
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); |
+ Src0 = NewSrc0; |
+ Src1 = NewSrc1; |
+ Ty = NewTy; |
+ } |
+ |
+ // SSE2 only has signed comparison operations. Transform unsigned |
+ // inputs in a manner that allows for the use of signed comparison |
+ // operations by flipping the high order bits. |
+ if (Inst->isUnsigned()) { |
Jim Stichnoth
2014/07/23 17:29:28
I have a minor problem and a major problem with is… [comment truncated in this view]
wala
2014/07/23 20:40:36
Done.
Removed the unsigned attribute.
|
+ Variable *T0 = makeReg(Ty); |
+ Variable *T1 = makeReg(Ty); |
+ Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
+ _movp(T0, Src0); |
+ _pxor(T0, HighOrderBits); |
+ _movp(T1, Src1); |
+ _pxor(T1, HighOrderBits); |
+ Src0 = T0; |
+ Src1 = T1; |
+ } |
+ |
+ // TODO: ALIGNHACK: Both operands to compare instructions need to be |
+ // in registers until stack alignment support is implemented. Once |
+ // there is support for stack alignment, LEGAL_HACK can be removed. |
+#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
+ Variable *T = makeReg(Ty); |
+ switch (Inst->getCondition()) { |
+ default: |
+ llvm_unreachable("unexpected condition"); |
+ break; |
+ case InstIcmp::Eq: { |
+ _movp(T, Src0); |
+ _pcmpeq(T, LEGAL_HACK(Src1)); |
+ } break; |
+ case InstIcmp::Ne: { |
+ _movp(T, Src0); |
+ _pcmpeq(T, LEGAL_HACK(Src1)); |
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
+ _pxor(T, MinusOne); |
+ } break; |
+ case InstIcmp::Ugt: |
+ case InstIcmp::Sgt: { |
+ _movp(T, Src0); |
+ _pcmpgt(T, LEGAL_HACK(Src1)); |
+ } break; |
+ case InstIcmp::Uge: |
+ case InstIcmp::Sge: { |
+ // !(Src1 > Src0) |
+ _movp(T, Src1); |
+ _pcmpgt(T, LEGAL_HACK(Src0)); |
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
+ _pxor(T, MinusOne); |
+ } break; |
+ case InstIcmp::Ult: |
+ case InstIcmp::Slt: { |
+ _movp(T, Src1); |
+ _pcmpgt(T, LEGAL_HACK(Src0)); |
+ } break; |
+ case InstIcmp::Ule: |
+ case InstIcmp::Sle: { |
+ // !(Src0 > Src1) |
+ _movp(T, Src0); |
+ _pcmpgt(T, LEGAL_HACK(Src1)); |
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
+ _pxor(T, MinusOne); |
+ } break; |
+ } |
+#undef LEGAL_HACK |
+ |
+ _movp(Dest, T); |
+ |
+ // The following pattern occurs often in lowered C and C++ code: |
+ // |
+ // %cmp = icmp pred <n x ty> %src0, %src1 |
+ // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
+ // |
+ // We can avoid the sext operation by copying the result of pcmpgt |
+ // or pcmpeq, which is already sign extended, to the destination of |
+ // the sext operation. |
+ if (InstCast *NextCast = |
+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
+ if (NextCast->getCastKind() == InstCast::Sext && |
+ NextCast->getSrc(0) == Dest) { |
+ _movp(NextCast->getDest(), T); |
+ // Skip over the instruction. |
+ NextCast->setDeleted(); |
+ Context.advanceNext(); |
+ } |
+ } |
jvoung (off chromium)
2014/07/23 18:54:55
could this just return;
and then the scalar versi… [comment truncated in this view]
wala
2014/07/23 20:40:36
Done.
|
+ } else { |
+ // If Src1 is an immediate, or known to be a physical register, we can |
+ // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
+ // a physical register. (Actually, either Src0 or Src1 can be chosen for |
+ // the physical register, but unfortunately we have to commit to one or |
+ // the other before register allocation.) |
+ bool IsSrc1ImmOrReg = false; |
+ if (llvm::isa<Constant>(Src1)) { |
IsSrc1ImmOrReg = true; |
- } |
+ } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
+ if (Var->hasReg()) |
+ IsSrc1ImmOrReg = true; |
+ } |
+ |
+ // Try to fuse a compare immediately followed by a conditional branch. This |
+ // is possible when the compare dest and the branch source operands are the |
+ // same, and are their only uses. TODO: implement this optimization for |
+ // i64. |
+ if (InstBr *NextBr = |
+ llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
+ if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
+ Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
+ Operand *Src0New = |
+ legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
+ _cmp(Src0New, Src1); |
+ _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
+ NextBr->getTargetFalse()); |
+ // Skip over the following branch instruction. |
+ NextBr->setDeleted(); |
+ Context.advanceNext(); |
+ return; |
+ } |
+ } |
- // Try to fuse a compare immediately followed by a conditional branch. This |
- // is possible when the compare dest and the branch source operands are the |
- // same, and are their only uses. TODO: implement this optimization for i64. |
- if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
- if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
- Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
- Operand *Src0New = |
- legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
- _cmp(Src0New, Src1); |
- _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
- NextBr->getTargetFalse()); |
- // Skip over the following branch instruction. |
- NextBr->setDeleted(); |
- Context.advanceNext(); |
+ // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
+ Constant *Zero = Ctx->getConstantZero(IceType_i32); |
+ Constant *One = Ctx->getConstantInt(IceType_i32, 1); |
+ if (Src0->getType() == IceType_i64) { |
+ InstIcmp::ICond Condition = Inst->getCondition(); |
+ size_t Index = static_cast<size_t>(Condition); |
+ assert(Index < TableIcmp64Size); |
+ Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
+ Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
+ if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { |
+ InstX8632Label *Label = InstX8632Label::create(Func, this); |
+ _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); |
+ _cmp(loOperand(Src0), Src1LoRI); |
+ _br(InstX8632Br::Br_ne, Label); |
+ _cmp(hiOperand(Src0), Src1HiRI); |
+ _br(InstX8632Br::Br_ne, Label); |
+ Context.insert(InstFakeUse::create(Func, Dest)); |
+ _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); |
+ Context.insert(Label); |
+ } else { |
+ InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); |
+ InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); |
+ _mov(Dest, One); |
+ _cmp(hiOperand(Src0), Src1HiRI); |
+ _br(TableIcmp64[Index].C1, LabelTrue); |
+ _br(TableIcmp64[Index].C2, LabelFalse); |
+ _cmp(loOperand(Src0), Src1LoRI); |
+ _br(TableIcmp64[Index].C3, LabelTrue); |
+ Context.insert(LabelFalse); |
+ Context.insert(InstFakeUse::create(Func, Dest)); |
+ _mov(Dest, Zero); |
+ Context.insert(LabelTrue); |
+ } |
return; |
} |
- } |
- // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
- Constant *Zero = Ctx->getConstantZero(IceType_i32); |
- Constant *One = Ctx->getConstantInt(IceType_i32, 1); |
- if (Src0->getType() == IceType_i64) { |
- InstIcmp::ICond Condition = Inst->getCondition(); |
- size_t Index = static_cast<size_t>(Condition); |
- assert(Index < TableIcmp64Size); |
- Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
- Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
- if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { |
- InstX8632Label *Label = InstX8632Label::create(Func, this); |
- _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); |
- _cmp(loOperand(Src0), Src1LoRI); |
- _br(InstX8632Br::Br_ne, Label); |
- _cmp(hiOperand(Src0), Src1HiRI); |
- _br(InstX8632Br::Br_ne, Label); |
- Context.insert(InstFakeUse::create(Func, Dest)); |
- _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); |
- Context.insert(Label); |
- } else { |
- InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); |
- InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); |
- _mov(Dest, One); |
- _cmp(hiOperand(Src0), Src1HiRI); |
- _br(TableIcmp64[Index].C1, LabelTrue); |
- _br(TableIcmp64[Index].C2, LabelFalse); |
- _cmp(loOperand(Src0), Src1LoRI); |
- _br(TableIcmp64[Index].C3, LabelTrue); |
- Context.insert(LabelFalse); |
- Context.insert(InstFakeUse::create(Func, Dest)); |
- _mov(Dest, Zero); |
- Context.insert(LabelTrue); |
- } |
- return; |
+ // cmp b, c |
+ Operand *Src0New = |
+ legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
+ InstX8632Label *Label = InstX8632Label::create(Func, this); |
+ _cmp(Src0New, Src1); |
+ _mov(Dest, One); |
+ _br(getIcmp32Mapping(Inst->getCondition()), Label); |
+ Context.insert(InstFakeUse::create(Func, Dest)); |
+ _mov(Dest, Zero); |
+ Context.insert(Label); |
} |
- |
- // cmp b, c |
- Operand *Src0New = |
- legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
- InstX8632Label *Label = InstX8632Label::create(Func, this); |
- _cmp(Src0New, Src1); |
- _mov(Dest, One); |
- _br(getIcmp32Mapping(Inst->getCondition()), Label); |
- Context.insert(InstFakeUse::create(Func, Dest)); |
- _mov(Dest, Zero); |
- Context.insert(Label); |
} |
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
@@ -3398,9 +3513,14 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
lowerCall(Call); |
} |
+// There is no support for loading or emitting vector constants, so the |
+// vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
+// etc. are initialized with register operations. |
+// |
+// TODO(wala): Add limited support for vector constants so that |
+// complex initialization in registers is unnecessary. |
+ |
Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
- // There is no support for loading or emitting vector constants, so |
- // this value is initialized using register operations. |
Variable *Reg = makeReg(Ty, RegNum); |
// Insert a FakeDef, since otherwise the live range of Reg might |
// be overestimated. |
@@ -3409,18 +3529,41 @@ Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
return Reg; |
} |
+Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { |
+ Variable *MinusOnes = makeReg(Ty, RegNum); |
+ // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
+ Context.insert(InstFakeDef::create(Func, MinusOnes)); |
+ _pcmpeq(MinusOnes, MinusOnes); |
+ return MinusOnes; |
+} |
+ |
Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
- // There is no support for loading or emitting vector constants, so |
- // this value is initialized using register operations. |
Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
- Variable *MinusOne = makeReg(Ty); |
- // Insert a FakeDef so the live range of MinusOne is not overestimated. |
- Context.insert(InstFakeDef::create(Func, MinusOne)); |
- _pcmpeq(MinusOne, MinusOne); |
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
_psub(Dest, MinusOne); |
return Dest; |
} |
+Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { |
+ assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
+ Ty == IceType_v16i8); |
+ if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
+ Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
+ SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; |
+ _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift)); |
+ return Reg; |
+ } else { |
+ // SSE has no left shift operation for vectors of 8 bit integers. |
+ const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
+ Constant *ConstantMask = |
+ Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK); |
+ Variable *Reg = makeReg(Ty, RegNum); |
+ _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
+ _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
+ return Reg; |
+ } |
+} |
+ |
OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, |
Variable *Slot, |
uint32_t Offset) { |