Chromium Code Reviews| Index: src/IceTargetLoweringX8632.cpp |
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
| index 3808ecbb85cabb7ed570f2c82a6d8cabf93d07a6..025e16b4e8ced7253e1531281d48bea6a9cb7d1b 100644 |
| --- a/src/IceTargetLoweringX8632.cpp |
| +++ b/src/IceTargetLoweringX8632.cpp |
| @@ -172,7 +172,7 @@ void xMacroIntegrityCheck() { |
| _num |
| }; |
| // Define a set of constants based on high-level table entries. |
| -#define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
| +#define X(tag, str, isunsigned) static const int _table1_##tag = InstIcmp::tag; |
| ICEINSTICMP_TABLE; |
| #undef X |
| // Define a set of constants based on low-level table entries, |
| @@ -184,7 +184,7 @@ void xMacroIntegrityCheck() { |
| #undef X |
| // Repeat the static asserts with respect to the high-level |
| // table entries in case the high-level table has extra entries. |
| -#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
| +#define X(tag, str, isunsigned) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
| ICEINSTICMP_TABLE; |
| #undef X |
| } |
| @@ -2261,83 +2261,198 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
| Operand *Src1 = legalize(Inst->getSrc(1)); |
| Variable *Dest = Inst->getDest(); |
| - // If Src1 is an immediate, or known to be a physical register, we can |
| - // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
| - // a physical register. (Actually, either Src0 or Src1 can be chosen for |
| - // the physical register, but unfortunately we have to commit to one or |
| - // the other before register allocation.) |
| - bool IsSrc1ImmOrReg = false; |
| - if (llvm::isa<Constant>(Src1)) { |
| - IsSrc1ImmOrReg = true; |
| - } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| - if (Var->hasReg()) |
| + if (isVectorType(Dest->getType())) { |
| + Type Ty = Src0->getType(); |
| + // Promote i1 vectors to 128-bit integer vector types. |
| + if (typeElementType(Ty) == IceType_i1) { |
| + Type NewTy = IceType_NUM; |
| + switch (Ty) { |
| + default: |
| + llvm_unreachable("unexpected type"); |
| + break; |
| + case IceType_v4i1: |
| + NewTy = IceType_v4i32; |
| + break; |
| + case IceType_v8i1: |
| + NewTy = IceType_v8i16; |
| + break; |
| + case IceType_v16i1: |
| + NewTy = IceType_v16i8; |
| + break; |
| + } |
| + Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode()); |
| + Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode()); |
| + lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); |
| + lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); |
| + Src0 = NewSrc0; |
| + Src1 = NewSrc1; |
| + Ty = NewTy; |
| + } |
| + |
| + // SSE2 only has signed comparison operations. Transform unsigned |
| + // inputs in a manner that allows for the use of signed comparison |
| + // operations by flipping the high order bits. |
| + if (Inst->isUnsigned()) { |
|
Jim Stichnoth
2014/07/23 17:29:28
I have a minor problem and a major problem with is[...]
wala
2014/07/23 20:40:36
Done.
Removed the unsigned attribute.
|
| + Variable *T0 = makeReg(Ty); |
| + Variable *T1 = makeReg(Ty); |
| + Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
| + _movp(T0, Src0); |
| + _pxor(T0, HighOrderBits); |
| + _movp(T1, Src1); |
| + _pxor(T1, HighOrderBits); |
| + Src0 = T0; |
| + Src1 = T1; |
| + } |
| + |
| + // TODO: ALIGNHACK: Both operands to compare instructions need to be |
| + // in registers until stack alignment support is implemented. Once |
| + // there is support for stack alignment, LEGAL_HACK can be removed. |
| +#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| + Variable *T = makeReg(Ty); |
| + switch (Inst->getCondition()) { |
| + default: |
| + llvm_unreachable("unexpected condition"); |
| + break; |
| + case InstIcmp::Eq: { |
| + _movp(T, Src0); |
| + _pcmpeq(T, LEGAL_HACK(Src1)); |
| + } break; |
| + case InstIcmp::Ne: { |
| + _movp(T, Src0); |
| + _pcmpeq(T, LEGAL_HACK(Src1)); |
| + Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| + _pxor(T, MinusOne); |
| + } break; |
| + case InstIcmp::Ugt: |
| + case InstIcmp::Sgt: { |
| + _movp(T, Src0); |
| + _pcmpgt(T, LEGAL_HACK(Src1)); |
| + } break; |
| + case InstIcmp::Uge: |
| + case InstIcmp::Sge: { |
| + // !(Src1 > Src0) |
| + _movp(T, Src1); |
| + _pcmpgt(T, LEGAL_HACK(Src0)); |
| + Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| + _pxor(T, MinusOne); |
| + } break; |
| + case InstIcmp::Ult: |
| + case InstIcmp::Slt: { |
| + _movp(T, Src1); |
| + _pcmpgt(T, LEGAL_HACK(Src0)); |
| + } break; |
| + case InstIcmp::Ule: |
| + case InstIcmp::Sle: { |
| + // !(Src0 > Src1) |
| + _movp(T, Src0); |
| + _pcmpgt(T, LEGAL_HACK(Src1)); |
| + Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| + _pxor(T, MinusOne); |
| + } break; |
| + } |
| +#undef LEGAL_HACK |
| + |
| + _movp(Dest, T); |
| + |
| + // The following pattern occurs often in lowered C and C++ code: |
| + // |
| + // %cmp = icmp pred <n x ty> %src0, %src1 |
| + // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| + // |
| + // We can avoid the sext operation by copying the result of pcmpgt |
| + // or pcmpeq, which is already sign-extended, directly to the |
| + // destination of the sext operation. |
| + if (InstCast *NextCast = |
| + llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| + if (NextCast->getCastKind() == InstCast::Sext && |
| + NextCast->getSrc(0) == Dest) { |
| + _movp(NextCast->getDest(), T); |
| + // Skip over the instruction. |
| + NextCast->setDeleted(); |
| + Context.advanceNext(); |
| + } |
| + } |
|
jvoung (off chromium)
2014/07/23 18:54:55
could this just return;
and then the scalar versi[...]
wala
2014/07/23 20:40:36
Done.
|
| + } else { |
| + // If Src1 is an immediate, or known to be a physical register, we can |
| + // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
| + // a physical register. (Actually, either Src0 or Src1 can be chosen for |
| + // the physical register, but unfortunately we have to commit to one or |
| + // the other before register allocation.) |
| + bool IsSrc1ImmOrReg = false; |
| + if (llvm::isa<Constant>(Src1)) { |
| IsSrc1ImmOrReg = true; |
| - } |
| + } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| + if (Var->hasReg()) |
| + IsSrc1ImmOrReg = true; |
| + } |
| + |
| + // Try to fuse a compare immediately followed by a conditional branch. This |
| + // is possible when the compare dest and the branch source operands are the |
| + // same, and are their only uses. TODO: implement this optimization for |
| + // i64. |
| + if (InstBr *NextBr = |
| + llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
| + if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
| + Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
| + Operand *Src0New = |
| + legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| + _cmp(Src0New, Src1); |
| + _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
| + NextBr->getTargetFalse()); |
| + // Skip over the following branch instruction. |
| + NextBr->setDeleted(); |
| + Context.advanceNext(); |
| + return; |
| + } |
| + } |
| - // Try to fuse a compare immediately followed by a conditional branch. This |
| - // is possible when the compare dest and the branch source operands are the |
| - // same, and are their only uses. TODO: implement this optimization for i64. |
| - if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
| - if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
| - Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
| - Operand *Src0New = |
| - legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| - _cmp(Src0New, Src1); |
| - _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
| - NextBr->getTargetFalse()); |
| - // Skip over the following branch instruction. |
| - NextBr->setDeleted(); |
| - Context.advanceNext(); |
| + // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| + Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| + Constant *One = Ctx->getConstantInt(IceType_i32, 1); |
| + if (Src0->getType() == IceType_i64) { |
| + InstIcmp::ICond Condition = Inst->getCondition(); |
| + size_t Index = static_cast<size_t>(Condition); |
| + assert(Index < TableIcmp64Size); |
| + Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| + Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| + if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { |
| + InstX8632Label *Label = InstX8632Label::create(Func, this); |
| + _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); |
| + _cmp(loOperand(Src0), Src1LoRI); |
| + _br(InstX8632Br::Br_ne, Label); |
| + _cmp(hiOperand(Src0), Src1HiRI); |
| + _br(InstX8632Br::Br_ne, Label); |
| + Context.insert(InstFakeUse::create(Func, Dest)); |
| + _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); |
| + Context.insert(Label); |
| + } else { |
| + InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); |
| + InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); |
| + _mov(Dest, One); |
| + _cmp(hiOperand(Src0), Src1HiRI); |
| + _br(TableIcmp64[Index].C1, LabelTrue); |
| + _br(TableIcmp64[Index].C2, LabelFalse); |
| + _cmp(loOperand(Src0), Src1LoRI); |
| + _br(TableIcmp64[Index].C3, LabelTrue); |
| + Context.insert(LabelFalse); |
| + Context.insert(InstFakeUse::create(Func, Dest)); |
| + _mov(Dest, Zero); |
| + Context.insert(LabelTrue); |
| + } |
| return; |
| } |
| - } |
| - // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| - Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| - Constant *One = Ctx->getConstantInt(IceType_i32, 1); |
| - if (Src0->getType() == IceType_i64) { |
| - InstIcmp::ICond Condition = Inst->getCondition(); |
| - size_t Index = static_cast<size_t>(Condition); |
| - assert(Index < TableIcmp64Size); |
| - Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| - Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| - if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { |
| - InstX8632Label *Label = InstX8632Label::create(Func, this); |
| - _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); |
| - _cmp(loOperand(Src0), Src1LoRI); |
| - _br(InstX8632Br::Br_ne, Label); |
| - _cmp(hiOperand(Src0), Src1HiRI); |
| - _br(InstX8632Br::Br_ne, Label); |
| - Context.insert(InstFakeUse::create(Func, Dest)); |
| - _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); |
| - Context.insert(Label); |
| - } else { |
| - InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); |
| - InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); |
| - _mov(Dest, One); |
| - _cmp(hiOperand(Src0), Src1HiRI); |
| - _br(TableIcmp64[Index].C1, LabelTrue); |
| - _br(TableIcmp64[Index].C2, LabelFalse); |
| - _cmp(loOperand(Src0), Src1LoRI); |
| - _br(TableIcmp64[Index].C3, LabelTrue); |
| - Context.insert(LabelFalse); |
| - Context.insert(InstFakeUse::create(Func, Dest)); |
| - _mov(Dest, Zero); |
| - Context.insert(LabelTrue); |
| - } |
| - return; |
| + // cmp b, c |
| + Operand *Src0New = |
| + legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| + InstX8632Label *Label = InstX8632Label::create(Func, this); |
| + _cmp(Src0New, Src1); |
| + _mov(Dest, One); |
| + _br(getIcmp32Mapping(Inst->getCondition()), Label); |
| + Context.insert(InstFakeUse::create(Func, Dest)); |
| + _mov(Dest, Zero); |
| + Context.insert(Label); |
| } |
| - |
| - // cmp b, c |
| - Operand *Src0New = |
| - legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| - InstX8632Label *Label = InstX8632Label::create(Func, this); |
| - _cmp(Src0New, Src1); |
| - _mov(Dest, One); |
| - _br(getIcmp32Mapping(Inst->getCondition()), Label); |
| - Context.insert(InstFakeUse::create(Func, Dest)); |
| - _mov(Dest, Zero); |
| - Context.insert(Label); |
| } |
| void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
| @@ -3398,9 +3513,14 @@ void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
| lowerCall(Call); |
| } |
| +// There is no support for loading or emitting vector constants, so the |
| +// vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
| +// etc. are initialized with register operations. |
| +// |
| +// TODO(wala): Add limited support for vector constants so that |
| +// complex initialization in registers is unnecessary. |
| + |
| Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
| - // There is no support for loading or emitting vector constants, so |
| - // this value is initialized using register operations. |
| Variable *Reg = makeReg(Ty, RegNum); |
| // Insert a FakeDef, since otherwise the live range of Reg might |
| // be overestimated. |
| @@ -3409,18 +3529,41 @@ Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
| return Reg; |
| } |
| +Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { |
| + Variable *MinusOnes = makeReg(Ty, RegNum); |
| + // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| + Context.insert(InstFakeDef::create(Func, MinusOnes)); |
| + _pcmpeq(MinusOnes, MinusOnes); |
| + return MinusOnes; |
| +} |
| + |
| Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
| - // There is no support for loading or emitting vector constants, so |
| - // this value is initialized using register operations. |
| Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| - Variable *MinusOne = makeReg(Ty); |
| - // Insert a FakeDef so the live range of MinusOne is not overestimated. |
| - Context.insert(InstFakeDef::create(Func, MinusOne)); |
| - _pcmpeq(MinusOne, MinusOne); |
| + Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| _psub(Dest, MinusOne); |
| return Dest; |
| } |
| +Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { |
| + assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
| + Ty == IceType_v16i8); |
| + if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
| + Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
| + SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; |
| + _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift)); |
| + return Reg; |
| + } else { |
| + // SSE has no left-shift operation for vectors of 8-bit integers. |
| + const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| + Constant *ConstantMask = |
| + Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK); |
| + Variable *Reg = makeReg(Ty, RegNum); |
| + _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| + _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| + return Reg; |
| + } |
| +} |
| + |
| OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, |
| Variable *Slot, |
| uint32_t Offset) { |