| Index: src/IceTargetLoweringX8632.cpp
|
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
|
| index 71b4c17304486264ecc21d315e8b63f3c3797d5a..32a41247aeb91ff29cd13c6fa9fa2086b66050f3 100644
|
| --- a/src/IceTargetLoweringX8632.cpp
|
| +++ b/src/IceTargetLoweringX8632.cpp
|
| @@ -27,27 +27,48 @@ namespace Ice {
|
|
|
| namespace {
|
|
|
| -// The following table summarizes the logic for lowering the fcmp instruction.
|
| +// The following table summarizes the logic for lowering the fcmp
|
| +// instruction when the operands are floating point scalar values.
|
| // There is one table entry for each of the 16 conditions. A comment in
|
| -// lowerFcmp() describes the lowering template. In the most general case, there
|
| -// is a compare followed by two conditional branches, because some fcmp
|
| -// conditions don't map to a single x86 conditional branch. However, in many
|
| -// cases it is possible to swap the operands in the comparison and have a single
|
| -// conditional branch. Since it's quite tedious to validate the table by hand,
|
| -// good execution tests are helpful.
|
| -
|
| -const struct TableFcmp_ {
|
| +// lowerFcmp() describes the lowering template. In the most general
|
| +// case, there is a compare followed by two conditional branches,
|
| +// because some fcmp conditions don't map to a single x86 conditional
|
| +// branch. However, in many cases it is possible to swap the operands
|
| +// in the comparison and have a single conditional branch. Since it's
|
| +// quite tedious to validate the table by hand, good execution tests are
|
| +// helpful.
|
| +
|
| +const struct TableScalarFcmp_ {
|
| uint32_t Default;
|
| bool SwapOperands;
|
| InstX8632::BrCond C1, C2;
|
| -} TableFcmp[] = {
|
| +} TableScalarFcmp[] = {
|
| #define X(val, dflt, swap, C1, C2) \
|
| { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
|
| ,
|
| - FCMPX8632_TABLE
|
| + SCALAR_FCMPX8632_TABLE
|
| #undef X
|
| };
|
| -const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
|
| +const size_t TableScalarFcmpSize = llvm::array_lengthof(TableScalarFcmp);
|
| +
|
| +// The following table summarizes the logic for lowering the fcmp
|
| +// instruction when the operands are vectors of floating point values.
|
| +// For most fcmp conditions, there is a clear mapping to a single x86
|
| +// cmpps instruction variant. Some fcmp conditions require special code
|
| +// to handle and these are marked in the table with a Cmpps_Invalid
|
| +// predicate.
|
| +
|
| +const struct TableVectorFcmp_ {
|
| + bool SwapOperands;
|
| + InstX8632Cmpps::CmppsCond Predicate;
|
| +} TableVectorFcmp[] = {
|
| +#define X(val, swap, pred) \
|
| + { swap, InstX8632Cmpps::pred } \
|
| + ,
|
| + VECTOR_FCMPX8632_TABLE
|
| +#undef X
|
| + };
|
| +const size_t TableVectorFcmpSize = llvm::array_lengthof(TableVectorFcmp);
|
|
|
| // The following table summarizes the logic for lowering the icmp instruction
|
| // for i32 and narrower types. Each icmp condition has a clear mapping to an
|
| @@ -133,13 +154,13 @@ IceString typeIdentString(const Type Ty) {
|
| // are added or deleted. This dummy function uses static_assert to
|
| // ensure everything is kept in sync.
|
| void xMacroIntegrityCheck() {
|
| - // Validate the enum values in FCMPX8632_TABLE.
|
| + // Validate the enum values in SCALAR_FCMPX8632_TABLE.
|
| {
|
| // Define a temporary set of enum values based on low-level
|
| // table entries.
|
| enum _tmp_enum {
|
| #define X(val, dflt, swap, C1, C2) _tmp_##val,
|
| - FCMPX8632_TABLE
|
| + SCALAR_FCMPX8632_TABLE
|
| #undef X
|
| _num
|
| };
|
| @@ -152,7 +173,35 @@ void xMacroIntegrityCheck() {
|
| #define X(val, dflt, swap, C1, C2) \
|
| static const int _table2_##val = _tmp_##val; \
|
| STATIC_ASSERT(_table1_##val == _table2_##val);
|
| - FCMPX8632_TABLE;
|
| + SCALAR_FCMPX8632_TABLE;
|
| +#undef X
|
| +// Repeat the static asserts with respect to the high-level
|
| +// table entries in case the high-level table has extra entries.
|
| +#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
|
| + ICEINSTFCMP_TABLE;
|
| +#undef X
|
| + }
|
| +
|
| + // Validate the enum values in VECTOR_FCMPX8632_TABLE.
|
| + {
|
| + // Define a temporary set of enum values based on low-level
|
| + // table entries.
|
| + enum _tmp_enum {
|
| +#define X(val, swap, pred) _tmp_##val,
|
| + VECTOR_FCMPX8632_TABLE
|
| +#undef X
|
| + _num
|
| + };
|
| +// Define a set of constants based on high-level table entries.
|
| +#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
|
| + ICEINSTFCMP_TABLE;
|
| +#undef X
|
| +// Define a set of constants based on low-level table entries,
|
| +// and ensure the table entry keys are consistent.
|
| +#define X(val, swap, pred) \
|
| + static const int _table2_##val = _tmp_##val; \
|
| + STATIC_ASSERT(_table1_##val == _table2_##val);
|
| + VECTOR_FCMPX8632_TABLE;
|
| #undef X
|
| // Repeat the static asserts with respect to the high-level
|
| // table entries in case the high-level table has extra entries.
|
| @@ -2213,6 +2262,68 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
|
| Operand *Src0 = Inst->getSrc(0);
|
| Operand *Src1 = Inst->getSrc(1);
|
| Variable *Dest = Inst->getDest();
|
| +
|
| + if (isVectorType(Dest->getType())) {
|
| + InstFcmp::FCond Condition = Inst->getCondition();
|
| + size_t Index = static_cast<size_t>(Condition);
|
| + assert(Index < TableVectorFcmpSize);
|
| +
|
| + if (TableVectorFcmp[Index].SwapOperands) {
|
| + Operand *T = Src0;
|
| + Src0 = Src1;
|
| + Src1 = T;
|
| + }
|
| +
|
| + Variable *T = NULL;
|
| +
|
| + // ALIGNHACK: Without support for stack alignment, both operands to
|
| + // cmpps need to be forced into registers. Once support for stack
|
| + // alignment is implemented, remove LEGAL_HACK.
|
| +#define LEGAL_HACK(Vect) legalizeToVar((Vect))
|
| + switch (Condition) {
|
| + default: {
|
| + InstX8632Cmpps::CmppsCond Predicate = TableVectorFcmp[Index].Predicate;
|
| + assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
|
| + T = makeReg(Src0->getType());
|
| + _movp(T, Src0);
|
| + _cmpps(T, LEGAL_HACK(Src1), Predicate);
|
| + } break;
|
| + case InstFcmp::False:
|
| + T = makeVectorOfZeros(Src0->getType());
|
| + break;
|
| + case InstFcmp::One: {
|
| + // Check both unequal and ordered.
|
| + T = makeReg(Src0->getType());
|
| + Variable *T2 = makeReg(Src0->getType());
|
| + Src1 = LEGAL_HACK(Src1);
|
| + _movp(T, Src0);
|
| + _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq);
|
| + _movp(T2, Src0);
|
| + _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord);
|
| + _pand(T, T2);
|
| + } break;
|
| + case InstFcmp::Ueq: {
|
| + // Check either equal or unordered.
|
| + T = makeReg(Src0->getType());
|
| + Variable *T2 = makeReg(Src0->getType());
|
| + Src1 = LEGAL_HACK(Src1);
|
| + _movp(T, Src0);
|
| + _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq);
|
| + _movp(T2, Src0);
|
| + _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord);
|
| + _por(T, T2);
|
| + } break;
|
| + case InstFcmp::True:
|
| + T = makeVectorOfMinusOnes(IceType_v4i32);
|
| + break;
|
| + }
|
| +#undef LEGAL_HACK
|
| +
|
| + _movp(Dest, T);
|
| + eliminateNextVectorSextInstruction(Dest);
|
| + return;
|
| + }
|
| +
|
| // Lowering a = fcmp cond, b, c
|
| // ucomiss b, c /* only if C1 != Br_None */
|
| // /* but swap b,c order if SwapOperands==true */
|
| @@ -2224,14 +2335,14 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
|
| // label: /* only if C1 != Br_None */
|
| InstFcmp::FCond Condition = Inst->getCondition();
|
| size_t Index = static_cast<size_t>(Condition);
|
| - assert(Index < TableFcmpSize);
|
| - if (TableFcmp[Index].SwapOperands) {
|
| + assert(Index < TableScalarFcmpSize);
|
| + if (TableScalarFcmp[Index].SwapOperands) {
|
| Operand *Tmp = Src0;
|
| Src0 = Src1;
|
| Src1 = Tmp;
|
| }
|
| - bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
|
| - bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
|
| + bool HasC1 = (TableScalarFcmp[Index].C1 != InstX8632Br::Br_None);
|
| + bool HasC2 = (TableScalarFcmp[Index].C2 != InstX8632Br::Br_None);
|
| if (HasC1) {
|
| Src0 = legalize(Src0);
|
| Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
|
| @@ -2240,17 +2351,17 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
|
| _ucomiss(T, Src1RM);
|
| }
|
| Constant *Default =
|
| - Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
|
| + Ctx->getConstantInt(IceType_i32, TableScalarFcmp[Index].Default);
|
| _mov(Dest, Default);
|
| if (HasC1) {
|
| InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - _br(TableFcmp[Index].C1, Label);
|
| + _br(TableScalarFcmp[Index].C1, Label);
|
| if (HasC2) {
|
| - _br(TableFcmp[Index].C2, Label);
|
| + _br(TableScalarFcmp[Index].C2, Label);
|
| }
|
| Context.insert(InstFakeUse::create(Func, Dest));
|
| Constant *NonDefault =
|
| - Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
|
| + Ctx->getConstantInt(IceType_i32, !TableScalarFcmp[Index].Default);
|
| _mov(Dest, NonDefault);
|
| Context.insert(Label);
|
| }
|
| @@ -2356,26 +2467,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
|
| #undef LEGAL_HACK
|
|
|
| _movp(Dest, T);
|
| -
|
| - // The following pattern occurs often in lowered C and C++ code:
|
| - //
|
| - // %cmp = icmp pred <n x ty> %src0, %src1
|
| - // %cmp.ext = sext <n x i1> %cmp to <n x ty>
|
| - //
|
| - // We can avoid the sext operation by copying the result from pcmpgt
|
| - // and pcmpeq, which is already sign extended, to the result of the
|
| - // sext operation
|
| - if (InstCast *NextCast =
|
| - llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
|
| - if (NextCast->getCastKind() == InstCast::Sext &&
|
| - NextCast->getSrc(0) == Dest) {
|
| - _movp(NextCast->getDest(), T);
|
| - // Skip over the instruction.
|
| - NextCast->setDeleted();
|
| - Context.advanceNext();
|
| - }
|
| - }
|
| -
|
| + eliminateNextVectorSextInstruction(Dest);
|
| return;
|
| }
|
|
|
| @@ -3509,6 +3601,28 @@ void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
|
| _br(Inst->getLabelDefault());
|
| }
|
|
|
| +// The following pattern occurs often in lowered C and C++ code:
|
| +//
|
| +// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
|
| +// %cmp.ext = sext <n x i1> %cmp to <n x ty>
|
| +//
|
| +// We can eliminate the sext operation by copying the result of pcmpeqd,
|
| +// pcmpgtd, or cmpps (which produce sign extended results) to the result of
|
| +// the sext operation.
|
| +void
|
| +TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
|
| + if (InstCast *NextCast =
|
| + llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
|
| + if (NextCast->getCastKind() == InstCast::Sext &&
|
| + NextCast->getSrc(0) == SignExtendedResult) {
|
| + _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
|
| + // Skip over the instruction.
|
| + NextCast->setDeleted();
|
| + Context.advanceNext();
|
| + }
|
| + }
|
| +}
|
| +
|
| void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
|
| const SizeT MaxSrcs = 0;
|
| Variable *Dest = NULL;
|
|
|