Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index 71b4c17304486264ecc21d315e8b63f3c3797d5a..32a41247aeb91ff29cd13c6fa9fa2086b66050f3 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -27,27 +27,48 @@ namespace Ice { |
namespace { |
-// The following table summarizes the logic for lowering the fcmp instruction. |
+// The following table summarizes the logic for lowering the fcmp |
+// instruction when the operands are floating point scalar values. |
// There is one table entry for each of the 16 conditions. A comment in |
-// lowerFcmp() describes the lowering template. In the most general case, there |
-// is a compare followed by two conditional branches, because some fcmp |
-// conditions don't map to a single x86 conditional branch. However, in many |
-// cases it is possible to swap the operands in the comparison and have a single |
-// conditional branch. Since it's quite tedious to validate the table by hand, |
-// good execution tests are helpful. |
- |
-const struct TableFcmp_ { |
+// lowerFcmp() describes the lowering template. In the most general |
+// case, there is a compare followed by two conditional branches, |
+// because some fcmp conditions don't map to a single x86 conditional |
+// branch. However, in many cases it is possible to swap the operands |
+// in the comparison and have a single conditional branch. Since it's |
+// quite tedious to validate the table by hand, good execution tests are |
+// helpful. |
+ |
+const struct TableScalarFcmp_ { |
uint32_t Default; |
bool SwapOperands; |
InstX8632::BrCond C1, C2; |
-} TableFcmp[] = { |
+} TableScalarFcmp[] = { |
#define X(val, dflt, swap, C1, C2) \ |
{ dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \ |
, |
- FCMPX8632_TABLE |
+ SCALAR_FCMPX8632_TABLE |
#undef X |
}; |
-const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); |
+const size_t TableScalarFcmpSize = llvm::array_lengthof(TableScalarFcmp); |
+ |
+// The following table summarizes the logic for lowering the fcmp |
+// instruction when the operands are vectors of floating point values. |
+// For most fcmp conditions, there is a clear mapping to a single x86 |
+// cmpps instruction variant. Some fcmp conditions require special code |
+// to handle and these are marked in the table with a Cmpps_Invalid |
+// predicate. |
+ |
+const struct TableVectorFcmp_ { |
+ bool SwapOperands; |
+ InstX8632Cmpps::CmppsCond Predicate; |
+} TableVectorFcmp[] = { |
+#define X(val, swap, pred) \ |
+ { swap, InstX8632Cmpps::pred } \ |
+ , |
+ VECTOR_FCMPX8632_TABLE |
+#undef X |
+ }; |
+const size_t TableVectorFcmpSize = llvm::array_lengthof(TableVectorFcmp); |
// The following table summarizes the logic for lowering the icmp instruction |
// for i32 and narrower types. Each icmp condition has a clear mapping to an |
@@ -133,13 +154,13 @@ IceString typeIdentString(const Type Ty) { |
// are added or deleted. This dummy function uses static_assert to |
// ensure everything is kept in sync. |
void xMacroIntegrityCheck() { |
- // Validate the enum values in FCMPX8632_TABLE. |
+ // Validate the enum values in SCALAR_FCMPX8632_TABLE. |
{ |
// Define a temporary set of enum values based on low-level |
// table entries. |
enum _tmp_enum { |
#define X(val, dflt, swap, C1, C2) _tmp_##val, |
- FCMPX8632_TABLE |
+ SCALAR_FCMPX8632_TABLE |
#undef X |
_num |
}; |
@@ -152,7 +173,35 @@ void xMacroIntegrityCheck() { |
#define X(val, dflt, swap, C1, C2) \ |
static const int _table2_##val = _tmp_##val; \ |
STATIC_ASSERT(_table1_##val == _table2_##val); |
- FCMPX8632_TABLE; |
+ SCALAR_FCMPX8632_TABLE; |
+#undef X |
+// Repeat the static asserts with respect to the high-level |
+// table entries in case the high-level table has extra entries. |
+#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
+ ICEINSTFCMP_TABLE; |
+#undef X |
+ } |
+ |
+ // Validate the enum values in VECTOR_FCMPX8632_TABLE. |
+ { |
+ // Define a temporary set of enum values based on low-level |
+ // table entries. |
+ enum _tmp_enum { |
+#define X(val, swap, pred) _tmp_##val, |
+ VECTOR_FCMPX8632_TABLE |
+#undef X |
+ _num |
+ }; |
+// Define a set of constants based on high-level table entries. |
+#define X(tag, str) static const int _table1_##tag = InstFcmp::tag; |
+ ICEINSTFCMP_TABLE; |
+#undef X |
+// Define a set of constants based on low-level table entries, |
+// and ensure the table entry keys are consistent. |
+#define X(val, swap, pred) \ |
+ static const int _table2_##val = _tmp_##val; \ |
+ STATIC_ASSERT(_table1_##val == _table2_##val); |
+ VECTOR_FCMPX8632_TABLE; |
#undef X |
// Repeat the static asserts with respect to the high-level |
// table entries in case the high-level table has extra entries. |
@@ -2213,6 +2262,68 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
Operand *Src0 = Inst->getSrc(0); |
Operand *Src1 = Inst->getSrc(1); |
Variable *Dest = Inst->getDest(); |
+ |
+ if (isVectorType(Dest->getType())) { |
+ InstFcmp::FCond Condition = Inst->getCondition(); |
+ size_t Index = static_cast<size_t>(Condition); |
+ assert(Index < TableVectorFcmpSize); |
+ |
+ if (TableVectorFcmp[Index].SwapOperands) { |
+ Operand *T = Src0; |
+ Src0 = Src1; |
+ Src1 = T; |
+ } |
+ |
+ Variable *T = NULL; |
+ |
+ // ALIGNHACK: Without support for stack alignment, both operands to |
+ // cmpps need to be forced into registers. Once support for stack |
+ // alignment is implemented, remove LEGAL_HACK. |
+#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
+ switch (Condition) { |
+ default: { |
+ InstX8632Cmpps::CmppsCond Predicate = TableVectorFcmp[Index].Predicate; |
+ assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
+ T = makeReg(Src0->getType()); |
+ _movp(T, Src0); |
+ _cmpps(T, LEGAL_HACK(Src1), Predicate); |
+ } break; |
+ case InstFcmp::False: |
+ T = makeVectorOfZeros(Src0->getType()); |
+ break; |
+ case InstFcmp::One: { |
+ // Check both unequal and ordered. |
+ T = makeReg(Src0->getType()); |
+ Variable *T2 = makeReg(Src0->getType()); |
+ Src1 = LEGAL_HACK(Src1); |
+ _movp(T, Src0); |
+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq); |
+ _movp(T2, Src0); |
+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord); |
+ _pand(T, T2); |
+ } break; |
+ case InstFcmp::Ueq: { |
+ // Check both equal or unordered. |
+ T = makeReg(Src0->getType()); |
+ Variable *T2 = makeReg(Src0->getType()); |
+ Src1 = LEGAL_HACK(Src1); |
+ _movp(T, Src0); |
+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq); |
+ _movp(T2, Src0); |
+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord); |
+ _por(T, T2); |
+ } break; |
+ case InstFcmp::True: |
+ T = makeVectorOfMinusOnes(IceType_v4i32); |
+ break; |
+ } |
+#undef LEGAL_HACK |
+ |
+ _movp(Dest, T); |
+ eliminateNextVectorSextInstruction(Dest); |
+ return; |
+ } |
+ |
// Lowering a = fcmp cond, b, c |
// ucomiss b, c /* only if C1 != Br_None */ |
// /* but swap b,c order if SwapOperands==true */ |
@@ -2224,14 +2335,14 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
// label: /* only if C1 != Br_None */ |
InstFcmp::FCond Condition = Inst->getCondition(); |
size_t Index = static_cast<size_t>(Condition); |
- assert(Index < TableFcmpSize); |
- if (TableFcmp[Index].SwapOperands) { |
+ assert(Index < TableScalarFcmpSize); |
+ if (TableScalarFcmp[Index].SwapOperands) { |
Operand *Tmp = Src0; |
Src0 = Src1; |
Src1 = Tmp; |
} |
- bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); |
- bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); |
+ bool HasC1 = (TableScalarFcmp[Index].C1 != InstX8632Br::Br_None); |
+ bool HasC2 = (TableScalarFcmp[Index].C2 != InstX8632Br::Br_None); |
if (HasC1) { |
Src0 = legalize(Src0); |
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
@@ -2240,17 +2351,17 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
_ucomiss(T, Src1RM); |
} |
Constant *Default = |
- Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default); |
+ Ctx->getConstantInt(IceType_i32, TableScalarFcmp[Index].Default); |
_mov(Dest, Default); |
if (HasC1) { |
InstX8632Label *Label = InstX8632Label::create(Func, this); |
- _br(TableFcmp[Index].C1, Label); |
+ _br(TableScalarFcmp[Index].C1, Label); |
if (HasC2) { |
- _br(TableFcmp[Index].C2, Label); |
+ _br(TableScalarFcmp[Index].C2, Label); |
} |
Context.insert(InstFakeUse::create(Func, Dest)); |
Constant *NonDefault = |
- Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default); |
+ Ctx->getConstantInt(IceType_i32, !TableScalarFcmp[Index].Default); |
_mov(Dest, NonDefault); |
Context.insert(Label); |
} |
@@ -2356,26 +2467,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
#undef LEGAL_HACK |
_movp(Dest, T); |
- |
- // The following pattern occurs often in lowered C and C++ code: |
- // |
- // %cmp = icmp pred <n x ty> %src0, %src1 |
- // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
- // |
- // We can avoid the sext operation by copying the result from pcmpgt |
- // and pcmpeq, which is already sign extended, to the result of the |
- // sext operation |
- if (InstCast *NextCast = |
- llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
- if (NextCast->getCastKind() == InstCast::Sext && |
- NextCast->getSrc(0) == Dest) { |
- _movp(NextCast->getDest(), T); |
- // Skip over the instruction. |
- NextCast->setDeleted(); |
- Context.advanceNext(); |
- } |
- } |
- |
+ eliminateNextVectorSextInstruction(Dest); |
return; |
} |
@@ -3509,6 +3601,28 @@ void TargetX8632::lowerSwitch(const InstSwitch *Inst) { |
_br(Inst->getLabelDefault()); |
} |
+// The following pattern occurs often in lowered C and C++ code: |
+// |
+// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
+// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
+// |
+// We can eliminate the sext operation by copying the result of pcmpeqd, |
+// pcmpgtd, or cmpps (which produce sign extended results) the result of |
+// the sext operation. |
+void |
+TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) { |
+ if (InstCast *NextCast = |
+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
+ if (NextCast->getCastKind() == InstCast::Sext && |
+ NextCast->getSrc(0) == SignExtendedResult) { |
+ _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
+ // Skip over the instruction. |
+ NextCast->setDeleted(); |
+ Context.advanceNext(); |
+ } |
+ } |
+} |
+ |
void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
const SizeT MaxSrcs = 0; |
Variable *Dest = NULL; |