Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index c8cf1703a70929d4d5d6906a91ee78f8f36af9e1..f540dc95c5f1f541fef1fe56de5e97786d4d441a 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -27,26 +27,38 @@ namespace Ice { |
namespace { |
-// The following table summarizes the logic for lowering the fcmp instruction. |
-// There is one table entry for each of the 16 conditions. A comment in |
-// lowerFcmp() describes the lowering template. In the most general case, there |
-// is a compare followed by two conditional branches, because some fcmp |
-// conditions don't map to a single x86 conditional branch. However, in many |
-// cases it is possible to swap the operands in the comparison and have a single |
-// conditional branch. Since it's quite tedious to validate the table by hand, |
-// good execution tests are helpful. |
- |
+// The following table summarizes the logic for lowering the fcmp |
+// instruction. There is one table entry for each of the 16 conditions. |
+// |
+// The first four columns describe the case when the operands are |
+// floating point scalar values. A comment in lowerFcmp() describes the |
+// lowering template. In the most general case, there is a compare |
+// followed by two conditional branches, because some fcmp conditions |
+// don't map to a single x86 conditional branch. However, in many cases |
+// it is possible to swap the operands in the comparison and have a |
+// single conditional branch. Since it's quite tedious to validate the |
+// table by hand, good execution tests are helpful. |
+// |
+// The last two columns describe the case when the operands are vectors |
+// of floating point values. For most fcmp conditions, there is a clear |
+// mapping to a single x86 cmpps instruction variant. Some fcmp |
+// conditions require special code to handle and these are marked in the |
+// table with a Cmpps_Invalid predicate. |
const struct TableFcmp_ { |
uint32_t Default; |
- bool SwapOperands; |
+ bool SwapScalarOperands; |
InstX8632::BrCond C1, C2; |
+ bool SwapVectorOperands; |
+ InstX8632Cmpps::CmppsCond Predicate; |
} TableFcmp[] = { |
-#define X(val, dflt, swap, C1, C2) \ |
- { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \ |
+#define X(val, dflt, swap, C1, C2, swap2, pred) \ |
+ { \ |
+ dflt, swap, InstX8632Br::C1, InstX8632Br::C2, swap2, InstX8632Cmpps::pred \ |
+ } \ |
, |
- FCMPX8632_TABLE |
+ FCMPX8632_TABLE |
#undef X |
- }; |
+}; |
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); |
// The following table summarizes the logic for lowering the icmp instruction |
@@ -138,7 +150,7 @@ void xMacroIntegrityCheck() { |
// Define a temporary set of enum values based on low-level |
// table entries. |
enum _tmp_enum { |
-#define X(val, dflt, swap, C1, C2) _tmp_##val, |
+#define X(val, dflt, swap, C1, C2, swap2, pred) _tmp_##val, |
FCMPX8632_TABLE |
#undef X |
_num |
@@ -149,7 +161,7 @@ void xMacroIntegrityCheck() { |
#undef X |
// Define a set of constants based on low-level table entries, |
// and ensure the table entry keys are consistent. |
-#define X(val, dflt, swap, C1, C2) \ |
+#define X(val, dflt, swap, C1, C2, swap2, pred) \ |
static const int _table2_##val = _tmp_##val; \ |
STATIC_ASSERT(_table1_##val == _table2_##val); |
FCMPX8632_TABLE; |
@@ -2213,6 +2225,68 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
Operand *Src0 = Inst->getSrc(0); |
Operand *Src1 = Inst->getSrc(1); |
Variable *Dest = Inst->getDest(); |
+ |
+ if (isVectorType(Dest->getType())) { |
+ InstFcmp::FCond Condition = Inst->getCondition(); |
+ size_t Index = static_cast<size_t>(Condition); |
+ assert(Index < TableFcmpSize); |
+ |
+ if (TableFcmp[Index].SwapVectorOperands) { |
+ Operand *T = Src0; |
+ Src0 = Src1; |
+ Src1 = T; |
+ } |
+ |
+ Variable *T = NULL; |
+ |
+ // ALIGNHACK: Without support for stack alignment, both operands to |
+ // cmpps need to be forced into registers. Once support for stack |
+ // alignment is implemented, remove LEGAL_HACK. |
+#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
+ switch (Condition) { |
+ default: { |
+ InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; |
+ assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
+ T = makeReg(Src0->getType()); |
+ _movp(T, Src0); |
+ _cmpps(T, LEGAL_HACK(Src1), Predicate); |
+ } break; |
+ case InstFcmp::False: |
+ T = makeVectorOfZeros(Src0->getType()); |
+ break; |
+ case InstFcmp::One: { |
+ // Check both unequal and ordered. |
+ T = makeReg(Src0->getType()); |
+ Variable *T2 = makeReg(Src0->getType()); |
+ Src1 = LEGAL_HACK(Src1); |
+ _movp(T, Src0); |
+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq); |
+ _movp(T2, Src0); |
+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord); |
+ _pand(T, T2); |
+ } break; |
+ case InstFcmp::Ueq: { |
+ // Check both equal or unordered. |
+ T = makeReg(Src0->getType()); |
+ Variable *T2 = makeReg(Src0->getType()); |
+ Src1 = LEGAL_HACK(Src1); |
+ _movp(T, Src0); |
+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq); |
+ _movp(T2, Src0); |
+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord); |
+ _por(T, T2); |
+ } break; |
+ case InstFcmp::True: |
+ T = makeVectorOfMinusOnes(IceType_v4i32); |
+ break; |
+ } |
+#undef LEGAL_HACK |
+ |
+ _movp(Dest, T); |
+ eliminateNextVectorSextInstruction(Dest); |
+ return; |
+ } |
+ |
// Lowering a = fcmp cond, b, c |
// ucomiss b, c /* only if C1 != Br_None */ |
// /* but swap b,c order if SwapOperands==true */ |
@@ -2225,7 +2299,7 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
InstFcmp::FCond Condition = Inst->getCondition(); |
size_t Index = static_cast<size_t>(Condition); |
assert(Index < TableFcmpSize); |
- if (TableFcmp[Index].SwapOperands) { |
+ if (TableFcmp[Index].SwapScalarOperands) { |
Operand *Tmp = Src0; |
Src0 = Src1; |
Src1 = Tmp; |
@@ -2356,26 +2430,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
#undef LEGAL_HACK |
_movp(Dest, T); |
- |
- // The following pattern occurs often in lowered C and C++ code: |
- // |
- // %cmp = icmp pred <n x ty> %src0, %src1 |
- // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
- // |
- // We can avoid the sext operation by copying the result from pcmpgt |
- // and pcmpeq, which is already sign extended, to the result of the |
- // sext operation |
- if (InstCast *NextCast = |
- llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
- if (NextCast->getCastKind() == InstCast::Sext && |
- NextCast->getSrc(0) == Dest) { |
- _movp(NextCast->getDest(), T); |
- // Skip over the instruction. |
- NextCast->setDeleted(); |
- Context.advanceNext(); |
- } |
- } |
- |
+ eliminateNextVectorSextInstruction(Dest); |
return; |
} |
@@ -3544,6 +3599,28 @@ void TargetX8632::lowerSwitch(const InstSwitch *Inst) { |
_br(Inst->getLabelDefault()); |
} |
+// The following pattern occurs often in lowered C and C++ code: |
+// |
+// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
+// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
+// |
+// We can eliminate the sext operation by copying the result of pcmpeqd, |
+// pcmpgtd, or cmpps (which produce sign extended results) to the result |
+// of the sext operation. |
+void |
+TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) { |
+ if (InstCast *NextCast = |
+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
+ if (NextCast->getCastKind() == InstCast::Sext && |
+ NextCast->getSrc(0) == SignExtendedResult) { |
+ _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
+ // Skip over the instruction. |
+ NextCast->setDeleted(); |
+ Context.advanceNext(); |
+ } |
+ } |
+} |
+ |
void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
const SizeT MaxSrcs = 0; |
Variable *Dest = NULL; |