Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(25)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 413053002: Lower the fcmp instruction for <4 x float> operands. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Improve table formatting and X macro parameter names Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8632.def » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index c8cf1703a70929d4d5d6906a91ee78f8f36af9e1..4a719d447c26eeb3e59bc6cee1d1c5971d38ee45 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -27,26 +27,38 @@ namespace Ice {
namespace {
-// The following table summarizes the logic for lowering the fcmp instruction.
-// There is one table entry for each of the 16 conditions. A comment in
-// lowerFcmp() describes the lowering template. In the most general case, there
-// is a compare followed by two conditional branches, because some fcmp
-// conditions don't map to a single x86 conditional branch. However, in many
-// cases it is possible to swap the operands in the comparison and have a single
-// conditional branch. Since it's quite tedious to validate the table by hand,
-// good execution tests are helpful.
-
+// The following table summarizes the logic for lowering the fcmp
+// instruction. There is one table entry for each of the 16 conditions.
+//
+// The first four columns describe the case when the operands are
+// floating point scalar values. A comment in lowerFcmp() describes the
+// lowering template. In the most general case, there is a compare
+// followed by two conditional branches, because some fcmp conditions
+// don't map to a single x86 conditional branch. However, in many cases
+// it is possible to swap the operands in the comparison and have a
+// single conditional branch. Since it's quite tedious to validate the
+// table by hand, good execution tests are helpful.
+//
+// The last two columns describe the case when the operands are vectors
+// of floating point values. For most fcmp conditions, there is a clear
+// mapping to a single x86 cmpps instruction variant. Some fcmp
+// conditions require special code to handle and these are marked in the
+// table with a Cmpps_Invalid predicate.
const struct TableFcmp_ {
uint32_t Default;
- bool SwapOperands;
+ bool SwapScalarOperands;
InstX8632::BrCond C1, C2;
+ bool SwapVectorOperands;
+ InstX8632Cmpps::CmppsCond Predicate;
} TableFcmp[] = {
-#define X(val, dflt, swap, C1, C2) \
- { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
+#define X(val, dflt, swapS, C1, C2, swapV, pred) \
+ { \
+ dflt, swapS, InstX8632Br::C1, InstX8632Br::C2, swapV, InstX8632Cmpps::pred \
+ } \
,
- FCMPX8632_TABLE
+ FCMPX8632_TABLE
#undef X
- };
+};
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
// The following table summarizes the logic for lowering the icmp instruction
@@ -138,7 +150,7 @@ void xMacroIntegrityCheck() {
// Define a temporary set of enum values based on low-level
// table entries.
enum _tmp_enum {
-#define X(val, dflt, swap, C1, C2) _tmp_##val,
+#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
FCMPX8632_TABLE
#undef X
_num
@@ -149,7 +161,7 @@ void xMacroIntegrityCheck() {
#undef X
// Define a set of constants based on low-level table entries,
// and ensure the table entry keys are consistent.
-#define X(val, dflt, swap, C1, C2) \
+#define X(val, dflt, swapS, C1, C2, swapV, pred) \
static const int _table2_##val = _tmp_##val; \
STATIC_ASSERT(_table1_##val == _table2_##val);
FCMPX8632_TABLE;
@@ -2213,6 +2225,68 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
Operand *Src0 = Inst->getSrc(0);
Operand *Src1 = Inst->getSrc(1);
Variable *Dest = Inst->getDest();
+
+ if (isVectorType(Dest->getType())) {
+ InstFcmp::FCond Condition = Inst->getCondition();
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < TableFcmpSize);
+
+ if (TableFcmp[Index].SwapVectorOperands) {
+ Operand *T = Src0;
+ Src0 = Src1;
+ Src1 = T;
+ }
+
+ Variable *T = NULL;
+
+ // ALIGNHACK: Without support for stack alignment, both operands to
+ // cmpps need to be forced into registers. Once support for stack
+ // alignment is implemented, remove LEGAL_HACK.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+ switch (Condition) {
+ default: {
+ InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
+ assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
+ T = makeReg(Src0->getType());
+ _movp(T, Src0);
+ _cmpps(T, LEGAL_HACK(Src1), Predicate);
+ } break;
+ case InstFcmp::False:
+ T = makeVectorOfZeros(Src0->getType());
+ break;
+ case InstFcmp::One: {
+ // Check both unequal and ordered.
+ T = makeReg(Src0->getType());
+ Variable *T2 = makeReg(Src0->getType());
+ Src1 = LEGAL_HACK(Src1);
+ _movp(T, Src0);
+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq);
+ _movp(T2, Src0);
+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord);
+ _pand(T, T2);
+ } break;
+ case InstFcmp::Ueq: {
+ // Check both equal or unordered.
+ T = makeReg(Src0->getType());
+ Variable *T2 = makeReg(Src0->getType());
+ Src1 = LEGAL_HACK(Src1);
+ _movp(T, Src0);
+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq);
+ _movp(T2, Src0);
+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord);
+ _por(T, T2);
+ } break;
+ case InstFcmp::True:
+ T = makeVectorOfMinusOnes(IceType_v4i32);
+ break;
+ }
+#undef LEGAL_HACK
+
+ _movp(Dest, T);
+ eliminateNextVectorSextInstruction(Dest);
+ return;
+ }
+
// Lowering a = fcmp cond, b, c
// ucomiss b, c /* only if C1 != Br_None */
// /* but swap b,c order if SwapOperands==true */
@@ -2225,7 +2299,7 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
InstFcmp::FCond Condition = Inst->getCondition();
size_t Index = static_cast<size_t>(Condition);
assert(Index < TableFcmpSize);
- if (TableFcmp[Index].SwapOperands) {
+ if (TableFcmp[Index].SwapScalarOperands) {
Operand *Tmp = Src0;
Src0 = Src1;
Src1 = Tmp;
@@ -2356,26 +2430,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
#undef LEGAL_HACK
_movp(Dest, T);
-
- // The following pattern occurs often in lowered C and C++ code:
- //
- // %cmp = icmp pred <n x ty> %src0, %src1
- // %cmp.ext = sext <n x i1> %cmp to <n x ty>
- //
- // We can avoid the sext operation by copying the result from pcmpgt
- // and pcmpeq, which is already sign extended, to the result of the
- // sext operation
- if (InstCast *NextCast =
- llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
- if (NextCast->getCastKind() == InstCast::Sext &&
- NextCast->getSrc(0) == Dest) {
- _movp(NextCast->getDest(), T);
- // Skip over the instruction.
- NextCast->setDeleted();
- Context.advanceNext();
- }
- }
-
+ eliminateNextVectorSextInstruction(Dest);
return;
}
@@ -3544,6 +3599,28 @@ void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
_br(Inst->getLabelDefault());
}
+// The following pattern occurs often in lowered C and C++ code:
+//
+// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
+// %cmp.ext = sext <n x i1> %cmp to <n x ty>
+//
+// We can eliminate the sext operation by copying the result of pcmpeqd,
+// pcmpgtd, or cmpps (which produce sign extended results) to the result
+// of the sext operation.
+void
+TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
+ if (InstCast *NextCast =
+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
+ if (NextCast->getCastKind() == InstCast::Sext &&
+ NextCast->getSrc(0) == SignExtendedResult) {
+ _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
+ // Skip over the instruction.
+ NextCast->setDeleted();
+ Context.advanceNext();
+ }
+ }
+}
+
void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
const SizeT MaxSrcs = 0;
Variable *Dest = NULL;
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8632.def » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698