Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(622)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 413053002: Lower the fcmp instruction for <4 x float> operands. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Replace uses of std::endl with newlines. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 71b4c17304486264ecc21d315e8b63f3c3797d5a..32a41247aeb91ff29cd13c6fa9fa2086b66050f3 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -27,27 +27,48 @@ namespace Ice {
namespace {
-// The following table summarizes the logic for lowering the fcmp instruction.
+// The following table summarizes the logic for lowering the fcmp
+// instruction when the operands are floating point scalar values.
// There is one table entry for each of the 16 conditions. A comment in
-// lowerFcmp() describes the lowering template. In the most general case, there
-// is a compare followed by two conditional branches, because some fcmp
-// conditions don't map to a single x86 conditional branch. However, in many
-// cases it is possible to swap the operands in the comparison and have a single
-// conditional branch. Since it's quite tedious to validate the table by hand,
-// good execution tests are helpful.
-
-const struct TableFcmp_ {
+// lowerFcmp() describes the lowering template. In the most general
+// case, there is a compare followed by two conditional branches,
+// because some fcmp conditions don't map to a single x86 conditional
+// branch. However, in many cases it is possible to swap the operands
+// in the comparison and have a single conditional branch. Since it's
+// quite tedious to validate the table by hand, good execution tests are
+// helpful.
+
+const struct TableScalarFcmp_ {
uint32_t Default;
bool SwapOperands;
InstX8632::BrCond C1, C2;
-} TableFcmp[] = {
+} TableScalarFcmp[] = {
#define X(val, dflt, swap, C1, C2) \
{ dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
,
- FCMPX8632_TABLE
+ SCALAR_FCMPX8632_TABLE
#undef X
};
-const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
+const size_t TableScalarFcmpSize = llvm::array_lengthof(TableScalarFcmp);
+
+// The following table summarizes the logic for lowering the fcmp
+// instruction when the operands are vectors of floating point values.
+// For most fcmp conditions, there is a clear mapping to a single x86
+// cmpps instruction variant. Some fcmp conditions require special code
+// to handle and these are marked in the table with a Cmpps_Invalid
+// predicate.
+
+const struct TableVectorFcmp_ {
+ bool SwapOperands;
+ InstX8632Cmpps::CmppsCond Predicate;
+} TableVectorFcmp[] = {
+#define X(val, swap, pred) \
+ { swap, InstX8632Cmpps::pred } \
+ ,
+ VECTOR_FCMPX8632_TABLE
+#undef X
+ };
+const size_t TableVectorFcmpSize = llvm::array_lengthof(TableVectorFcmp);
// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
@@ -133,13 +154,13 @@ IceString typeIdentString(const Type Ty) {
// are added or deleted. This dummy function uses static_assert to
// ensure everything is kept in sync.
void xMacroIntegrityCheck() {
- // Validate the enum values in FCMPX8632_TABLE.
+ // Validate the enum values in SCALAR_FCMPX8632_TABLE.
{
// Define a temporary set of enum values based on low-level
// table entries.
enum _tmp_enum {
#define X(val, dflt, swap, C1, C2) _tmp_##val,
- FCMPX8632_TABLE
+ SCALAR_FCMPX8632_TABLE
#undef X
_num
};
@@ -152,7 +173,35 @@ void xMacroIntegrityCheck() {
#define X(val, dflt, swap, C1, C2) \
static const int _table2_##val = _tmp_##val; \
STATIC_ASSERT(_table1_##val == _table2_##val);
- FCMPX8632_TABLE;
+ SCALAR_FCMPX8632_TABLE;
+#undef X
+// Repeat the static asserts with respect to the high-level
+// table entries in case the high-level table has extra entries.
+#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
+ ICEINSTFCMP_TABLE;
+#undef X
+ }
+
+ // Validate the enum values in VECTOR_FCMPX8632_TABLE.
+ {
+ // Define a temporary set of enum values based on low-level
+ // table entries.
+ enum _tmp_enum {
+#define X(val, swap, pred) _tmp_##val,
+ VECTOR_FCMPX8632_TABLE
+#undef X
+ _num
+ };
+// Define a set of constants based on high-level table entries.
+#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
+ ICEINSTFCMP_TABLE;
+#undef X
+// Define a set of constants based on low-level table entries,
+// and ensure the table entry keys are consistent.
+#define X(val, swap, pred) \
+ static const int _table2_##val = _tmp_##val; \
+ STATIC_ASSERT(_table1_##val == _table2_##val);
+ VECTOR_FCMPX8632_TABLE;
#undef X
// Repeat the static asserts with respect to the high-level
// table entries in case the high-level table has extra entries.
@@ -2213,6 +2262,68 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
Operand *Src0 = Inst->getSrc(0);
Operand *Src1 = Inst->getSrc(1);
Variable *Dest = Inst->getDest();
+
+ if (isVectorType(Dest->getType())) {
+ InstFcmp::FCond Condition = Inst->getCondition();
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < TableVectorFcmpSize);
+
+ if (TableVectorFcmp[Index].SwapOperands) {
+ Operand *T = Src0;
+ Src0 = Src1;
+ Src1 = T;
+ }
+
+ Variable *T = NULL;
+
+ // ALIGNHACK: Without support for stack alignment, both operands to
+ // cmpps need to be forced into registers. Once support for stack
+ // alignment is implemented, remove LEGAL_HACK.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+ switch (Condition) {
+ default: {
+ InstX8632Cmpps::CmppsCond Predicate = TableVectorFcmp[Index].Predicate;
+ assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
+ T = makeReg(Src0->getType());
+ _movp(T, Src0);
+ _cmpps(T, LEGAL_HACK(Src1), Predicate);
+ } break;
+ case InstFcmp::False:
+ T = makeVectorOfZeros(Src0->getType());
+ break;
+ case InstFcmp::One: {
+ // Check both unequal and ordered.
+ T = makeReg(Src0->getType());
+ Variable *T2 = makeReg(Src0->getType());
+ Src1 = LEGAL_HACK(Src1);
+ _movp(T, Src0);
+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq);
+ _movp(T2, Src0);
+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord);
+ _pand(T, T2);
+ } break;
+ case InstFcmp::Ueq: {
+ // Check both equal or unordered.
+ T = makeReg(Src0->getType());
+ Variable *T2 = makeReg(Src0->getType());
+ Src1 = LEGAL_HACK(Src1);
+ _movp(T, Src0);
+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq);
+ _movp(T2, Src0);
+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord);
+ _por(T, T2);
+ } break;
+ case InstFcmp::True:
+ T = makeVectorOfMinusOnes(IceType_v4i32);
+ break;
+ }
+#undef LEGAL_HACK
+
+ _movp(Dest, T);
+ eliminateNextVectorSextInstruction(Dest);
+ return;
+ }
+
// Lowering a = fcmp cond, b, c
// ucomiss b, c /* only if C1 != Br_None */
// /* but swap b,c order if SwapOperands==true */
@@ -2224,14 +2335,14 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
// label: /* only if C1 != Br_None */
InstFcmp::FCond Condition = Inst->getCondition();
size_t Index = static_cast<size_t>(Condition);
- assert(Index < TableFcmpSize);
- if (TableFcmp[Index].SwapOperands) {
+ assert(Index < TableScalarFcmpSize);
+ if (TableScalarFcmp[Index].SwapOperands) {
Operand *Tmp = Src0;
Src0 = Src1;
Src1 = Tmp;
}
- bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
- bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
+ bool HasC1 = (TableScalarFcmp[Index].C1 != InstX8632Br::Br_None);
+ bool HasC2 = (TableScalarFcmp[Index].C2 != InstX8632Br::Br_None);
if (HasC1) {
Src0 = legalize(Src0);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
@@ -2240,17 +2351,17 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
_ucomiss(T, Src1RM);
}
Constant *Default =
- Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
+ Ctx->getConstantInt(IceType_i32, TableScalarFcmp[Index].Default);
_mov(Dest, Default);
if (HasC1) {
InstX8632Label *Label = InstX8632Label::create(Func, this);
- _br(TableFcmp[Index].C1, Label);
+ _br(TableScalarFcmp[Index].C1, Label);
if (HasC2) {
- _br(TableFcmp[Index].C2, Label);
+ _br(TableScalarFcmp[Index].C2, Label);
}
Context.insert(InstFakeUse::create(Func, Dest));
Constant *NonDefault =
- Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
+ Ctx->getConstantInt(IceType_i32, !TableScalarFcmp[Index].Default);
_mov(Dest, NonDefault);
Context.insert(Label);
}
@@ -2356,26 +2467,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
#undef LEGAL_HACK
_movp(Dest, T);
-
- // The following pattern occurs often in lowered C and C++ code:
- //
- // %cmp = icmp pred <n x ty> %src0, %src1
- // %cmp.ext = sext <n x i1> %cmp to <n x ty>
- //
- // We can avoid the sext operation by copying the result from pcmpgt
- // and pcmpeq, which is already sign extended, to the result of the
- // sext operation
- if (InstCast *NextCast =
- llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
- if (NextCast->getCastKind() == InstCast::Sext &&
- NextCast->getSrc(0) == Dest) {
- _movp(NextCast->getDest(), T);
- // Skip over the instruction.
- NextCast->setDeleted();
- Context.advanceNext();
- }
- }
-
+ eliminateNextVectorSextInstruction(Dest);
return;
}
@@ -3509,6 +3601,28 @@ void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
_br(Inst->getLabelDefault());
}
+// The following pattern occurs often in lowered C and C++ code:
+//
+// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
+// %cmp.ext = sext <n x i1> %cmp to <n x ty>
+//
+// We can eliminate the sext operation by copying the result of pcmpeqd,
+// pcmpgtd, or cmpps (which produce sign extended results) the result of
+// the sext operation.
+void
+TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
+ if (InstCast *NextCast =
+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
+ if (NextCast->getCastKind() == InstCast::Sext &&
+ NextCast->getSrc(0) == SignExtendedResult) {
+ _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
+ // Skip over the instruction.
+ NextCast->setDeleted();
+ Context.advanceNext();
+ }
+ }
+}
+
void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
const SizeT MaxSrcs = 0;
Variable *Dest = NULL;

Powered by Google App Engine
This is Rietveld 408576698