src/IceTargetLoweringX8632.cpp - Issue 413053002: Lower the fcmp instruction for <4 x float> operands.

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 413053002: Lower the fcmp instruction for <4 x float> operands. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Replace uses of std::endl with newlines. Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringX8632.cpp

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index 71b4c17304486264ecc21d315e8b63f3c3797d5a..32a41247aeb91ff29cd13c6fa9fa2086b66050f3 100644

--- a/src/IceTargetLoweringX8632.cpp

+++ b/src/IceTargetLoweringX8632.cpp

@@ -27,27 +27,48 @@ namespace Ice {

namespace {

-// The following table summarizes the logic for lowering the fcmp instruction.

+// The following table summarizes the logic for lowering the fcmp

+// instruction when the operands are floating point scalar values.

// There is one table entry for each of the 16 conditions. A comment in

-// lowerFcmp() describes the lowering template. In the most general case, there

-// is a compare followed by two conditional branches, because some fcmp

-// conditions don't map to a single x86 conditional branch. However, in many

-// cases it is possible to swap the operands in the comparison and have a single

-// conditional branch. Since it's quite tedious to validate the table by hand,

-// good execution tests are helpful.

-const struct TableFcmp_ {

+// lowerFcmp() describes the lowering template. In the most general

+// case, there is a compare followed by two conditional branches,

+// because some fcmp conditions don't map to a single x86 conditional

+// branch. However, in many cases it is possible to swap the operands

+// in the comparison and have a single conditional branch. Since it's

+// quite tedious to validate the table by hand, good execution tests are

+// helpful.

+const struct TableScalarFcmp_ {

uint32_t Default;

bool SwapOperands;

InstX8632::BrCond C1, C2;

-} TableFcmp[] = {

+} TableScalarFcmp[] = {

#define X(val, dflt, swap, C1, C2) \

{ dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \

- FCMPX8632_TABLE

+ SCALAR_FCMPX8632_TABLE

#undef X

};

-const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);

+const size_t TableScalarFcmpSize = llvm::array_lengthof(TableScalarFcmp);

+// The following table summarizes the logic for lowering the fcmp

+// instruction when the operands are vectors of floating point values.

+// For most fcmp conditions, there is a clear mapping to a single x86

+// cmpps instruction variant. Some fcmp conditions require special code

+// to handle and these are marked in the table with a Cmpps_Invalid

+// predicate.

+const struct TableVectorFcmp_ {

+ bool SwapOperands;

+ InstX8632Cmpps::CmppsCond Predicate;

+} TableVectorFcmp[] = {

+#define X(val, swap, pred) \

+ { swap, InstX8632Cmpps::pred } \

+ ,

+ VECTOR_FCMPX8632_TABLE

+#undef X

+ };

+const size_t TableVectorFcmpSize = llvm::array_lengthof(TableVectorFcmp);

// The following table summarizes the logic for lowering the icmp instruction

// for i32 and narrower types. Each icmp condition has a clear mapping to an

@@ -133,13 +154,13 @@ IceString typeIdentString(const Type Ty) {

// are added or deleted. This dummy function uses static_assert to

// ensure everything is kept in sync.

void xMacroIntegrityCheck() {

- // Validate the enum values in FCMPX8632_TABLE.

+ // Validate the enum values in SCALAR_FCMPX8632_TABLE.

{

// Define a temporary set of enum values based on low-level

// table entries.

enum _tmp_enum {

#define X(val, dflt, swap, C1, C2) _tmp_##val,

- FCMPX8632_TABLE

+ SCALAR_FCMPX8632_TABLE

#undef X

_num

};

@@ -152,7 +173,35 @@ void xMacroIntegrityCheck() {

#define X(val, dflt, swap, C1, C2) \

static const int _table2_##val = _tmp_##val; \

STATIC_ASSERT(_table1_##val == _table2_##val);

- FCMPX8632_TABLE;

+ SCALAR_FCMPX8632_TABLE;

+#undef X

+// Repeat the static asserts with respect to the high-level

+// table entries in case the high-level table has extra entries.

+#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

+ ICEINSTFCMP_TABLE;

+#undef X

+ }

+ // Validate the enum values in VECTOR_FCMPX8632_TABLE.

+ {

+ // Define a temporary set of enum values based on low-level

+ // table entries.

+ enum _tmp_enum {

+#define X(val, swap, pred) _tmp_##val,

+ VECTOR_FCMPX8632_TABLE

+#undef X

+ _num

+ };

+// Define a set of constants based on high-level table entries.

+#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;

+ ICEINSTFCMP_TABLE;

+#undef X

+// Define a set of constants based on low-level table entries,

+// and ensure the table entry keys are consistent.

+#define X(val, swap, pred) \

+ static const int _table2_##val = _tmp_##val; \

+ STATIC_ASSERT(_table1_##val == _table2_##val);

+ VECTOR_FCMPX8632_TABLE;

#undef X

// Repeat the static asserts with respect to the high-level

// table entries in case the high-level table has extra entries.

@@ -2213,6 +2262,68 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {

Operand *Src0 = Inst->getSrc(0);

Operand *Src1 = Inst->getSrc(1);

Variable *Dest = Inst->getDest();

+ if (isVectorType(Dest->getType())) {

+ InstFcmp::FCond Condition = Inst->getCondition();

+ size_t Index = static_cast<size_t>(Condition);

+ assert(Index < TableVectorFcmpSize);

+ if (TableVectorFcmp[Index].SwapOperands) {

+ Operand *T = Src0;

+ Src0 = Src1;

+ Src1 = T;

+ }

+ Variable *T = NULL;

+ // ALIGNHACK: Without support for stack alignment, both operands to

+ // cmpps need to be forced into registers. Once support for stack

+ // alignment is implemented, remove LEGAL_HACK.

+#define LEGAL_HACK(Vect) legalizeToVar((Vect))

+ switch (Condition) {

+ default: {

+ InstX8632Cmpps::CmppsCond Predicate = TableVectorFcmp[Index].Predicate;

+ assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);

+ T = makeReg(Src0->getType());

+ _movp(T, Src0);

+ _cmpps(T, LEGAL_HACK(Src1), Predicate);

+ } break;

+ case InstFcmp::False:

+ T = makeVectorOfZeros(Src0->getType());

+ break;

+ case InstFcmp::One: {

+ // Check both unequal and ordered.

+ T = makeReg(Src0->getType());

+ Variable *T2 = makeReg(Src0->getType());

+ Src1 = LEGAL_HACK(Src1);

+ _movp(T, Src0);

+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq);

+ _movp(T2, Src0);

+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord);

+ _pand(T, T2);

+ } break;

+ case InstFcmp::Ueq: {

+ // Check both equal or unordered.

+ T = makeReg(Src0->getType());

+ Variable *T2 = makeReg(Src0->getType());

+ Src1 = LEGAL_HACK(Src1);

+ _movp(T, Src0);

+ _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq);

+ _movp(T2, Src0);

+ _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord);

+ _por(T, T2);

+ } break;

+ case InstFcmp::True:

+ T = makeVectorOfMinusOnes(IceType_v4i32);

+ break;

+ }

+#undef LEGAL_HACK

+ _movp(Dest, T);

+ eliminateNextVectorSextInstruction(Dest);

+ return;

+ }

// Lowering a = fcmp cond, b, c

// ucomiss b, c /* only if C1 != Br_None */

// /* but swap b,c order if SwapOperands==true */

@@ -2224,14 +2335,14 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {

// label: /* only if C1 != Br_None */

InstFcmp::FCond Condition = Inst->getCondition();

size_t Index = static_cast<size_t>(Condition);

- assert(Index < TableFcmpSize);

- if (TableFcmp[Index].SwapOperands) {

+ assert(Index < TableScalarFcmpSize);

+ if (TableScalarFcmp[Index].SwapOperands) {

Operand *Tmp = Src0;

Src0 = Src1;

Src1 = Tmp;

}

- bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);

- bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);

+ bool HasC1 = (TableScalarFcmp[Index].C1 != InstX8632Br::Br_None);

+ bool HasC2 = (TableScalarFcmp[Index].C2 != InstX8632Br::Br_None);

if (HasC1) {

Src0 = legalize(Src0);

Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

@@ -2240,17 +2351,17 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {

_ucomiss(T, Src1RM);

}

Constant *Default =

- Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);

+ Ctx->getConstantInt(IceType_i32, TableScalarFcmp[Index].Default);

_mov(Dest, Default);

if (HasC1) {

InstX8632Label *Label = InstX8632Label::create(Func, this);

- _br(TableFcmp[Index].C1, Label);

+ _br(TableScalarFcmp[Index].C1, Label);

if (HasC2) {

- _br(TableFcmp[Index].C2, Label);

+ _br(TableScalarFcmp[Index].C2, Label);

}

Context.insert(InstFakeUse::create(Func, Dest));

Constant *NonDefault =

- Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);

+ Ctx->getConstantInt(IceType_i32, !TableScalarFcmp[Index].Default);

_mov(Dest, NonDefault);

Context.insert(Label);

}

@@ -2356,26 +2467,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {

#undef LEGAL_HACK

_movp(Dest, T);

- // The following pattern occurs often in lowered C and C++ code:

- //

- // %cmp = icmp pred <n x ty> %src0, %src1

- // %cmp.ext = sext <n x i1> %cmp to <n x ty>

- //

- // We can avoid the sext operation by copying the result from pcmpgt

- // and pcmpeq, which is already sign extended, to the result of the

- // sext operation

- if (InstCast *NextCast =

- llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {

- if (NextCast->getCastKind() == InstCast::Sext &&

- NextCast->getSrc(0) == Dest) {

- _movp(NextCast->getDest(), T);

- // Skip over the instruction.

- NextCast->setDeleted();

- Context.advanceNext();

- }

+ eliminateNextVectorSextInstruction(Dest);

return;

}

@@ -3509,6 +3601,28 @@ void TargetX8632::lowerSwitch(const InstSwitch *Inst) {

_br(Inst->getLabelDefault());

}

+// The following pattern occurs often in lowered C and C++ code:

+//

+// %cmp = fcmp/icmp pred <n x ty> %src0, %src1

+// %cmp.ext = sext <n x i1> %cmp to <n x ty>

+//

+// We can eliminate the sext operation by copying the result of pcmpeqd,

+// pcmpgtd, or cmpps (which produce sign extended results) the result of

+// the sext operation.

+void

+TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {

+ if (InstCast *NextCast =

+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {

+ if (NextCast->getCastKind() == InstCast::Sext &&

+ NextCast->getSrc(0) == SignExtendedResult) {

+ _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));

+ // Skip over the instruction.

+ NextCast->setDeleted();

+ Context.advanceNext();

+ }

void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {

const SizeT MaxSrcs = 0;

Variable *Dest = NULL;

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8632.def » ('j') | src/IceTargetLoweringX8632.def » ('J')