src/IceTargetLoweringX8632.cpp - Issue 413053002: Lower the fcmp instruction for <4 x float> operands.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 413053002: Lower the fcmp instruction for <4 x float> operands. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Improve table formatting and X macro parameter names Created 6 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

11 // consists almost entirely of the lowering sequence for each	11 // consists almost entirely of the lowering sequence for each

12 // high-level instruction. It also implements	12 // high-level instruction. It also implements

13 // TargetX8632Fast::postLower() which does the simplest possible	13 // TargetX8632Fast::postLower() which does the simplest possible

14 // register allocation for the "fast" target.	14 // register allocation for the "fast" target.

15 //	15 //

16 //===----------------------------------------------------------------------===//	16 //===----------------------------------------------------------------------===//

17	17

18 #include "IceDefs.h"	18 #include "IceDefs.h"

19 #include "IceCfg.h"	19 #include "IceCfg.h"

20 #include "IceCfgNode.h"	20 #include "IceCfgNode.h"

21 #include "IceInstX8632.h"	21 #include "IceInstX8632.h"

22 #include "IceOperand.h"	22 #include "IceOperand.h"

23 #include "IceTargetLoweringX8632.def"	23 #include "IceTargetLoweringX8632.def"

24 #include "IceTargetLoweringX8632.h"	24 #include "IceTargetLoweringX8632.h"

25	25

26 namespace Ice {	26 namespace Ice {

27	27

28 namespace {	28 namespace {

29	29

30 // The following table summarizes the logic for lowering the fcmp instruction.	30 // The following table summarizes the logic for lowering the fcmp

31 // There is one table entry for each of the 16 conditions. A comment in	31 // instruction. There is one table entry for each of the 16 conditions.

32 // lowerFcmp() describes the lowering template. In the most general case, there	32 //

33 // is a compare followed by two conditional branches, because some fcmp	33 // The first four columns describe the case when the operands are

34 // conditions don't map to a single x86 conditional branch. However, in many	34 // floating point scalar values. A comment in lowerFcmp() describes the

35 // cases it is possible to swap the operands in the comparison and have a single	35 // lowering template. In the most general case, there is a compare

36 // conditional branch. Since it's quite tedious to validate the table by hand,	36 // followed by two conditional branches, because some fcmp conditions

37 // good execution tests are helpful.	37 // don't map to a single x86 conditional branch. However, in many cases

38	38 // it is possible to swap the operands in the comparison and have a

	39 // single conditional branch. Since it's quite tedious to validate the

	40 // table by hand, good execution tests are helpful.

	41 //

	42 // The last two columns describe the case when the operands are vectors

	43 // of floating point values. For most fcmp conditions, there is a clear

	44 // mapping to a single x86 cmpps instruction variant. Some fcmp

	45 // conditions require special code to handle and these are marked in the

	46 // table with a Cmpps_Invalid predicate.

39 const struct TableFcmp_ {	47 const struct TableFcmp_ {

40 uint32_t Default;	48 uint32_t Default;

41 bool SwapOperands;	49 bool SwapScalarOperands;

42 InstX8632::BrCond C1, C2;	50 InstX8632::BrCond C1, C2;

	51 bool SwapVectorOperands;

	52 InstX8632Cmpps::CmppsCond Predicate;

43 } TableFcmp[] = {	53 } TableFcmp[] = {

44 #define X(val, dflt, swap, C1, C2) \	54 #define X(val, dflt, swapS, C1, C2, swapV, pred) \

45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \	55 { \

	56 dflt, swapS, InstX8632Br::C1, InstX8632Br::C2, swapV, InstX8632Cmpps::pred \

	57 } \

46 ,	58 ,

47 FCMPX8632_TABLE	59 FCMPX8632_TABLE

48 #undef X	60 #undef X

49 };	61 };

50 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);	62 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);

51	63

52 // The following table summarizes the logic for lowering the icmp instruction	64 // The following table summarizes the logic for lowering the icmp instruction

53 // for i32 and narrower types. Each icmp condition has a clear mapping to an	65 // for i32 and narrower types. Each icmp condition has a clear mapping to an

54 // x86 conditional branch instruction.	66 // x86 conditional branch instruction.

55	67

56 const struct TableIcmp32_ {	68 const struct TableIcmp32_ {

57 InstX8632::BrCond Mapping;	69 InstX8632::BrCond Mapping;

58 } TableIcmp32[] = {	70 } TableIcmp32[] = {

59 #define X(val, C_32, C1_64, C2_64, C3_64) \	71 #define X(val, C_32, C1_64, C2_64, C3_64) \

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
131 // between abstraction layers. There is a risk that the tables	143 // between abstraction layers. There is a risk that the tables

132 // could get out of sync if enum values are reordered or if entries	144 // could get out of sync if enum values are reordered or if entries

133 // are added or deleted. This dummy function uses static_assert to	145 // are added or deleted. This dummy function uses static_assert to

134 // ensure everything is kept in sync.	146 // ensure everything is kept in sync.

135 void xMacroIntegrityCheck() {	147 void xMacroIntegrityCheck() {

136 // Validate the enum values in FCMPX8632_TABLE.	148 // Validate the enum values in FCMPX8632_TABLE.

137 {	149 {

138 // Define a temporary set of enum values based on low-level	150 // Define a temporary set of enum values based on low-level

139 // table entries.	151 // table entries.

140 enum _tmp_enum {	152 enum _tmp_enum {

141 #define X(val, dflt, swap, C1, C2) _tmp_##val,	153 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,

142 FCMPX8632_TABLE	154 FCMPX8632_TABLE

143 #undef X	155 #undef X

144 _num	156 _num

145 };	157 };

146 // Define a set of constants based on high-level table entries.	158 // Define a set of constants based on high-level table entries.

147 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;	159 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;

148 ICEINSTFCMP_TABLE;	160 ICEINSTFCMP_TABLE;

149 #undef X	161 #undef X

150 // Define a set of constants based on low-level table entries,	162 // Define a set of constants based on low-level table entries,

151 // and ensure the table entry keys are consistent.	163 // and ensure the table entry keys are consistent.

152 #define X(val, dflt, swap, C1, C2) \	164 #define X(val, dflt, swapS, C1, C2, swapV, pred) \

153 static const int _table2_##val = _tmp_##val; \	165 static const int _table2_##val = _tmp_##val; \

154 STATIC_ASSERT(_table1_##val == _table2_##val);	166 STATIC_ASSERT(_table1_##val == _table2_##val);

155 FCMPX8632_TABLE;	167 FCMPX8632_TABLE;

156 #undef X	168 #undef X

157 // Repeat the static asserts with respect to the high-level	169 // Repeat the static asserts with respect to the high-level

158 // table entries in case the high-level table has extra entries.	170 // table entries in case the high-level table has extra entries.

159 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);	171 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

160 ICEINSTFCMP_TABLE;	172 ICEINSTFCMP_TABLE;

161 #undef X	173 #undef X

162 }	174 }

(...skipping 2043 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2206	2218

2207 // Copy the element to the destination.	2219 // Copy the element to the destination.

2208 Variable *Dest = Inst->getDest();	2220 Variable *Dest = Inst->getDest();

2209 _mov(Dest, ExtractedElement);	2221 _mov(Dest, ExtractedElement);

2210 }	2222 }

2211	2223

2212 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {	2224 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {

2213 Operand *Src0 = Inst->getSrc(0);	2225 Operand *Src0 = Inst->getSrc(0);

2214 Operand *Src1 = Inst->getSrc(1);	2226 Operand *Src1 = Inst->getSrc(1);

2215 Variable *Dest = Inst->getDest();	2227 Variable *Dest = Inst->getDest();

	2228

	2229 if (isVectorType(Dest->getType())) {

	2230 InstFcmp::FCond Condition = Inst->getCondition();

	2231 size_t Index = static_cast<size_t>(Condition);

	2232 assert(Index < TableFcmpSize);

	2233

	2234 if (TableFcmp[Index].SwapVectorOperands) {

	2235 Operand *T = Src0;

	2236 Src0 = Src1;

	2237 Src1 = T;

	2238 }

	2239

	2240 Variable *T = NULL;

	2241

	2242 // ALIGNHACK: Without support for stack alignment, both operands to

	2243 // cmpps need to be forced into registers. Once support for stack

	2244 // alignment is implemented, remove LEGAL_HACK.

	2245 #define LEGAL_HACK(Vect) legalizeToVar((Vect))

	2246 switch (Condition) {

	2247 default: {

	2248 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;

	2249 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);

	2250 T = makeReg(Src0->getType());

	2251 _movp(T, Src0);

	2252 _cmpps(T, LEGAL_HACK(Src1), Predicate);

	2253 } break;

	2254 case InstFcmp::False:

	2255 T = makeVectorOfZeros(Src0->getType());

	2256 break;

	2257 case InstFcmp::One: {

	2258 // Check both unequal and ordered.

	2259 T = makeReg(Src0->getType());

	2260 Variable *T2 = makeReg(Src0->getType());

	2261 Src1 = LEGAL_HACK(Src1);

	2262 _movp(T, Src0);

	2263 _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq);

	2264 _movp(T2, Src0);

	2265 _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord);

	2266 _pand(T, T2);

	2267 } break;

	2268 case InstFcmp::Ueq: {

	2269 // Check both equal or unordered.

	2270 T = makeReg(Src0->getType());

	2271 Variable *T2 = makeReg(Src0->getType());

	2272 Src1 = LEGAL_HACK(Src1);

	2273 _movp(T, Src0);

	2274 _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq);

	2275 _movp(T2, Src0);

	2276 _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord);

	2277 _por(T, T2);

	2278 } break;

	2279 case InstFcmp::True:

	2280 T = makeVectorOfMinusOnes(IceType_v4i32);

	2281 break;

	2282 }

	2283 #undef LEGAL_HACK

	2284

	2285 _movp(Dest, T);

	2286 eliminateNextVectorSextInstruction(Dest);

	2287 return;

	2288 }

	2289

2216 // Lowering a = fcmp cond, b, c	2290 // Lowering a = fcmp cond, b, c

2217 // ucomiss b, c /* only if C1 != Br_None */	2291 // ucomiss b, c /* only if C1 != Br_None */

2218 // /* but swap b,c order if SwapOperands==true */	2292 // /* but swap b,c order if SwapOperands==true */

2219 // mov a, <default>	2293 // mov a, <default>

2220 // j<C1> label /* only if C1 != Br_None */	2294 // j<C1> label /* only if C1 != Br_None */

2221 // j<C2> label /* only if C2 != Br_None */	2295 // j<C2> label /* only if C2 != Br_None */

2222 // FakeUse(a) /* only if C1 != Br_None */	2296 // FakeUse(a) /* only if C1 != Br_None */

2223 // mov a, !<default> /* only if C1 != Br_None */	2297 // mov a, !<default> /* only if C1 != Br_None */

2224 // label: /* only if C1 != Br_None */	2298 // label: /* only if C1 != Br_None */

2225 InstFcmp::FCond Condition = Inst->getCondition();	2299 InstFcmp::FCond Condition = Inst->getCondition();

2226 size_t Index = static_cast<size_t>(Condition);	2300 size_t Index = static_cast<size_t>(Condition);

2227 assert(Index < TableFcmpSize);	2301 assert(Index < TableFcmpSize);

2228 if (TableFcmp[Index].SwapOperands) {	2302 if (TableFcmp[Index].SwapScalarOperands) {

2229 Operand *Tmp = Src0;	2303 Operand *Tmp = Src0;

2230 Src0 = Src1;	2304 Src0 = Src1;

2231 Src1 = Tmp;	2305 Src1 = Tmp;

2232 }	2306 }

2233 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);	2307 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);

2234 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);	2308 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);

2235 if (HasC1) {	2309 if (HasC1) {

2236 Src0 = legalize(Src0);	2310 Src0 = legalize(Src0);

2237 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);	2311 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

2238 Variable *T = NULL;	2312 Variable *T = NULL;

(...skipping 110 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2349 // !(Src0 > Src1)	2423 // !(Src0 > Src1)

2350 _movp(T, Src0);	2424 _movp(T, Src0);

2351 _pcmpgt(T, LEGAL_HACK(Src1));	2425 _pcmpgt(T, LEGAL_HACK(Src1));

2352 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	2426 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

2353 _pxor(T, MinusOne);	2427 _pxor(T, MinusOne);

2354 } break;	2428 } break;

2355 }	2429 }

2356 #undef LEGAL_HACK	2430 #undef LEGAL_HACK

2357	2431

2358 _movp(Dest, T);	2432 _movp(Dest, T);

2359	2433 eliminateNextVectorSextInstruction(Dest);

2360 // The following pattern occurs often in lowered C and C++ code:

2361 //

2362 // %cmp = icmp pred <n x ty> %src0, %src1

2363 // %cmp.ext = sext <n x i1> %cmp to <n x ty>

2364 //

2365 // We can avoid the sext operation by copying the result from pcmpgt

2366 // and pcmpeq, which is already sign extended, to the result of the

2367 // sext operation

2368 if (InstCast *NextCast =

2369 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {

2370 if (NextCast->getCastKind() == InstCast::Sext &&

2371 NextCast->getSrc(0) == Dest) {

2372 _movp(NextCast->getDest(), T);

2373 // Skip over the instruction.

2374 NextCast->setDeleted();

2375 Context.advanceNext();

2376 }

2377 }

2378

2379 return;	2434 return;

2380 }	2435 }

2381	2436

2382 // If Src1 is an immediate, or known to be a physical register, we can	2437 // If Src1 is an immediate, or known to be a physical register, we can

2383 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into	2438 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into

2384 // a physical register. (Actually, either Src0 or Src1 can be chosen for	2439 // a physical register. (Actually, either Src0 or Src1 can be chosen for

2385 // the physical register, but unfortunately we have to commit to one or	2440 // the physical register, but unfortunately we have to commit to one or

2386 // the other before register allocation.)	2441 // the other before register allocation.)

2387 bool IsSrc1ImmOrReg = false;	2442 bool IsSrc1ImmOrReg = false;

2388 if (llvm::isa<Constant>(Src1)) {	2443 if (llvm::isa<Constant>(Src1)) {

(...skipping 1148 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3537 Src0 = legalize(Src0, Legal_All, true);	3592 Src0 = legalize(Src0, Legal_All, true);

3538 for (SizeT I = 0; I < NumCases; ++I) {	3593 for (SizeT I = 0; I < NumCases; ++I) {

3539 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));	3594 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));

3540 _cmp(Src0, Value);	3595 _cmp(Src0, Value);

3541 _br(InstX8632Br::Br_e, Inst->getLabel(I));	3596 _br(InstX8632Br::Br_e, Inst->getLabel(I));

3542 }	3597 }

3543	3598

3544 _br(Inst->getLabelDefault());	3599 _br(Inst->getLabelDefault());

3545 }	3600 }

3546	3601

	3602 // The following pattern occurs often in lowered C and C++ code:

	3603 //

	3604 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1

	3605 // %cmp.ext = sext <n x i1> %cmp to <n x ty>

	3606 //

	3607 // We can eliminate the sext operation by copying the result of pcmpeqd,

	3608 // pcmpgtd, or cmpps (which produce sign extended results) to the result

	3609 // of the sext operation.

	3610 void

	3611 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {

	3612 if (InstCast *NextCast =

	3613 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {

	3614 if (NextCast->getCastKind() == InstCast::Sext &&

	3615 NextCast->getSrc(0) == SignExtendedResult) {

	3616 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));

	3617 // Skip over the instruction.

	3618 NextCast->setDeleted();

	3619 Context.advanceNext();

	3620 }

	3621 }

	3622 }

	3623

3547 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {	3624 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {

3548 const SizeT MaxSrcs = 0;	3625 const SizeT MaxSrcs = 0;

3549 Variable *Dest = NULL;	3626 Variable *Dest = NULL;

3550 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);	3627 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);

3551 lowerCall(Call);	3628 lowerCall(Call);

3552 }	3629 }

3553	3630

3554 // There is no support for loading or emitting vector constants, so the	3631 // There is no support for loading or emitting vector constants, so the

3555 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,	3632 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,

3556 // etc. are initialized with register operations.	3633 // etc. are initialized with register operations.

(...skipping 376 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3933 for (SizeT i = 0; i < Size; ++i) {	4010 for (SizeT i = 0; i < Size; ++i) {

3934 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";	4011 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";

3935 }	4012 }

3936 Str << "\t.size\t" << MangledName << ", " << Size << "\n";	4013 Str << "\t.size\t" << MangledName << ", " << Size << "\n";

3937 }	4014 }

3938 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName	4015 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName

3939 << "\n";	4016 << "\n";

3940 }	4017 }

3941	4018

3942 } // end of namespace Ice	4019 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8632.def » ('j') | no next file with comments »