Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(312)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 413053002: Lower the fcmp instruction for <4 x float> operands. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Improve table formatting and X macro parameter names Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8632.def » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements 12 // high-level instruction. It also implements
13 // TargetX8632Fast::postLower() which does the simplest possible 13 // TargetX8632Fast::postLower() which does the simplest possible
14 // register allocation for the "fast" target. 14 // register allocation for the "fast" target.
15 // 15 //
16 //===----------------------------------------------------------------------===// 16 //===----------------------------------------------------------------------===//
17 17
18 #include "IceDefs.h" 18 #include "IceDefs.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceInstX8632.h" 21 #include "IceInstX8632.h"
22 #include "IceOperand.h" 22 #include "IceOperand.h"
23 #include "IceTargetLoweringX8632.def" 23 #include "IceTargetLoweringX8632.def"
24 #include "IceTargetLoweringX8632.h" 24 #include "IceTargetLoweringX8632.h"
25 25
26 namespace Ice { 26 namespace Ice {
27 27
28 namespace { 28 namespace {
29 29
30 // The following table summarizes the logic for lowering the fcmp instruction. 30 // The following table summarizes the logic for lowering the fcmp
31 // There is one table entry for each of the 16 conditions. A comment in 31 // instruction. There is one table entry for each of the 16 conditions.
32 // lowerFcmp() describes the lowering template. In the most general case, there 32 //
33 // is a compare followed by two conditional branches, because some fcmp 33 // The first four columns describe the case when the operands are
34 // conditions don't map to a single x86 conditional branch. However, in many 34 // floating point scalar values. A comment in lowerFcmp() describes the
35 // cases it is possible to swap the operands in the comparison and have a single 35 // lowering template. In the most general case, there is a compare
36 // conditional branch. Since it's quite tedious to validate the table by hand, 36 // followed by two conditional branches, because some fcmp conditions
37 // good execution tests are helpful. 37 // don't map to a single x86 conditional branch. However, in many cases
38 38 // it is possible to swap the operands in the comparison and have a
39 // single conditional branch. Since it's quite tedious to validate the
40 // table by hand, good execution tests are helpful.
41 //
42 // The last two columns describe the case when the operands are vectors
43 // of floating point values. For most fcmp conditions, there is a clear
44 // mapping to a single x86 cmpps instruction variant. Some fcmp
45 // conditions require special code to handle and these are marked in the
46 // table with a Cmpps_Invalid predicate.
39 const struct TableFcmp_ { 47 const struct TableFcmp_ {
40 uint32_t Default; 48 uint32_t Default;
41 bool SwapOperands; 49 bool SwapScalarOperands;
42 InstX8632::BrCond C1, C2; 50 InstX8632::BrCond C1, C2;
51 bool SwapVectorOperands;
52 InstX8632Cmpps::CmppsCond Predicate;
43 } TableFcmp[] = { 53 } TableFcmp[] = {
44 #define X(val, dflt, swap, C1, C2) \ 54 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \ 55 { \
56 dflt, swapS, InstX8632Br::C1, InstX8632Br::C2, swapV, InstX8632Cmpps::pred \
57 } \
46 , 58 ,
47 FCMPX8632_TABLE 59 FCMPX8632_TABLE
48 #undef X 60 #undef X
49 }; 61 };
50 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); 62 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51 63
52 // The following table summarizes the logic for lowering the icmp instruction 64 // The following table summarizes the logic for lowering the icmp instruction
53 // for i32 and narrower types. Each icmp condition has a clear mapping to an 65 // for i32 and narrower types. Each icmp condition has a clear mapping to an
54 // x86 conditional branch instruction. 66 // x86 conditional branch instruction.
55 67
56 const struct TableIcmp32_ { 68 const struct TableIcmp32_ {
57 InstX8632::BrCond Mapping; 69 InstX8632::BrCond Mapping;
58 } TableIcmp32[] = { 70 } TableIcmp32[] = {
59 #define X(val, C_32, C1_64, C2_64, C3_64) \ 71 #define X(val, C_32, C1_64, C2_64, C3_64) \
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
131 // between abstraction layers. There is a risk that the tables 143 // between abstraction layers. There is a risk that the tables
132 // could get out of sync if enum values are reordered or if entries 144 // could get out of sync if enum values are reordered or if entries
133 // are added or deleted. This dummy function uses static_assert to 145 // are added or deleted. This dummy function uses static_assert to
134 // ensure everything is kept in sync. 146 // ensure everything is kept in sync.
135 void xMacroIntegrityCheck() { 147 void xMacroIntegrityCheck() {
136 // Validate the enum values in FCMPX8632_TABLE. 148 // Validate the enum values in FCMPX8632_TABLE.
137 { 149 {
138 // Define a temporary set of enum values based on low-level 150 // Define a temporary set of enum values based on low-level
139 // table entries. 151 // table entries.
140 enum _tmp_enum { 152 enum _tmp_enum {
141 #define X(val, dflt, swap, C1, C2) _tmp_##val, 153 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
142 FCMPX8632_TABLE 154 FCMPX8632_TABLE
143 #undef X 155 #undef X
144 _num 156 _num
145 }; 157 };
146 // Define a set of constants based on high-level table entries. 158 // Define a set of constants based on high-level table entries.
147 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; 159 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
148 ICEINSTFCMP_TABLE; 160 ICEINSTFCMP_TABLE;
149 #undef X 161 #undef X
150 // Define a set of constants based on low-level table entries, 162 // Define a set of constants based on low-level table entries,
151 // and ensure the table entry keys are consistent. 163 // and ensure the table entry keys are consistent.
152 #define X(val, dflt, swap, C1, C2) \ 164 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
153 static const int _table2_##val = _tmp_##val; \ 165 static const int _table2_##val = _tmp_##val; \
154 STATIC_ASSERT(_table1_##val == _table2_##val); 166 STATIC_ASSERT(_table1_##val == _table2_##val);
155 FCMPX8632_TABLE; 167 FCMPX8632_TABLE;
156 #undef X 168 #undef X
157 // Repeat the static asserts with respect to the high-level 169 // Repeat the static asserts with respect to the high-level
158 // table entries in case the high-level table has extra entries. 170 // table entries in case the high-level table has extra entries.
159 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); 171 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
160 ICEINSTFCMP_TABLE; 172 ICEINSTFCMP_TABLE;
161 #undef X 173 #undef X
162 } 174 }
(...skipping 2043 matching lines...) Expand 10 before | Expand all | Expand 10 after
2206 2218
2207 // Copy the element to the destination. 2219 // Copy the element to the destination.
2208 Variable *Dest = Inst->getDest(); 2220 Variable *Dest = Inst->getDest();
2209 _mov(Dest, ExtractedElement); 2221 _mov(Dest, ExtractedElement);
2210 } 2222 }
2211 2223
2212 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { 2224 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
2213 Operand *Src0 = Inst->getSrc(0); 2225 Operand *Src0 = Inst->getSrc(0);
2214 Operand *Src1 = Inst->getSrc(1); 2226 Operand *Src1 = Inst->getSrc(1);
2215 Variable *Dest = Inst->getDest(); 2227 Variable *Dest = Inst->getDest();
2228
2229 if (isVectorType(Dest->getType())) {
2230 InstFcmp::FCond Condition = Inst->getCondition();
2231 size_t Index = static_cast<size_t>(Condition);
2232 assert(Index < TableFcmpSize);
2233
2234 if (TableFcmp[Index].SwapVectorOperands) {
2235 Operand *T = Src0;
2236 Src0 = Src1;
2237 Src1 = T;
2238 }
2239
2240 Variable *T = NULL;
2241
2242 // ALIGNHACK: Without support for stack alignment, both operands to
2243 // cmpps need to be forced into registers. Once support for stack
2244 // alignment is implemented, remove LEGAL_HACK.
2245 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2246 switch (Condition) {
2247 default: {
2248 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
2249 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
2250 T = makeReg(Src0->getType());
2251 _movp(T, Src0);
2252 _cmpps(T, LEGAL_HACK(Src1), Predicate);
2253 } break;
2254 case InstFcmp::False:
2255 T = makeVectorOfZeros(Src0->getType());
2256 break;
2257 case InstFcmp::One: {
2258 // Check both unequal and ordered.
2259 T = makeReg(Src0->getType());
2260 Variable *T2 = makeReg(Src0->getType());
2261 Src1 = LEGAL_HACK(Src1);
2262 _movp(T, Src0);
2263 _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq);
2264 _movp(T2, Src0);
2265 _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord);
2266 _pand(T, T2);
2267 } break;
2268 case InstFcmp::Ueq: {
2269 // Check both equal or unordered.
2270 T = makeReg(Src0->getType());
2271 Variable *T2 = makeReg(Src0->getType());
2272 Src1 = LEGAL_HACK(Src1);
2273 _movp(T, Src0);
2274 _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq);
2275 _movp(T2, Src0);
2276 _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord);
2277 _por(T, T2);
2278 } break;
2279 case InstFcmp::True:
2280 T = makeVectorOfMinusOnes(IceType_v4i32);
2281 break;
2282 }
2283 #undef LEGAL_HACK
2284
2285 _movp(Dest, T);
2286 eliminateNextVectorSextInstruction(Dest);
2287 return;
2288 }
2289
2216 // Lowering a = fcmp cond, b, c 2290 // Lowering a = fcmp cond, b, c
2217 // ucomiss b, c /* only if C1 != Br_None */ 2291 // ucomiss b, c /* only if C1 != Br_None */
2218 // /* but swap b,c order if SwapOperands==true */ 2292 // /* but swap b,c order if SwapScalarOperands==true */
2219 // mov a, <default> 2293 // mov a, <default>
2220 // j<C1> label /* only if C1 != Br_None */ 2294 // j<C1> label /* only if C1 != Br_None */
2221 // j<C2> label /* only if C2 != Br_None */ 2295 // j<C2> label /* only if C2 != Br_None */
2222 // FakeUse(a) /* only if C1 != Br_None */ 2296 // FakeUse(a) /* only if C1 != Br_None */
2223 // mov a, !<default> /* only if C1 != Br_None */ 2297 // mov a, !<default> /* only if C1 != Br_None */
2224 // label: /* only if C1 != Br_None */ 2298 // label: /* only if C1 != Br_None */
2225 InstFcmp::FCond Condition = Inst->getCondition(); 2299 InstFcmp::FCond Condition = Inst->getCondition();
2226 size_t Index = static_cast<size_t>(Condition); 2300 size_t Index = static_cast<size_t>(Condition);
2227 assert(Index < TableFcmpSize); 2301 assert(Index < TableFcmpSize);
2228 if (TableFcmp[Index].SwapOperands) { 2302 if (TableFcmp[Index].SwapScalarOperands) {
2229 Operand *Tmp = Src0; 2303 Operand *Tmp = Src0;
2230 Src0 = Src1; 2304 Src0 = Src1;
2231 Src1 = Tmp; 2305 Src1 = Tmp;
2232 } 2306 }
2233 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); 2307 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
2234 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); 2308 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
2235 if (HasC1) { 2309 if (HasC1) {
2236 Src0 = legalize(Src0); 2310 Src0 = legalize(Src0);
2237 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2311 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2238 Variable *T = NULL; 2312 Variable *T = NULL;
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
2349 // !(Src0 > Src1) 2423 // !(Src0 > Src1)
2350 _movp(T, Src0); 2424 _movp(T, Src0);
2351 _pcmpgt(T, LEGAL_HACK(Src1)); 2425 _pcmpgt(T, LEGAL_HACK(Src1));
2352 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2426 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2353 _pxor(T, MinusOne); 2427 _pxor(T, MinusOne);
2354 } break; 2428 } break;
2355 } 2429 }
2356 #undef LEGAL_HACK 2430 #undef LEGAL_HACK
2357 2431
2358 _movp(Dest, T); 2432 _movp(Dest, T);
2359 2433 eliminateNextVectorSextInstruction(Dest);
2360 // The following pattern occurs often in lowered C and C++ code:
2361 //
2362 // %cmp = icmp pred <n x ty> %src0, %src1
2363 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
2364 //
2365 // We can avoid the sext operation by copying the result from pcmpgt
2366 // and pcmpeq, which is already sign extended, to the result of the
2367 // sext operation
2368 if (InstCast *NextCast =
2369 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
2370 if (NextCast->getCastKind() == InstCast::Sext &&
2371 NextCast->getSrc(0) == Dest) {
2372 _movp(NextCast->getDest(), T);
2373 // Skip over the instruction.
2374 NextCast->setDeleted();
2375 Context.advanceNext();
2376 }
2377 }
2378
2379 return; 2434 return;
2380 } 2435 }
2381 2436
2382 // If Src1 is an immediate, or known to be a physical register, we can 2437 // If Src1 is an immediate, or known to be a physical register, we can
2383 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into 2438 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2384 // a physical register. (Actually, either Src0 or Src1 can be chosen for 2439 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2385 // the physical register, but unfortunately we have to commit to one or 2440 // the physical register, but unfortunately we have to commit to one or
2386 // the other before register allocation.) 2441 // the other before register allocation.)
2387 bool IsSrc1ImmOrReg = false; 2442 bool IsSrc1ImmOrReg = false;
2388 if (llvm::isa<Constant>(Src1)) { 2443 if (llvm::isa<Constant>(Src1)) {
(...skipping 1148 matching lines...) Expand 10 before | Expand all | Expand 10 after
3537 Src0 = legalize(Src0, Legal_All, true); 3592 Src0 = legalize(Src0, Legal_All, true);
3538 for (SizeT I = 0; I < NumCases; ++I) { 3593 for (SizeT I = 0; I < NumCases; ++I) {
3539 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); 3594 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
3540 _cmp(Src0, Value); 3595 _cmp(Src0, Value);
3541 _br(InstX8632Br::Br_e, Inst->getLabel(I)); 3596 _br(InstX8632Br::Br_e, Inst->getLabel(I));
3542 } 3597 }
3543 3598
3544 _br(Inst->getLabelDefault()); 3599 _br(Inst->getLabelDefault());
3545 } 3600 }
3546 3601
3602 // The following pattern occurs often in lowered C and C++ code:
3603 //
3604 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1
3605 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
3606 //
3607 // We can eliminate the sext operation by copying the result of pcmpeqd,
3608 // pcmpgtd, or cmpps (which produce sign extended results) to the result
3609 // of the sext operation.
3610 void
3611 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
3612 if (InstCast *NextCast =
3613 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
3614 if (NextCast->getCastKind() == InstCast::Sext &&
3615 NextCast->getSrc(0) == SignExtendedResult) {
3616 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
3617 // Skip over the instruction.
3618 NextCast->setDeleted();
3619 Context.advanceNext();
3620 }
3621 }
3622 }
3623
3547 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { 3624 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
3548 const SizeT MaxSrcs = 0; 3625 const SizeT MaxSrcs = 0;
3549 Variable *Dest = NULL; 3626 Variable *Dest = NULL;
3550 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); 3627 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
3551 lowerCall(Call); 3628 lowerCall(Call);
3552 } 3629 }
3553 3630
3554 // There is no support for loading or emitting vector constants, so the 3631 // There is no support for loading or emitting vector constants, so the
3555 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 3632 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
3556 // etc. are initialized with register operations. 3633 // etc. are initialized with register operations.
(...skipping 376 matching lines...) Expand 10 before | Expand all | Expand 10 after
3933 for (SizeT i = 0; i < Size; ++i) { 4010 for (SizeT i = 0; i < Size; ++i) {
3934 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4011 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3935 } 4012 }
3936 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4013 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3937 } 4014 }
3938 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4015 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3939 << "\n"; 4016 << "\n";
3940 } 4017 }
3941 4018
3942 } // end of namespace Ice 4019 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8632.def » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698