OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
12 // high-level instruction. It also implements | 12 // high-level instruction. It also implements |
13 // TargetX8632Fast::postLower() which does the simplest possible | 13 // TargetX8632Fast::postLower() which does the simplest possible |
14 // register allocation for the "fast" target. | 14 // register allocation for the "fast" target. |
15 // | 15 // |
16 //===----------------------------------------------------------------------===// | 16 //===----------------------------------------------------------------------===// |
17 | 17 |
18 #include "IceDefs.h" | 18 #include "IceDefs.h" |
19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
20 #include "IceCfgNode.h" | 20 #include "IceCfgNode.h" |
21 #include "IceInstX8632.h" | 21 #include "IceInstX8632.h" |
22 #include "IceOperand.h" | 22 #include "IceOperand.h" |
23 #include "IceTargetLoweringX8632.def" | 23 #include "IceTargetLoweringX8632.def" |
24 #include "IceTargetLoweringX8632.h" | 24 #include "IceTargetLoweringX8632.h" |
25 | 25 |
26 namespace Ice { | 26 namespace Ice { |
27 | 27 |
28 namespace { | 28 namespace { |
29 | 29 |
30 // The following table summarizes the logic for lowering the fcmp instruction. | 30 // The following table summarizes the logic for lowering the fcmp |
31 // There is one table entry for each of the 16 conditions. A comment in | 31 // instruction. There is one table entry for each of the 16 conditions. |
32 // lowerFcmp() describes the lowering template. In the most general case, there | 32 // |
33 // is a compare followed by two conditional branches, because some fcmp | 33 // The first four columns describe the case when the operands are |
34 // conditions don't map to a single x86 conditional branch. However, in many | 34 // floating point scalar values. A comment in lowerFcmp() describes the |
35 // cases it is possible to swap the operands in the comparison and have a single | 35 // lowering template. In the most general case, there is a compare |
36 // conditional branch. Since it's quite tedious to validate the table by hand, | 36 // followed by two conditional branches, because some fcmp conditions |
37 // good execution tests are helpful. | 37 // don't map to a single x86 conditional branch. However, in many cases |
38 | 38 // it is possible to swap the operands in the comparison and have a |
| 39 // single conditional branch. Since it's quite tedious to validate the |
| 40 // table by hand, good execution tests are helpful. |
| 41 // |
| 42 // The last two columns describe the case when the operands are vectors |
| 43 // of floating point values. For most fcmp conditions, there is a clear |
| 44 // mapping to a single x86 cmpps instruction variant. Some fcmp |
| 45 // conditions require special code to handle and these are marked in the |
| 46 // table with a Cmpps_Invalid predicate. |
39 const struct TableFcmp_ { | 47 const struct TableFcmp_ { |
40 uint32_t Default; | 48 uint32_t Default; |
41 bool SwapOperands; | 49 bool SwapScalarOperands; |
42 InstX8632::BrCond C1, C2; | 50 InstX8632::BrCond C1, C2; |
| 51 bool SwapVectorOperands; |
| 52 InstX8632Cmpps::CmppsCond Predicate; |
43 } TableFcmp[] = { | 53 } TableFcmp[] = { |
44 #define X(val, dflt, swap, C1, C2) \ | 54 #define X(val, dflt, swap, C1, C2, swap2, pred) \ |
45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \ | 55 { \ |
| 56 dflt, swap, InstX8632Br::C1, InstX8632Br::C2, swap2, InstX8632Cmpps::pred \ |
| 57 } \ |
46 , | 58 , |
47 FCMPX8632_TABLE | 59 FCMPX8632_TABLE |
48 #undef X | 60 #undef X |
49 }; | 61 }; |
50 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); | 62 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); |
51 | 63 |
52 // The following table summarizes the logic for lowering the icmp instruction | 64 // The following table summarizes the logic for lowering the icmp instruction |
53 // for i32 and narrower types. Each icmp condition has a clear mapping to an | 65 // for i32 and narrower types. Each icmp condition has a clear mapping to an |
54 // x86 conditional branch instruction. | 66 // x86 conditional branch instruction. |
55 | 67 |
56 const struct TableIcmp32_ { | 68 const struct TableIcmp32_ { |
57 InstX8632::BrCond Mapping; | 69 InstX8632::BrCond Mapping; |
58 } TableIcmp32[] = { | 70 } TableIcmp32[] = { |
59 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 71 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
(...skipping 71 matching lines...) |
131 // between abstraction layers. There is a risk that the tables | 143 // between abstraction layers. There is a risk that the tables |
132 // could get out of sync if enum values are reordered or if entries | 144 // could get out of sync if enum values are reordered or if entries |
133 // are added or deleted. This dummy function uses static_assert to | 145 // are added or deleted. This dummy function uses static_assert to |
134 // ensure everything is kept in sync. | 146 // ensure everything is kept in sync. |
135 void xMacroIntegrityCheck() { | 147 void xMacroIntegrityCheck() { |
136 // Validate the enum values in FCMPX8632_TABLE. | 148 // Validate the enum values in FCMPX8632_TABLE. |
137 { | 149 { |
138 // Define a temporary set of enum values based on low-level | 150 // Define a temporary set of enum values based on low-level |
139 // table entries. | 151 // table entries. |
140 enum _tmp_enum { | 152 enum _tmp_enum { |
141 #define X(val, dflt, swap, C1, C2) _tmp_##val, | 153 #define X(val, dflt, swap, C1, C2, swap2, pred) _tmp_##val, |
142 FCMPX8632_TABLE | 154 FCMPX8632_TABLE |
143 #undef X | 155 #undef X |
144 _num | 156 _num |
145 }; | 157 }; |
146 // Define a set of constants based on high-level table entries. | 158 // Define a set of constants based on high-level table entries. |
147 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; | 159 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; |
148 ICEINSTFCMP_TABLE; | 160 ICEINSTFCMP_TABLE; |
149 #undef X | 161 #undef X |
150 // Define a set of constants based on low-level table entries, | 162 // Define a set of constants based on low-level table entries, |
151 // and ensure the table entry keys are consistent. | 163 // and ensure the table entry keys are consistent. |
152 #define X(val, dflt, swap, C1, C2) \ | 164 #define X(val, dflt, swap, C1, C2, swap2, pred) \ |
153 static const int _table2_##val = _tmp_##val; \ | 165 static const int _table2_##val = _tmp_##val; \ |
154 STATIC_ASSERT(_table1_##val == _table2_##val); | 166 STATIC_ASSERT(_table1_##val == _table2_##val); |
155 FCMPX8632_TABLE; | 167 FCMPX8632_TABLE; |
156 #undef X | 168 #undef X |
157 // Repeat the static asserts with respect to the high-level | 169 // Repeat the static asserts with respect to the high-level |
158 // table entries in case the high-level table has extra entries. | 170 // table entries in case the high-level table has extra entries. |
159 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | 171 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
160 ICEINSTFCMP_TABLE; | 172 ICEINSTFCMP_TABLE; |
161 #undef X | 173 #undef X |
162 } | 174 } |
(...skipping 2043 matching lines...) |
2206 | 2218 |
2207 // Copy the element to the destination. | 2219 // Copy the element to the destination. |
2208 Variable *Dest = Inst->getDest(); | 2220 Variable *Dest = Inst->getDest(); |
2209 _mov(Dest, ExtractedElement); | 2221 _mov(Dest, ExtractedElement); |
2210 } | 2222 } |
2211 | 2223 |
2212 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 2224 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
2213 Operand *Src0 = Inst->getSrc(0); | 2225 Operand *Src0 = Inst->getSrc(0); |
2214 Operand *Src1 = Inst->getSrc(1); | 2226 Operand *Src1 = Inst->getSrc(1); |
2215 Variable *Dest = Inst->getDest(); | 2227 Variable *Dest = Inst->getDest(); |
| 2228 |
| 2229 if (isVectorType(Dest->getType())) { |
| 2230 InstFcmp::FCond Condition = Inst->getCondition(); |
| 2231 size_t Index = static_cast<size_t>(Condition); |
| 2232 assert(Index < TableFcmpSize); |
| 2233 |
| 2234 if (TableFcmp[Index].SwapVectorOperands) { |
| 2235 Operand *T = Src0; |
| 2236 Src0 = Src1; |
| 2237 Src1 = T; |
| 2238 } |
| 2239 |
| 2240 Variable *T = NULL; |
| 2241 |
| 2242 // ALIGNHACK: Without support for stack alignment, both operands to |
| 2243 // cmpps need to be forced into registers. Once support for stack |
| 2244 // alignment is implemented, remove LEGAL_HACK. |
| 2245 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| 2246 switch (Condition) { |
| 2247 default: { |
| 2248 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; |
| 2249 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
| 2250 T = makeReg(Src0->getType()); |
| 2251 _movp(T, Src0); |
| 2252 _cmpps(T, LEGAL_HACK(Src1), Predicate); |
| 2253 } break; |
| 2254 case InstFcmp::False: |
| 2255 T = makeVectorOfZeros(Src0->getType()); |
| 2256 break; |
| 2257 case InstFcmp::One: { |
| 2258 // Check both unequal and ordered. |
| 2259 T = makeReg(Src0->getType()); |
| 2260 Variable *T2 = makeReg(Src0->getType()); |
| 2261 Src1 = LEGAL_HACK(Src1); |
| 2262 _movp(T, Src0); |
| 2263 _cmpps(T, Src1, InstX8632Cmpps::Cmpps_neq); |
| 2264 _movp(T2, Src0); |
| 2265 _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_ord); |
| 2266 _pand(T, T2); |
| 2267 } break; |
| 2268 case InstFcmp::Ueq: { |
| 2269 // Check either equal or unordered. |
| 2270 T = makeReg(Src0->getType()); |
| 2271 Variable *T2 = makeReg(Src0->getType()); |
| 2272 Src1 = LEGAL_HACK(Src1); |
| 2273 _movp(T, Src0); |
| 2274 _cmpps(T, Src1, InstX8632Cmpps::Cmpps_eq); |
| 2275 _movp(T2, Src0); |
| 2276 _cmpps(T2, Src1, InstX8632Cmpps::Cmpps_unord); |
| 2277 _por(T, T2); |
| 2278 } break; |
| 2279 case InstFcmp::True: |
| 2280 T = makeVectorOfMinusOnes(IceType_v4i32); |
| 2281 break; |
| 2282 } |
| 2283 #undef LEGAL_HACK |
| 2284 |
| 2285 _movp(Dest, T); |
| 2286 eliminateNextVectorSextInstruction(Dest); |
| 2287 return; |
| 2288 } |
| 2289 |
2216 // Lowering a = fcmp cond, b, c | 2290 // Lowering a = fcmp cond, b, c |
2217 // ucomiss b, c /* only if C1 != Br_None */ | 2291 // ucomiss b, c /* only if C1 != Br_None */ |
2218 // /* but swap b,c order if SwapOperands==true */ | 2292 // /* but swap b,c order if SwapScalarOperands==true */ |
2219 // mov a, <default> | 2293 // mov a, <default> |
2220 // j<C1> label /* only if C1 != Br_None */ | 2294 // j<C1> label /* only if C1 != Br_None */ |
2221 // j<C2> label /* only if C2 != Br_None */ | 2295 // j<C2> label /* only if C2 != Br_None */ |
2222 // FakeUse(a) /* only if C1 != Br_None */ | 2296 // FakeUse(a) /* only if C1 != Br_None */ |
2223 // mov a, !<default> /* only if C1 != Br_None */ | 2297 // mov a, !<default> /* only if C1 != Br_None */ |
2224 // label: /* only if C1 != Br_None */ | 2298 // label: /* only if C1 != Br_None */ |
2225 InstFcmp::FCond Condition = Inst->getCondition(); | 2299 InstFcmp::FCond Condition = Inst->getCondition(); |
2226 size_t Index = static_cast<size_t>(Condition); | 2300 size_t Index = static_cast<size_t>(Condition); |
2227 assert(Index < TableFcmpSize); | 2301 assert(Index < TableFcmpSize); |
2228 if (TableFcmp[Index].SwapOperands) { | 2302 if (TableFcmp[Index].SwapScalarOperands) { |
2229 Operand *Tmp = Src0; | 2303 Operand *Tmp = Src0; |
2230 Src0 = Src1; | 2304 Src0 = Src1; |
2231 Src1 = Tmp; | 2305 Src1 = Tmp; |
2232 } | 2306 } |
2233 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); | 2307 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); |
2234 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); | 2308 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); |
2235 if (HasC1) { | 2309 if (HasC1) { |
2236 Src0 = legalize(Src0); | 2310 Src0 = legalize(Src0); |
2237 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2311 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2238 Variable *T = NULL; | 2312 Variable *T = NULL; |
(...skipping 110 matching lines...) |
2349 // !(Src0 > Src1) | 2423 // !(Src0 > Src1) |
2350 _movp(T, Src0); | 2424 _movp(T, Src0); |
2351 _pcmpgt(T, LEGAL_HACK(Src1)); | 2425 _pcmpgt(T, LEGAL_HACK(Src1)); |
2352 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2426 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
2353 _pxor(T, MinusOne); | 2427 _pxor(T, MinusOne); |
2354 } break; | 2428 } break; |
2355 } | 2429 } |
2356 #undef LEGAL_HACK | 2430 #undef LEGAL_HACK |
2357 | 2431 |
2358 _movp(Dest, T); | 2432 _movp(Dest, T); |
2359 | 2433 eliminateNextVectorSextInstruction(Dest); |
2360 // The following pattern occurs often in lowered C and C++ code: | |
2361 // | |
2362 // %cmp = icmp pred <n x ty> %src0, %src1 | |
2363 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | |
2364 // | |
2365 // We can avoid the sext operation by copying the result from pcmpgt | |
2366 // and pcmpeq, which is already sign extended, to the result of the | |
2367 // sext operation | |
2368 if (InstCast *NextCast = | |
2369 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | |
2370 if (NextCast->getCastKind() == InstCast::Sext && | |
2371 NextCast->getSrc(0) == Dest) { | |
2372 _movp(NextCast->getDest(), T); | |
2373 // Skip over the instruction. | |
2374 NextCast->setDeleted(); | |
2375 Context.advanceNext(); | |
2376 } | |
2377 } | |
2378 | |
2379 return; | 2434 return; |
2380 } | 2435 } |
2381 | 2436 |
2382 // If Src1 is an immediate, or known to be a physical register, we can | 2437 // If Src1 is an immediate, or known to be a physical register, we can |
2383 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | 2438 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
2384 // a physical register. (Actually, either Src0 or Src1 can be chosen for | 2439 // a physical register. (Actually, either Src0 or Src1 can be chosen for |
2385 // the physical register, but unfortunately we have to commit to one or | 2440 // the physical register, but unfortunately we have to commit to one or |
2386 // the other before register allocation.) | 2441 // the other before register allocation.) |
2387 bool IsSrc1ImmOrReg = false; | 2442 bool IsSrc1ImmOrReg = false; |
2388 if (llvm::isa<Constant>(Src1)) { | 2443 if (llvm::isa<Constant>(Src1)) { |
(...skipping 1148 matching lines...) |
3537 Src0 = legalize(Src0, Legal_All, true); | 3592 Src0 = legalize(Src0, Legal_All, true); |
3538 for (SizeT I = 0; I < NumCases; ++I) { | 3593 for (SizeT I = 0; I < NumCases; ++I) { |
3539 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); | 3594 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); |
3540 _cmp(Src0, Value); | 3595 _cmp(Src0, Value); |
3541 _br(InstX8632Br::Br_e, Inst->getLabel(I)); | 3596 _br(InstX8632Br::Br_e, Inst->getLabel(I)); |
3542 } | 3597 } |
3543 | 3598 |
3544 _br(Inst->getLabelDefault()); | 3599 _br(Inst->getLabelDefault()); |
3545 } | 3600 } |
3546 | 3601 |
| 3602 // The following pattern occurs often in lowered C and C++ code: |
| 3603 // |
| 3604 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 3605 // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 3606 // |
| 3607 // We can eliminate the sext operation by copying the result of pcmpeqd, |
| 3608 // pcmpgtd, or cmpps (which produce sign extended results) to the result |
| 3609 // of the sext operation. |
| 3610 void |
| 3611 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) { |
| 3612 if (InstCast *NextCast = |
| 3613 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| 3614 if (NextCast->getCastKind() == InstCast::Sext && |
| 3615 NextCast->getSrc(0) == SignExtendedResult) { |
| 3616 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
| 3617 // Skip over the instruction. |
| 3618 NextCast->setDeleted(); |
| 3619 Context.advanceNext(); |
| 3620 } |
| 3621 } |
| 3622 } |
| 3623 |
3547 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { | 3624 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
3548 const SizeT MaxSrcs = 0; | 3625 const SizeT MaxSrcs = 0; |
3549 Variable *Dest = NULL; | 3626 Variable *Dest = NULL; |
3550 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); | 3627 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); |
3551 lowerCall(Call); | 3628 lowerCall(Call); |
3552 } | 3629 } |
3553 | 3630 |
3554 // There is no support for loading or emitting vector constants, so the | 3631 // There is no support for loading or emitting vector constants, so the |
3555 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | 3632 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
3556 // etc. are initialized with register operations. | 3633 // etc. are initialized with register operations. |
(...skipping 376 matching lines...) |
3933 for (SizeT i = 0; i < Size; ++i) { | 4010 for (SizeT i = 0; i < Size; ++i) { |
3934 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4011 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
3935 } | 4012 } |
3936 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4013 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
3937 } | 4014 } |
3938 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4015 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
3939 << "\n"; | 4016 << "\n"; |
3940 } | 4017 } |
3941 | 4018 |
3942 } // end of namespace Ice | 4019 } // end of namespace Ice |
OLD | NEW |