OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
165 { | 165 { |
166 // Define a temporary set of enum values based on low-level | 166 // Define a temporary set of enum values based on low-level |
167 // table entries. | 167 // table entries. |
168 enum _tmp_enum { | 168 enum _tmp_enum { |
169 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, | 169 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, |
170 ICMPX8632_TABLE | 170 ICMPX8632_TABLE |
171 #undef X | 171 #undef X |
172 _num | 172 _num |
173 }; | 173 }; |
174 // Define a set of constants based on high-level table entries. | 174 // Define a set of constants based on high-level table entries. |
175 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | 175 #define X(tag, str, isunsigned) static const int _table1_##tag = InstIcmp::tag; |
176 ICEINSTICMP_TABLE; | 176 ICEINSTICMP_TABLE; |
177 #undef X | 177 #undef X |
178 // Define a set of constants based on low-level table entries, | 178 // Define a set of constants based on low-level table entries, |
179 // and ensure the table entry keys are consistent. | 179 // and ensure the table entry keys are consistent. |
180 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 180 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
181 static const int _table2_##val = _tmp_##val; \ | 181 static const int _table2_##val = _tmp_##val; \ |
182 STATIC_ASSERT(_table1_##val == _table2_##val); | 182 STATIC_ASSERT(_table1_##val == _table2_##val); |
183 ICMPX8632_TABLE; | 183 ICMPX8632_TABLE; |
184 #undef X | 184 #undef X |
185 // Repeat the static asserts with respect to the high-level | 185 // Repeat the static asserts with respect to the high-level |
186 // table entries in case the high-level table has extra entries. | 186 // table entries in case the high-level table has extra entries. |
187 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | 187 #define X(tag, str, isunsigned) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
188 ICEINSTICMP_TABLE; | 188 ICEINSTICMP_TABLE; |
189 #undef X | 189 #undef X |
190 } | 190 } |
191 | 191 |
192 // Validate the enum values in ICETYPEX8632_TABLE. | 192 // Validate the enum values in ICETYPEX8632_TABLE. |
193 { | 193 { |
194 // Define a temporary set of enum values based on low-level | 194 // Define a temporary set of enum values based on low-level |
195 // table entries. | 195 // table entries. |
196 enum _tmp_enum { | 196 enum _tmp_enum { |
197 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag, | 197 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag, |
(...skipping 1074 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1272 // Sz_mul_v16i8 | 1272 // Sz_mul_v16i8 |
1273 const IceString Helper = "Sz_mul_v16i8"; | 1273 const IceString Helper = "Sz_mul_v16i8"; |
1274 const SizeT MaxSrcs = 2; | 1274 const SizeT MaxSrcs = 2; |
1275 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | 1275 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
1276 Call->addArg(Src0); | 1276 Call->addArg(Src0); |
1277 Call->addArg(Src1); | 1277 Call->addArg(Src1); |
1278 lowerCall(Call); | 1278 lowerCall(Call); |
1279 } | 1279 } |
1280 } break; | 1280 } break; |
1281 case InstArithmetic::Shl: { | 1281 case InstArithmetic::Shl: { |
1282 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 | 1282 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 |
jvoung (off chromium)
2014/07/23 18:54:55
Could this have used _psll then, for i32 and i16 e
wala
2014/07/23 20:40:36
No. Unfortunately _psll shifts all the fields in t
| |
1283 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); | 1283 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); |
1284 const SizeT MaxSrcs = 2; | 1284 const SizeT MaxSrcs = 2; |
1285 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | 1285 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
1286 Call->addArg(Src0); | 1286 Call->addArg(Src0); |
1287 Call->addArg(Src1); | 1287 Call->addArg(Src1); |
1288 lowerCall(Call); | 1288 lowerCall(Call); |
1289 } break; | 1289 } break; |
1290 case InstArithmetic::Lshr: { | 1290 case InstArithmetic::Lshr: { |
1291 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 | 1291 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 |
1292 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); | 1292 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); |
(...skipping 961 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2254 _mov(Dest, NonDefault); | 2254 _mov(Dest, NonDefault); |
2255 Context.insert(Label); | 2255 Context.insert(Label); |
2256 } | 2256 } |
2257 } | 2257 } |
2258 | 2258 |
2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
2260 Operand *Src0 = legalize(Inst->getSrc(0)); | 2260 Operand *Src0 = legalize(Inst->getSrc(0)); |
2261 Operand *Src1 = legalize(Inst->getSrc(1)); | 2261 Operand *Src1 = legalize(Inst->getSrc(1)); |
2262 Variable *Dest = Inst->getDest(); | 2262 Variable *Dest = Inst->getDest(); |
2263 | 2263 |
2264 // If Src1 is an immediate, or known to be a physical register, we can | 2264 if (isVectorType(Dest->getType())) { |
2265 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | 2265 Type Ty = Src0->getType(); |
2266 // a physical register. (Actually, either Src0 or Src1 can be chosen for | 2266 // Promote i1 vectors to 128 bit integer vector types. |
2267 // the physical register, but unfortunately we have to commit to one or | 2267 if (typeElementType(Ty) == IceType_i1) { |
2268 // the other before register allocation.) | 2268 Type NewTy = IceType_NUM; |
2269 bool IsSrc1ImmOrReg = false; | 2269 switch (Ty) { |
2270 if (llvm::isa<Constant>(Src1)) { | 2270 default: |
2271 IsSrc1ImmOrReg = true; | 2271 llvm_unreachable("unexpected type"); |
2272 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 2272 break; |
2273 if (Var->hasReg()) | 2273 case IceType_v4i1: |
2274 NewTy = IceType_v4i32; | |
2275 break; | |
2276 case IceType_v8i1: | |
2277 NewTy = IceType_v8i16; | |
2278 break; | |
2279 case IceType_v16i1: | |
2280 NewTy = IceType_v16i8; | |
2281 break; | |
2282 } | |
2283 Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode()); | |
2284 Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode()); | |
2285 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); | |
2286 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); | |
2287 Src0 = NewSrc0; | |
2288 Src1 = NewSrc1; | |
2289 Ty = NewTy; | |
2290 } | |
2291 | |
2292 // SSE2 only has signed comparison operations. Transform unsigned | |
2293 // inputs in a manner that allows for the use of signed comparison | |
2294 // operations by flipping the high order bits. | |
2295 if (Inst->isUnsigned()) { | |
Jim Stichnoth
2014/07/23 17:29:28
I have a minor problem and a major problem with is
wala
2014/07/23 20:40:36
Done.
Removed the unsigned attribute.
| |
2296 Variable *T0 = makeReg(Ty); | |
2297 Variable *T1 = makeReg(Ty); | |
2298 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | |
2299 _movp(T0, Src0); | |
2300 _pxor(T0, HighOrderBits); | |
2301 _movp(T1, Src1); | |
2302 _pxor(T1, HighOrderBits); | |
2303 Src0 = T0; | |
2304 Src1 = T1; | |
2305 } | |
2306 | |
2307 // TODO: ALIGNHACK: Both operands to compare instructions need to be | |
2308 // in registers until stack alignment support is implemented. Once | |
2309 // there is support for stack alignment, LEGAL_HACK can be removed. | |
2310 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
2311 Variable *T = makeReg(Ty); | |
2312 switch (Inst->getCondition()) { | |
2313 default: | |
2314 llvm_unreachable("unexpected condition"); | |
2315 break; | |
2316 case InstIcmp::Eq: { | |
2317 _movp(T, Src0); | |
2318 _pcmpeq(T, LEGAL_HACK(Src1)); | |
2319 } break; | |
2320 case InstIcmp::Ne: { | |
2321 _movp(T, Src0); | |
2322 _pcmpeq(T, LEGAL_HACK(Src1)); | |
2323 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
2324 _pxor(T, MinusOne); | |
2325 } break; | |
2326 case InstIcmp::Ugt: | |
2327 case InstIcmp::Sgt: { | |
2328 _movp(T, Src0); | |
2329 _pcmpgt(T, LEGAL_HACK(Src1)); | |
2330 } break; | |
2331 case InstIcmp::Uge: | |
2332 case InstIcmp::Sge: { | |
2333 // !(Src1 > Src0) | |
2334 _movp(T, Src1); | |
2335 _pcmpgt(T, LEGAL_HACK(Src0)); | |
2336 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
2337 _pxor(T, MinusOne); | |
2338 } break; | |
2339 case InstIcmp::Ult: | |
2340 case InstIcmp::Slt: { | |
2341 _movp(T, Src1); | |
2342 _pcmpgt(T, LEGAL_HACK(Src0)); | |
2343 } break; | |
2344 case InstIcmp::Ule: | |
2345 case InstIcmp::Sle: { | |
2346 // !(Src0 > Src1) | |
2347 _movp(T, Src0); | |
2348 _pcmpgt(T, LEGAL_HACK(Src1)); | |
2349 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
2350 _pxor(T, MinusOne); | |
2351 } break; | |
2352 } | |
2353 #undef LEGAL_HACK | |
2354 | |
2355 _movp(Dest, T); | |
2356 | |
2357 // The following pattern occurs often in lowered C and C++ code: | |
2358 // | |
2359 // %cmp = icmp pred <n x ty> %src0, %src1 | |
2360 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | |
2361 // | |
2362 // We can avoid the sext operation by copying the result from pcmpgt | |
2363 // and pcmpeq, which is already sign extended, to the result of the | |
2364 // sext operation | |
2365 if (InstCast *NextCast = | |
2366 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | |
2367 if (NextCast->getCastKind() == InstCast::Sext && | |
2368 NextCast->getSrc(0) == Dest) { | |
2369 _movp(NextCast->getDest(), T); | |
2370 // Skip over the instruction. | |
2371 NextCast->setDeleted(); | |
2372 Context.advanceNext(); | |
2373 } | |
2374 } | |
jvoung (off chromium)
2014/07/23 18:54:55
could this just return;
and then the scalar versi
wala
2014/07/23 20:40:36
Done.
| |
2375 } else { | |
2376 // If Src1 is an immediate, or known to be a physical register, we can | |
2377 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | |
2378 // a physical register. (Actually, either Src0 or Src1 can be chosen for | |
2379 // the physical register, but unfortunately we have to commit to one or | |
2380 // the other before register allocation.) | |
2381 bool IsSrc1ImmOrReg = false; | |
2382 if (llvm::isa<Constant>(Src1)) { | |
2274 IsSrc1ImmOrReg = true; | 2383 IsSrc1ImmOrReg = true; |
2275 } | 2384 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
2385 if (Var->hasReg()) | |
2386 IsSrc1ImmOrReg = true; | |
2387 } | |
2276 | 2388 |
2277 // Try to fuse a compare immediately followed by a conditional branch. This | 2389 // Try to fuse a compare immediately followed by a conditional branch. This |
2278 // is possible when the compare dest and the branch source operands are the | 2390 // is possible when the compare dest and the branch source operands are the |
2279 // same, and are their only uses. TODO: implement this optimization for i64. | 2391 // same, and are their only uses. TODO: implement this optimization for |
2280 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { | 2392 // i64. |
2281 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && | 2393 if (InstBr *NextBr = |
2282 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { | 2394 llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
2283 Operand *Src0New = | 2395 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
2284 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | 2396 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
2285 _cmp(Src0New, Src1); | 2397 Operand *Src0New = |
2286 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), | 2398 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
2287 NextBr->getTargetFalse()); | 2399 _cmp(Src0New, Src1); |
2288 // Skip over the following branch instruction. | 2400 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
2289 NextBr->setDeleted(); | 2401 NextBr->getTargetFalse()); |
2290 Context.advanceNext(); | 2402 // Skip over the following branch instruction. |
2403 NextBr->setDeleted(); | |
2404 Context.advanceNext(); | |
2405 return; | |
2406 } | |
2407 } | |
2408 | |
2409 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | |
2410 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
2411 Constant *One = Ctx->getConstantInt(IceType_i32, 1); | |
2412 if (Src0->getType() == IceType_i64) { | |
2413 InstIcmp::ICond Condition = Inst->getCondition(); | |
2414 size_t Index = static_cast<size_t>(Condition); | |
2415 assert(Index < TableIcmp64Size); | |
2416 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | |
2417 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | |
2418 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { | |
2419 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
2420 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); | |
2421 _cmp(loOperand(Src0), Src1LoRI); | |
2422 _br(InstX8632Br::Br_ne, Label); | |
2423 _cmp(hiOperand(Src0), Src1HiRI); | |
2424 _br(InstX8632Br::Br_ne, Label); | |
2425 Context.insert(InstFakeUse::create(Func, Dest)); | |
2426 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); | |
2427 Context.insert(Label); | |
2428 } else { | |
2429 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); | |
2430 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); | |
2431 _mov(Dest, One); | |
2432 _cmp(hiOperand(Src0), Src1HiRI); | |
2433 _br(TableIcmp64[Index].C1, LabelTrue); | |
2434 _br(TableIcmp64[Index].C2, LabelFalse); | |
2435 _cmp(loOperand(Src0), Src1LoRI); | |
2436 _br(TableIcmp64[Index].C3, LabelTrue); | |
2437 Context.insert(LabelFalse); | |
2438 Context.insert(InstFakeUse::create(Func, Dest)); | |
2439 _mov(Dest, Zero); | |
2440 Context.insert(LabelTrue); | |
2441 } | |
2291 return; | 2442 return; |
2292 } | 2443 } |
2444 | |
2445 // cmp b, c | |
2446 Operand *Src0New = | |
2447 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | |
2448 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
2449 _cmp(Src0New, Src1); | |
2450 _mov(Dest, One); | |
2451 _br(getIcmp32Mapping(Inst->getCondition()), Label); | |
2452 Context.insert(InstFakeUse::create(Func, Dest)); | |
2453 _mov(Dest, Zero); | |
2454 Context.insert(Label); | |
2293 } | 2455 } |
2294 | |
2295 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | |
2296 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
2297 Constant *One = Ctx->getConstantInt(IceType_i32, 1); | |
2298 if (Src0->getType() == IceType_i64) { | |
2299 InstIcmp::ICond Condition = Inst->getCondition(); | |
2300 size_t Index = static_cast<size_t>(Condition); | |
2301 assert(Index < TableIcmp64Size); | |
2302 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | |
2303 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | |
2304 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { | |
2305 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
2306 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); | |
2307 _cmp(loOperand(Src0), Src1LoRI); | |
2308 _br(InstX8632Br::Br_ne, Label); | |
2309 _cmp(hiOperand(Src0), Src1HiRI); | |
2310 _br(InstX8632Br::Br_ne, Label); | |
2311 Context.insert(InstFakeUse::create(Func, Dest)); | |
2312 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); | |
2313 Context.insert(Label); | |
2314 } else { | |
2315 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); | |
2316 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); | |
2317 _mov(Dest, One); | |
2318 _cmp(hiOperand(Src0), Src1HiRI); | |
2319 _br(TableIcmp64[Index].C1, LabelTrue); | |
2320 _br(TableIcmp64[Index].C2, LabelFalse); | |
2321 _cmp(loOperand(Src0), Src1LoRI); | |
2322 _br(TableIcmp64[Index].C3, LabelTrue); | |
2323 Context.insert(LabelFalse); | |
2324 Context.insert(InstFakeUse::create(Func, Dest)); | |
2325 _mov(Dest, Zero); | |
2326 Context.insert(LabelTrue); | |
2327 } | |
2328 return; | |
2329 } | |
2330 | |
2331 // cmp b, c | |
2332 Operand *Src0New = | |
2333 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | |
2334 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
2335 _cmp(Src0New, Src1); | |
2336 _mov(Dest, One); | |
2337 _br(getIcmp32Mapping(Inst->getCondition()), Label); | |
2338 Context.insert(InstFakeUse::create(Func, Dest)); | |
2339 _mov(Dest, Zero); | |
2340 Context.insert(Label); | |
2341 } | 2456 } |
2342 | 2457 |
2343 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | 2458 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
2344 Operand *SourceVectOperand = Inst->getSrc(0); | 2459 Operand *SourceVectOperand = Inst->getSrc(0); |
2345 Operand *ElementToInsert = Inst->getSrc(1); | 2460 Operand *ElementToInsert = Inst->getSrc(1); |
2346 ConstantInteger *ElementIndex = | 2461 ConstantInteger *ElementIndex = |
2347 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); | 2462 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); |
2348 // Only constant indices are allowed in PNaCl IR. | 2463 // Only constant indices are allowed in PNaCl IR. |
2349 assert(ElementIndex); | 2464 assert(ElementIndex); |
2350 unsigned Index = ElementIndex->getValue(); | 2465 unsigned Index = ElementIndex->getValue(); |
(...skipping 1040 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3391 _br(Inst->getLabelDefault()); | 3506 _br(Inst->getLabelDefault()); |
3392 } | 3507 } |
3393 | 3508 |
3394 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { | 3509 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
3395 const SizeT MaxSrcs = 0; | 3510 const SizeT MaxSrcs = 0; |
3396 Variable *Dest = NULL; | 3511 Variable *Dest = NULL; |
3397 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); | 3512 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); |
3398 lowerCall(Call); | 3513 lowerCall(Call); |
3399 } | 3514 } |
3400 | 3515 |
3516 // There is no support for loading or emitting vector constants, so the | |
3517 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | |
3518 // etc. are initialized with register operations. | |
3519 // | |
3520 // TODO(wala): Add limited support for vector constants so that | |
3521 // complex initialization in registers is unnecessary. | |
3522 | |
3401 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 3523 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
3402 // There is no support for loading or emitting vector constants, so | |
3403 // this value is initialized using register operations. | |
3404 Variable *Reg = makeReg(Ty, RegNum); | 3524 Variable *Reg = makeReg(Ty, RegNum); |
3405 // Insert a FakeDef, since otherwise the live range of Reg might | 3525 // Insert a FakeDef, since otherwise the live range of Reg might |
3406 // be overestimated. | 3526 // be overestimated. |
3407 Context.insert(InstFakeDef::create(Func, Reg)); | 3527 Context.insert(InstFakeDef::create(Func, Reg)); |
3408 _pxor(Reg, Reg); | 3528 _pxor(Reg, Reg); |
3409 return Reg; | 3529 return Reg; |
3410 } | 3530 } |
3411 | 3531 |
3532 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { | |
3533 Variable *MinusOnes = makeReg(Ty, RegNum); | |
3534 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | |
3535 Context.insert(InstFakeDef::create(Func, MinusOnes)); | |
3536 _pcmpeq(MinusOnes, MinusOnes); | |
3537 return MinusOnes; | |
3538 } | |
3539 | |
3412 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 3540 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
3413 // There is no support for loading or emitting vector constants, so | |
3414 // this value is initialized using register operations. | |
3415 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 3541 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
3416 Variable *MinusOne = makeReg(Ty); | 3542 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
3417 // Insert a FakeDef so the live range of MinusOne is not overestimated. | |
3418 Context.insert(InstFakeDef::create(Func, MinusOne)); | |
3419 _pcmpeq(MinusOne, MinusOne); | |
3420 _psub(Dest, MinusOne); | 3543 _psub(Dest, MinusOne); |
3421 return Dest; | 3544 return Dest; |
3422 } | 3545 } |
3423 | 3546 |
3547 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { | |
3548 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | |
3549 Ty == IceType_v16i8); | |
3550 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | |
3551 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | |
3552 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; | |
3553 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift)); | |
3554 return Reg; | |
3555 } else { | |
3556 // SSE has no left shift operation for vectors of 8 bit integers. | |
3557 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | |
3558 Constant *ConstantMask = | |
3559 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK); | |
3560 Variable *Reg = makeReg(Ty, RegNum); | |
3561 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | |
3562 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | |
3563 return Reg; | |
3564 } | |
3565 } | |
3566 | |
3424 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | 3567 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, |
3425 Variable *Slot, | 3568 Variable *Slot, |
3426 uint32_t Offset) { | 3569 uint32_t Offset) { |
3427 // Ensure that Loc is a stack slot. | 3570 // Ensure that Loc is a stack slot. |
3428 assert(Slot->getWeight() == RegWeight::Zero); | 3571 assert(Slot->getWeight() == RegWeight::Zero); |
3429 assert(Slot->getRegNum() == Variable::NoRegister); | 3572 assert(Slot->getRegNum() == Variable::NoRegister); |
3430 // Compute the location of Loc in memory. | 3573 // Compute the location of Loc in memory. |
3431 // TODO(wala,stichnot): lea should not be required. The address of | 3574 // TODO(wala,stichnot): lea should not be required. The address of |
3432 // the stack slot is known at compile time (although not until after | 3575 // the stack slot is known at compile time (although not until after |
3433 // addProlog()). | 3576 // addProlog()). |
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3752 for (SizeT i = 0; i < Size; ++i) { | 3895 for (SizeT i = 0; i < Size; ++i) { |
3753 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3896 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
3754 } | 3897 } |
3755 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3898 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
3756 } | 3899 } |
3757 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3900 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
3758 << "\n"; | 3901 << "\n"; |
3759 } | 3902 } |
3760 | 3903 |
3761 } // end of namespace Ice | 3904 } // end of namespace Ice |
OLD | NEW |