OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 2243 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2254 _mov(Dest, NonDefault); | 2254 _mov(Dest, NonDefault); |
2255 Context.insert(Label); | 2255 Context.insert(Label); |
2256 } | 2256 } |
2257 } | 2257 } |
2258 | 2258 |
2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
2260 Operand *Src0 = legalize(Inst->getSrc(0)); | 2260 Operand *Src0 = legalize(Inst->getSrc(0)); |
2261 Operand *Src1 = legalize(Inst->getSrc(1)); | 2261 Operand *Src1 = legalize(Inst->getSrc(1)); |
2262 Variable *Dest = Inst->getDest(); | 2262 Variable *Dest = Inst->getDest(); |
2263 | 2263 |
| 2264 if (isVectorType(Dest->getType())) { |
| 2265 Type Ty = Src0->getType(); |
| 2266 // Promote i1 vectors to 128 bit integer vector types. |
| 2267 if (typeElementType(Ty) == IceType_i1) { |
| 2268 Type NewTy = IceType_NUM; |
| 2269 switch (Ty) { |
| 2270 default: |
| 2271 llvm_unreachable("unexpected type"); |
| 2272 break; |
| 2273 case IceType_v4i1: |
| 2274 NewTy = IceType_v4i32; |
| 2275 break; |
| 2276 case IceType_v8i1: |
| 2277 NewTy = IceType_v8i16; |
| 2278 break; |
| 2279 case IceType_v16i1: |
| 2280 NewTy = IceType_v16i8; |
| 2281 break; |
| 2282 } |
| 2283 Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode()); |
| 2284 Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode()); |
| 2285 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); |
| 2286 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); |
| 2287 Src0 = NewSrc0; |
| 2288 Src1 = NewSrc1; |
| 2289 Ty = NewTy; |
| 2290 } |
| 2291 |
| 2292 InstIcmp::ICond Condition = Inst->getCondition(); |
| 2293 |
| 2294 // SSE2 only has signed comparison operations. Transform unsigned |
| 2295 // inputs in a manner that allows for the use of signed comparison |
| 2296 // operations by flipping the high order bits. |
| 2297 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || |
| 2298 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { |
| 2299 Variable *T0 = makeReg(Ty); |
| 2300 Variable *T1 = makeReg(Ty); |
| 2301 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
| 2302 _movp(T0, Src0); |
| 2303 _pxor(T0, HighOrderBits); |
| 2304 _movp(T1, Src1); |
| 2305 _pxor(T1, HighOrderBits); |
| 2306 Src0 = T0; |
| 2307 Src1 = T1; |
| 2308 } |
| 2309 |
| 2310 // TODO: ALIGNHACK: Both operands to compare instructions need to be |
| 2311 // in registers until stack alignment support is implemented. Once |
| 2312 // there is support for stack alignment, LEGAL_HACK can be removed. |
| 2313 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| 2314 Variable *T = makeReg(Ty); |
| 2315 switch (Condition) { |
| 2316 default: |
| 2317 llvm_unreachable("unexpected condition"); |
| 2318 break; |
| 2319 case InstIcmp::Eq: { |
| 2320 _movp(T, Src0); |
| 2321 _pcmpeq(T, LEGAL_HACK(Src1)); |
| 2322 } break; |
| 2323 case InstIcmp::Ne: { |
| 2324 _movp(T, Src0); |
| 2325 _pcmpeq(T, LEGAL_HACK(Src1)); |
| 2326 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2327 _pxor(T, MinusOne); |
| 2328 } break; |
| 2329 case InstIcmp::Ugt: |
| 2330 case InstIcmp::Sgt: { |
| 2331 _movp(T, Src0); |
| 2332 _pcmpgt(T, LEGAL_HACK(Src1)); |
| 2333 } break; |
| 2334 case InstIcmp::Uge: |
| 2335 case InstIcmp::Sge: { |
| 2336 // !(Src1 > Src0) |
| 2337 _movp(T, Src1); |
| 2338 _pcmpgt(T, LEGAL_HACK(Src0)); |
| 2339 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2340 _pxor(T, MinusOne); |
| 2341 } break; |
| 2342 case InstIcmp::Ult: |
| 2343 case InstIcmp::Slt: { |
| 2344 _movp(T, Src1); |
| 2345 _pcmpgt(T, LEGAL_HACK(Src0)); |
| 2346 } break; |
| 2347 case InstIcmp::Ule: |
| 2348 case InstIcmp::Sle: { |
| 2349 // !(Src0 > Src1) |
| 2350 _movp(T, Src0); |
| 2351 _pcmpgt(T, LEGAL_HACK(Src1)); |
| 2352 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2353 _pxor(T, MinusOne); |
| 2354 } break; |
| 2355 } |
| 2356 #undef LEGAL_HACK |
| 2357 |
| 2358 _movp(Dest, T); |
| 2359 |
| 2360 // The following pattern occurs often in lowered C and C++ code: |
| 2361 // |
| 2362 // %cmp = icmp pred <n x ty> %src0, %src1 |
| 2363 // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 2364 // |
| 2365 // We can avoid the sext operation by copying the result from pcmpgt |
| 2366 // and pcmpeq, which is already sign extended, to the result of the |
| 2367 // sext operation |
| 2368 if (InstCast *NextCast = |
| 2369 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| 2370 if (NextCast->getCastKind() == InstCast::Sext && |
| 2371 NextCast->getSrc(0) == Dest) { |
| 2372 _movp(NextCast->getDest(), T); |
| 2373 // Skip over the instruction. |
| 2374 NextCast->setDeleted(); |
| 2375 Context.advanceNext(); |
| 2376 } |
| 2377 } |
| 2378 |
| 2379 return; |
| 2380 } |
| 2381 |
2264 // If Src1 is an immediate, or known to be a physical register, we can | 2382 // If Src1 is an immediate, or known to be a physical register, we can |
2265 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | 2383 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
2266 // a physical register. (Actually, either Src0 or Src1 can be chosen for | 2384 // a physical register. (Actually, either Src0 or Src1 can be chosen for |
2267 // the physical register, but unfortunately we have to commit to one or | 2385 // the physical register, but unfortunately we have to commit to one or |
2268 // the other before register allocation.) | 2386 // the other before register allocation.) |
2269 bool IsSrc1ImmOrReg = false; | 2387 bool IsSrc1ImmOrReg = false; |
2270 if (llvm::isa<Constant>(Src1)) { | 2388 if (llvm::isa<Constant>(Src1)) { |
2271 IsSrc1ImmOrReg = true; | 2389 IsSrc1ImmOrReg = true; |
2272 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 2390 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
2273 if (Var->hasReg()) | 2391 if (Var->hasReg()) |
(...skipping 1117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3391 _br(Inst->getLabelDefault()); | 3509 _br(Inst->getLabelDefault()); |
3392 } | 3510 } |
3393 | 3511 |
3394 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { | 3512 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
3395 const SizeT MaxSrcs = 0; | 3513 const SizeT MaxSrcs = 0; |
3396 Variable *Dest = NULL; | 3514 Variable *Dest = NULL; |
3397 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); | 3515 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); |
3398 lowerCall(Call); | 3516 lowerCall(Call); |
3399 } | 3517 } |
3400 | 3518 |
| 3519 // There is no support for loading or emitting vector constants, so the |
| 3520 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
| 3521 // etc. are initialized with register operations. |
| 3522 // |
| 3523 // TODO(wala): Add limited support for vector constants so that |
| 3524 // complex initialization in registers is unnecessary. |
| 3525 |
3401 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 3526 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
3402 // There is no support for loading or emitting vector constants, so | |
3403 // this value is initialized using register operations. | |
3404 Variable *Reg = makeReg(Ty, RegNum); | 3527 Variable *Reg = makeReg(Ty, RegNum); |
3405 // Insert a FakeDef, since otherwise the live range of Reg might | 3528 // Insert a FakeDef, since otherwise the live range of Reg might |
3406 // be overestimated. | 3529 // be overestimated. |
3407 Context.insert(InstFakeDef::create(Func, Reg)); | 3530 Context.insert(InstFakeDef::create(Func, Reg)); |
3408 _pxor(Reg, Reg); | 3531 _pxor(Reg, Reg); |
3409 return Reg; | 3532 return Reg; |
3410 } | 3533 } |
3411 | 3534 |
| 3535 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { |
| 3536 Variable *MinusOnes = makeReg(Ty, RegNum); |
| 3537 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| 3538 Context.insert(InstFakeDef::create(Func, MinusOnes)); |
| 3539 _pcmpeq(MinusOnes, MinusOnes); |
| 3540 return MinusOnes; |
| 3541 } |
| 3542 |
3412 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 3543 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
3413 // There is no support for loading or emitting vector constants, so | |
3414 // this value is initialized using register operations. | |
3415 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 3544 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
3416 Variable *MinusOne = makeReg(Ty); | 3545 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
3417 // Insert a FakeDef so the live range of MinusOne is not overestimated. | |
3418 Context.insert(InstFakeDef::create(Func, MinusOne)); | |
3419 _pcmpeq(MinusOne, MinusOne); | |
3420 _psub(Dest, MinusOne); | 3546 _psub(Dest, MinusOne); |
3421 return Dest; | 3547 return Dest; |
3422 } | 3548 } |
3423 | 3549 |
| 3550 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { |
| 3551 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
| 3552 Ty == IceType_v16i8); |
| 3553 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
| 3554 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
| 3555 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; |
| 3556 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift)); |
| 3557 return Reg; |
| 3558 } else { |
| 3559 // SSE has no left shift operation for vectors of 8 bit integers. |
| 3560 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 3561 Constant *ConstantMask = |
| 3562 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK); |
| 3563 Variable *Reg = makeReg(Ty, RegNum); |
| 3564 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 3565 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| 3566 return Reg; |
| 3567 } |
| 3568 } |
| 3569 |
3424 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | 3570 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, |
3425 Variable *Slot, | 3571 Variable *Slot, |
3426 uint32_t Offset) { | 3572 uint32_t Offset) { |
3427 // Ensure that Loc is a stack slot. | 3573 // Ensure that Loc is a stack slot. |
3428 assert(Slot->getWeight() == RegWeight::Zero); | 3574 assert(Slot->getWeight() == RegWeight::Zero); |
3429 assert(Slot->getRegNum() == Variable::NoRegister); | 3575 assert(Slot->getRegNum() == Variable::NoRegister); |
3430 // Compute the location of Loc in memory. | 3576 // Compute the location of Loc in memory. |
3431 // TODO(wala,stichnot): lea should not be required. The address of | 3577 // TODO(wala,stichnot): lea should not be required. The address of |
3432 // the stack slot is known at compile time (although not until after | 3578 // the stack slot is known at compile time (although not until after |
3433 // addProlog()). | 3579 // addProlog()). |
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3752 for (SizeT i = 0; i < Size; ++i) { | 3898 for (SizeT i = 0; i < Size; ++i) { |
3753 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3899 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
3754 } | 3900 } |
3755 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3901 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
3756 } | 3902 } |
3757 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3903 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
3758 << "\n"; | 3904 << "\n"; |
3759 } | 3905 } |
3760 | 3906 |
3761 } // end of namespace Ice | 3907 } // end of namespace Ice |
OLD | NEW |