OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 2243 matching lines...) | |
2254 _mov(Dest, NonDefault); | 2254 _mov(Dest, NonDefault); |
2255 Context.insert(Label); | 2255 Context.insert(Label); |
2256 } | 2256 } |
2257 } | 2257 } |
2258 | 2258 |
2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
2260 Operand *Src0 = legalize(Inst->getSrc(0)); | 2260 Operand *Src0 = legalize(Inst->getSrc(0)); |
2261 Operand *Src1 = legalize(Inst->getSrc(1)); | 2261 Operand *Src1 = legalize(Inst->getSrc(1)); |
2262 Variable *Dest = Inst->getDest(); | 2262 Variable *Dest = Inst->getDest(); |
2263 | 2263 |
2264 if (isVectorType(Dest->getType())) { | |
2265 Type Ty = Src0->getType(); | |
2266 // Promote i1 vectors to 128 bit integer vector types. | |
2267 if (typeElementType(Ty) == IceType_i1) { | |
2268 Type NewTy = IceType_NUM; | |
2269 switch (Ty) { | |
2270 default: | |
2271 llvm_unreachable("unexpected type"); | |
2272 break; | |
2273 case IceType_v4i1: | |
2274 NewTy = IceType_v4i32; | |
2275 break; | |
2276 case IceType_v8i1: | |
2277 NewTy = IceType_v8i16; | |
2278 break; | |
2279 case IceType_v16i1: | |
2280 NewTy = IceType_v16i8; | |
2281 break; | |
2282 } | |
2283 Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode()); | |
2284 Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode()); | |
2285 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); | |
2286 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); | |
2287 Src0 = NewSrc0; | |
2288 Src1 = NewSrc1; | |
2289 Ty = NewTy; | |
2290 } | |
2291 | |
2292 InstIcmp::ICond Condition = Inst->getCondition(); | |
2293 | |
2294 // SSE2 only has signed comparison operations. Transform unsigned | |
2295 // inputs in a manner that allows for the use of signed comparison | |
2296 // operations by flipping the high order bits. | |
2297 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || | |
2298 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { | |
2299 Variable *T0 = makeReg(Ty); | |
2300 Variable *T1 = makeReg(Ty); | |
2301 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | |
2302 _movp(T0, Src0); | |
2303 _pxor(T0, HighOrderBits); | |
2304 _movp(T1, Src1); | |
2305 _pxor(T1, HighOrderBits); | |
2306 Src0 = T0; | |
2307 Src1 = T1; | |
2308 } | |
2309 | |
2310 // TODO: ALIGNHACK: Both operands to compare instructions need to be | |
2311 // in registers until stack alignment support is implemented. Once | |
2312 // there is support for stack alignment, LEGAL_HACK can be removed. | |
2313 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
2314 Variable *T = makeReg(Ty); | |
2315 switch (Condition) { | |
2316 default: | |
2317 llvm_unreachable("unexpected condition"); | |
2318 break; | |
2319 case InstIcmp::Eq: { | |
2320 _movp(T, Src0); | |
2321 _pcmpeq(T, LEGAL_HACK(Src1)); | |
2322 } break; | |
2323 case InstIcmp::Ne: { | |
2324 _movp(T, Src0); | |
2325 _pcmpeq(T, LEGAL_HACK(Src1)); | |
2326 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
2327 _pxor(T, MinusOne); | |
2328 } break; | |
2329 case InstIcmp::Ugt: | |
2330 case InstIcmp::Sgt: { | |
2331 _movp(T, Src0); | |
2332 _pcmpgt(T, LEGAL_HACK(Src1)); | |
2333 } break; | |
2334 case InstIcmp::Uge: | |
2335 case InstIcmp::Sge: { | |
2336 // !(Src1 > Src0) | |
2337 _movp(T, Src1); | |
2338 _pcmpgt(T, LEGAL_HACK(Src0)); | |
2339 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
2340 _pxor(T, MinusOne); | |
2341 } break; | |
2342 case InstIcmp::Ult: | |
2343 case InstIcmp::Slt: { | |
2344 _movp(T, Src1); | |
2345 _pcmpgt(T, LEGAL_HACK(Src0)); | |
2346 } break; | |
2347 case InstIcmp::Ule: | |
2348 case InstIcmp::Sle: { | |
2349 // !(Src0 > Src1) | |
2350 _movp(T, Src0); | |
2351 _pcmpgt(T, LEGAL_HACK(Src1)); | |
2352 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
2353 _pxor(T, MinusOne); | |
2354 } break; | |
2355 } | |
2356 #undef LEGAL_HACK | |
2357 | |
2358 _movp(Dest, T); | |
2359 | |
2360 // The following pattern occurs often in lowered C and C++ code: | |
2361 // | |
2362 // %cmp = icmp pred <n x ty> %src0, %src1 | |
2363 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | |
2364 // | |
2365 // We can avoid the sext operation by copying the result from pcmpgt | |
2366 // and pcmpeq, which is already sign extended, to the result of the | |
2367 // sext operation | |
2368 if (InstCast *NextCast = | |
2369 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | |
2370 if (NextCast->getCastKind() == InstCast::Sext && | |
2371 NextCast->getSrc(0) == Dest) { | |
2372 _movp(NextCast->getDest(), T); | |
2373 // Skip over the instruction. | |
2374 NextCast->setDeleted(); | |
2375 Context.advanceNext(); | |
2376 } | |
2377 } | |
2378 | |
2379 return; | |
2380 } | |
2381 | |
2264 // If Src1 is an immediate, or known to be a physical register, we can | 2382 // If Src1 is an immediate, or known to be a physical register, we can |
2265 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | 2383 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
2266 // a physical register. (Actually, either Src0 or Src1 can be chosen for | 2384 // a physical register. (Actually, either Src0 or Src1 can be chosen for |
2267 // the physical register, but unfortunately we have to commit to one or | 2385 // the physical register, but unfortunately we have to commit to one or |
2268 // the other before register allocation.) | 2386 // the other before register allocation.) |
2269 bool IsSrc1ImmOrReg = false; | 2387 bool IsSrc1ImmOrReg = false; |
2270 if (llvm::isa<Constant>(Src1)) { | 2388 if (llvm::isa<Constant>(Src1)) { |
2271 IsSrc1ImmOrReg = true; | 2389 IsSrc1ImmOrReg = true; |
2272 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 2390 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
2273 if (Var->hasReg()) | 2391 if (Var->hasReg()) |
2274 IsSrc1ImmOrReg = true; | 2392 IsSrc1ImmOrReg = true; |
2275 } | 2393 } |
2276 | 2394 |
2277 // Try to fuse a compare immediately followed by a conditional branch. This | 2395 // Try to fuse a compare immediately followed by a conditional branch. This |
2278 // is possible when the compare dest and the branch source operands are the | 2396 // is possible when the compare dest and the branch source operands are the |
2279 // same, and are their only uses. TODO: implement this optimization for i64. | 2397 // same, and are their only uses. TODO: implement this optimization for |
2280 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { | 2398 // i64. |
Jim Stichnoth
2014/07/23 20:58:01
Could revert this whitespace-only change. :)
wala
2014/07/23 21:02:22
Done.
| |
2399 if (InstBr *NextBr = | |
2400 llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { | |
2281 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && | 2401 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
2282 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { | 2402 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
2283 Operand *Src0New = | 2403 Operand *Src0New = |
2284 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | 2404 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
2285 _cmp(Src0New, Src1); | 2405 _cmp(Src0New, Src1); |
2286 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), | 2406 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
2287 NextBr->getTargetFalse()); | 2407 NextBr->getTargetFalse()); |
2288 // Skip over the following branch instruction. | 2408 // Skip over the following branch instruction. |
2289 NextBr->setDeleted(); | 2409 NextBr->setDeleted(); |
2290 Context.advanceNext(); | 2410 Context.advanceNext(); |
(...skipping 1100 matching lines...) | |
3391 _br(Inst->getLabelDefault()); | 3511 _br(Inst->getLabelDefault()); |
3392 } | 3512 } |
3393 | 3513 |
3394 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { | 3514 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
3395 const SizeT MaxSrcs = 0; | 3515 const SizeT MaxSrcs = 0; |
3396 Variable *Dest = NULL; | 3516 Variable *Dest = NULL; |
3397 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); | 3517 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); |
3398 lowerCall(Call); | 3518 lowerCall(Call); |
3399 } | 3519 } |
3400 | 3520 |
3521 // There is no support for loading or emitting vector constants, so the | |
3522 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | |
3523 // etc. are initialized with register operations. | |
3524 // | |
3525 // TODO(wala): Add limited support for vector constants so that | |
3526 // complex initialization in registers is unnecessary. | |
3527 | |
3401 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 3528 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
3402 // There is no support for loading or emitting vector constants, so | |
3403 // this value is initialized using register operations. | |
3404 Variable *Reg = makeReg(Ty, RegNum); | 3529 Variable *Reg = makeReg(Ty, RegNum); |
3405 // Insert a FakeDef, since otherwise the live range of Reg might | 3530 // Insert a FakeDef, since otherwise the live range of Reg might |
3406 // be overestimated. | 3531 // be overestimated. |
3407 Context.insert(InstFakeDef::create(Func, Reg)); | 3532 Context.insert(InstFakeDef::create(Func, Reg)); |
3408 _pxor(Reg, Reg); | 3533 _pxor(Reg, Reg); |
3409 return Reg; | 3534 return Reg; |
3410 } | 3535 } |
3411 | 3536 |
3537 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { | |
3538 Variable *MinusOnes = makeReg(Ty, RegNum); | |
3539 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | |
3540 Context.insert(InstFakeDef::create(Func, MinusOnes)); | |
3541 _pcmpeq(MinusOnes, MinusOnes); | |
3542 return MinusOnes; | |
3543 } | |
3544 | |
3412 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 3545 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
3413 // There is no support for loading or emitting vector constants, so | |
3414 // this value is initialized using register operations. | |
3415 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 3546 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
3416 Variable *MinusOne = makeReg(Ty); | 3547 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
3417 // Insert a FakeDef so the live range of MinusOne is not overestimated. | |
3418 Context.insert(InstFakeDef::create(Func, MinusOne)); | |
3419 _pcmpeq(MinusOne, MinusOne); | |
3420 _psub(Dest, MinusOne); | 3548 _psub(Dest, MinusOne); |
3421 return Dest; | 3549 return Dest; |
3422 } | 3550 } |
3423 | 3551 |
3552 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { | |
3553 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | |
3554 Ty == IceType_v16i8); | |
3555 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | |
3556 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | |
3557 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; | |
3558 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift)); | |
3559 return Reg; | |
3560 } else { | |
3561 // SSE has no left shift operation for vectors of 8 bit integers. | |
3562 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | |
3563 Constant *ConstantMask = | |
3564 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK); | |
3565 Variable *Reg = makeReg(Ty, RegNum); | |
3566 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | |
3567 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | |
3568 return Reg; | |
3569 } | |
3570 } | |
3571 | |
3424 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | 3572 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, |
3425 Variable *Slot, | 3573 Variable *Slot, |
3426 uint32_t Offset) { | 3574 uint32_t Offset) { |
3427 // Ensure that Loc is a stack slot. | 3575 // Ensure that Loc is a stack slot. |
3428 assert(Slot->getWeight() == RegWeight::Zero); | 3576 assert(Slot->getWeight() == RegWeight::Zero); |
3429 assert(Slot->getRegNum() == Variable::NoRegister); | 3577 assert(Slot->getRegNum() == Variable::NoRegister); |
3430 // Compute the location of Loc in memory. | 3578 // Compute the location of Loc in memory. |
3431 // TODO(wala,stichnot): lea should not be required. The address of | 3579 // TODO(wala,stichnot): lea should not be required. The address of |
3432 // the stack slot is known at compile time (although not until after | 3580 // the stack slot is known at compile time (although not until after |
3433 // addProlog()). | 3581 // addProlog()). |
(...skipping 318 matching lines...) | |
3752 for (SizeT i = 0; i < Size; ++i) { | 3900 for (SizeT i = 0; i < Size; ++i) { |
3753 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3901 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
3754 } | 3902 } |
3755 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3903 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
3756 } | 3904 } |
3757 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3905 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
3758 << "\n"; | 3906 << "\n"; |
3759 } | 3907 } |
3760 | 3908 |
3761 } // end of namespace Ice | 3909 } // end of namespace Ice |
OLD | NEW |