Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 2243 matching lines...) | (...skipping 2243 matching lines...) |
| 2254 _mov(Dest, NonDefault); | 2254 _mov(Dest, NonDefault); |
| 2255 Context.insert(Label); | 2255 Context.insert(Label); |
| 2256 } | 2256 } |
| 2257 } | 2257 } |
| 2258 | 2258 |
| 2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
| 2260 Operand *Src0 = legalize(Inst->getSrc(0)); | 2260 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 2261 Operand *Src1 = legalize(Inst->getSrc(1)); | 2261 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 2262 Variable *Dest = Inst->getDest(); | 2262 Variable *Dest = Inst->getDest(); |
| 2263 | 2263 |
| 2264 if (isVectorType(Dest->getType())) { | |
| 2265 Type Ty = Src0->getType(); | |
| 2266 // Promote i1 vectors to 128 bit integer vector types. | |
| 2267 if (typeElementType(Ty) == IceType_i1) { | |
| 2268 Type NewTy = IceType_NUM; | |
| 2269 switch (Ty) { | |
| 2270 default: | |
| 2271 llvm_unreachable("unexpected type"); | |
| 2272 break; | |
| 2273 case IceType_v4i1: | |
| 2274 NewTy = IceType_v4i32; | |
| 2275 break; | |
| 2276 case IceType_v8i1: | |
| 2277 NewTy = IceType_v8i16; | |
| 2278 break; | |
| 2279 case IceType_v16i1: | |
| 2280 NewTy = IceType_v16i8; | |
| 2281 break; | |
| 2282 } | |
| 2283 Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode()); | |
| 2284 Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode()); | |
| 2285 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); | |
| 2286 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); | |
| 2287 Src0 = NewSrc0; | |
| 2288 Src1 = NewSrc1; | |
| 2289 Ty = NewTy; | |
| 2290 } | |
| 2291 | |
| 2292 InstIcmp::ICond Condition = Inst->getCondition(); | |
| 2293 | |
| 2294 // SSE2 only has signed comparison operations. Transform unsigned | |
| 2295 // inputs in a manner that allows for the use of signed comparison | |
| 2296 // operations by flipping the high order bits. | |
| 2297 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || | |
| 2298 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { | |
| 2299 Variable *T0 = makeReg(Ty); | |
| 2300 Variable *T1 = makeReg(Ty); | |
| 2301 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | |
| 2302 _movp(T0, Src0); | |
| 2303 _pxor(T0, HighOrderBits); | |
| 2304 _movp(T1, Src1); | |
| 2305 _pxor(T1, HighOrderBits); | |
| 2306 Src0 = T0; | |
| 2307 Src1 = T1; | |
| 2308 } | |
| 2309 | |
| 2310 // TODO: ALIGNHACK: Both operands to compare instructions need to be | |
| 2311 // in registers until stack alignment support is implemented. Once | |
| 2312 // there is support for stack alignment, LEGAL_HACK can be removed. | |
| 2313 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
| 2314 Variable *T = makeReg(Ty); | |
| 2315 switch (Condition) { | |
| 2316 default: | |
| 2317 llvm_unreachable("unexpected condition"); | |
| 2318 break; | |
| 2319 case InstIcmp::Eq: { | |
| 2320 _movp(T, Src0); | |
| 2321 _pcmpeq(T, LEGAL_HACK(Src1)); | |
| 2322 } break; | |
| 2323 case InstIcmp::Ne: { | |
| 2324 _movp(T, Src0); | |
| 2325 _pcmpeq(T, LEGAL_HACK(Src1)); | |
| 2326 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 2327 _pxor(T, MinusOne); | |
| 2328 } break; | |
| 2329 case InstIcmp::Ugt: | |
| 2330 case InstIcmp::Sgt: { | |
| 2331 _movp(T, Src0); | |
| 2332 _pcmpgt(T, LEGAL_HACK(Src1)); | |
| 2333 } break; | |
| 2334 case InstIcmp::Uge: | |
| 2335 case InstIcmp::Sge: { | |
| 2336 // !(Src1 > Src0) | |
| 2337 _movp(T, Src1); | |
| 2338 _pcmpgt(T, LEGAL_HACK(Src0)); | |
| 2339 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 2340 _pxor(T, MinusOne); | |
| 2341 } break; | |
| 2342 case InstIcmp::Ult: | |
| 2343 case InstIcmp::Slt: { | |
| 2344 _movp(T, Src1); | |
| 2345 _pcmpgt(T, LEGAL_HACK(Src0)); | |
| 2346 } break; | |
| 2347 case InstIcmp::Ule: | |
| 2348 case InstIcmp::Sle: { | |
| 2349 // !(Src0 > Src1) | |
| 2350 _movp(T, Src0); | |
| 2351 _pcmpgt(T, LEGAL_HACK(Src1)); | |
| 2352 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 2353 _pxor(T, MinusOne); | |
| 2354 } break; | |
| 2355 } | |
| 2356 #undef LEGAL_HACK | |
| 2357 | |
| 2358 _movp(Dest, T); | |
| 2359 | |
| 2360 // The following pattern occurs often in lowered C and C++ code: | |
| 2361 // | |
| 2362 // %cmp = icmp pred <n x ty> %src0, %src1 | |
| 2363 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | |
| 2364 // | |
| 2365 // We can avoid the sext operation by copying the result from pcmpgt | |
| 2366 // and pcmpeq, which is already sign extended, to the result of the | |
| 2367 // sext operation | |
| 2368 if (InstCast *NextCast = | |
| 2369 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | |
| 2370 if (NextCast->getCastKind() == InstCast::Sext && | |
| 2371 NextCast->getSrc(0) == Dest) { | |
| 2372 _movp(NextCast->getDest(), T); | |
| 2373 // Skip over the instruction. | |
| 2374 NextCast->setDeleted(); | |
| 2375 Context.advanceNext(); | |
| 2376 } | |
| 2377 } | |
| 2378 | |
| 2379 return; | |
| 2380 } | |
| 2381 | |
| 2264 // If Src1 is an immediate, or known to be a physical register, we can | 2382 // If Src1 is an immediate, or known to be a physical register, we can |
| 2265 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | 2383 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
| 2266 // a physical register. (Actually, either Src0 or Src1 can be chosen for | 2384 // a physical register. (Actually, either Src0 or Src1 can be chosen for |
| 2267 // the physical register, but unfortunately we have to commit to one or | 2385 // the physical register, but unfortunately we have to commit to one or |
| 2268 // the other before register allocation.) | 2386 // the other before register allocation.) |
| 2269 bool IsSrc1ImmOrReg = false; | 2387 bool IsSrc1ImmOrReg = false; |
| 2270 if (llvm::isa<Constant>(Src1)) { | 2388 if (llvm::isa<Constant>(Src1)) { |
| 2271 IsSrc1ImmOrReg = true; | 2389 IsSrc1ImmOrReg = true; |
| 2272 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 2390 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 2273 if (Var->hasReg()) | 2391 if (Var->hasReg()) |
| 2274 IsSrc1ImmOrReg = true; | 2392 IsSrc1ImmOrReg = true; |
| 2275 } | 2393 } |
| 2276 | 2394 |
| 2277 // Try to fuse a compare immediately followed by a conditional branch. This | 2395 // Try to fuse a compare immediately followed by a conditional branch. This |
| 2278 // is possible when the compare dest and the branch source operands are the | 2396 // is possible when the compare dest and the branch source operands are the |
| 2279 // same, and are their only uses. TODO: implement this optimization for i64. | 2397 // same, and are their only uses. TODO: implement this optimization for |
| 2280 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { | 2398 // i64. |
|
Jim Stichnoth (2014/07/23 20:58:01): Could revert this whitespace-only change. :)
wala (2014/07/23 21:02:22): Done.
| |
| 2399 if (InstBr *NextBr = | |
| 2400 llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { | |
| 2281 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && | 2401 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
| 2282 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { | 2402 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
| 2283 Operand *Src0New = | 2403 Operand *Src0New = |
| 2284 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | 2404 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| 2285 _cmp(Src0New, Src1); | 2405 _cmp(Src0New, Src1); |
| 2286 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), | 2406 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
| 2287 NextBr->getTargetFalse()); | 2407 NextBr->getTargetFalse()); |
| 2288 // Skip over the following branch instruction. | 2408 // Skip over the following branch instruction. |
| 2289 NextBr->setDeleted(); | 2409 NextBr->setDeleted(); |
| 2290 Context.advanceNext(); | 2410 Context.advanceNext(); |
| (...skipping 1100 matching lines...) | (...skipping 1100 matching lines...) |
| 3391 _br(Inst->getLabelDefault()); | 3511 _br(Inst->getLabelDefault()); |
| 3392 } | 3512 } |
| 3393 | 3513 |
| 3394 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { | 3514 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
| 3395 const SizeT MaxSrcs = 0; | 3515 const SizeT MaxSrcs = 0; |
| 3396 Variable *Dest = NULL; | 3516 Variable *Dest = NULL; |
| 3397 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); | 3517 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); |
| 3398 lowerCall(Call); | 3518 lowerCall(Call); |
| 3399 } | 3519 } |
| 3400 | 3520 |
| 3521 // There is no support for loading or emitting vector constants, so the | |
| 3522 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | |
| 3523 // etc. are initialized with register operations. | |
| 3524 // | |
| 3525 // TODO(wala): Add limited support for vector constants so that | |
| 3526 // complex initialization in registers is unnecessary. | |
| 3527 | |
| 3401 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 3528 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
| 3402 // There is no support for loading or emitting vector constants, so | |
| 3403 // this value is initialized using register operations. | |
| 3404 Variable *Reg = makeReg(Ty, RegNum); | 3529 Variable *Reg = makeReg(Ty, RegNum); |
| 3405 // Insert a FakeDef, since otherwise the live range of Reg might | 3530 // Insert a FakeDef, since otherwise the live range of Reg might |
| 3406 // be overestimated. | 3531 // be overestimated. |
| 3407 Context.insert(InstFakeDef::create(Func, Reg)); | 3532 Context.insert(InstFakeDef::create(Func, Reg)); |
| 3408 _pxor(Reg, Reg); | 3533 _pxor(Reg, Reg); |
| 3409 return Reg; | 3534 return Reg; |
| 3410 } | 3535 } |
| 3411 | 3536 |
| 3537 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { | |
| 3538 Variable *MinusOnes = makeReg(Ty, RegNum); | |
| 3539 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | |
| 3540 Context.insert(InstFakeDef::create(Func, MinusOnes)); | |
| 3541 _pcmpeq(MinusOnes, MinusOnes); | |
| 3542 return MinusOnes; | |
| 3543 } | |
| 3544 | |
| 3412 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 3545 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
| 3413 // There is no support for loading or emitting vector constants, so | |
| 3414 // this value is initialized using register operations. | |
| 3415 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 3546 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| 3416 Variable *MinusOne = makeReg(Ty); | 3547 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 3417 // Insert a FakeDef so the live range of MinusOne is not overestimated. | |
| 3418 Context.insert(InstFakeDef::create(Func, MinusOne)); | |
| 3419 _pcmpeq(MinusOne, MinusOne); | |
| 3420 _psub(Dest, MinusOne); | 3548 _psub(Dest, MinusOne); |
| 3421 return Dest; | 3549 return Dest; |
| 3422 } | 3550 } |
| 3423 | 3551 |
| 3552 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { | |
| 3553 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | |
| 3554 Ty == IceType_v16i8); | |
| 3555 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | |
| 3556 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | |
| 3557 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; | |
| 3558 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift)); | |
| 3559 return Reg; | |
| 3560 } else { | |
| 3561 // SSE has no left shift operation for vectors of 8 bit integers. | |
| 3562 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | |
| 3563 Constant *ConstantMask = | |
| 3564 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK); | |
| 3565 Variable *Reg = makeReg(Ty, RegNum); | |
| 3566 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | |
| 3567 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | |
| 3568 return Reg; | |
| 3569 } | |
| 3570 } | |
| 3571 | |
| 3424 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | 3572 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, |
| 3425 Variable *Slot, | 3573 Variable *Slot, |
| 3426 uint32_t Offset) { | 3574 uint32_t Offset) { |
| 3427 // Ensure that Loc is a stack slot. | 3575 // Ensure that Loc is a stack slot. |
| 3428 assert(Slot->getWeight() == RegWeight::Zero); | 3576 assert(Slot->getWeight() == RegWeight::Zero); |
| 3429 assert(Slot->getRegNum() == Variable::NoRegister); | 3577 assert(Slot->getRegNum() == Variable::NoRegister); |
| 3430 // Compute the location of Loc in memory. | 3578 // Compute the location of Loc in memory. |
| 3431 // TODO(wala,stichnot): lea should not be required. The address of | 3579 // TODO(wala,stichnot): lea should not be required. The address of |
| 3432 // the stack slot is known at compile time (although not until after | 3580 // the stack slot is known at compile time (although not until after |
| 3433 // addProlog()). | 3581 // addProlog()). |
| (...skipping 318 matching lines...) | (...skipping 318 matching lines...) |
| 3752 for (SizeT i = 0; i < Size; ++i) { | 3900 for (SizeT i = 0; i < Size; ++i) { |
| 3753 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3901 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| 3754 } | 3902 } |
| 3755 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3903 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 3756 } | 3904 } |
| 3757 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3905 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
| 3758 << "\n"; | 3906 << "\n"; |
| 3759 } | 3907 } |
| 3760 | 3908 |
| 3761 } // end of namespace Ice | 3909 } // end of namespace Ice |
| OLD | NEW |