Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(171)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 412593002: Lower icmp operations between vector values. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Remove unused typedefs. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-icmp.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 2243 matching lines...) Expand 10 before | Expand all | Expand 10 after
2254 _mov(Dest, NonDefault); 2254 _mov(Dest, NonDefault);
2255 Context.insert(Label); 2255 Context.insert(Label);
2256 } 2256 }
2257 } 2257 }
2258 2258
2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2260 Operand *Src0 = legalize(Inst->getSrc(0)); 2260 Operand *Src0 = legalize(Inst->getSrc(0));
2261 Operand *Src1 = legalize(Inst->getSrc(1)); 2261 Operand *Src1 = legalize(Inst->getSrc(1));
2262 Variable *Dest = Inst->getDest(); 2262 Variable *Dest = Inst->getDest();
2263 2263
2264 if (isVectorType(Dest->getType())) {
2265 Type Ty = Src0->getType();
2266 // Promote i1 vectors to 128 bit integer vector types.
2267 if (typeElementType(Ty) == IceType_i1) {
2268 Type NewTy = IceType_NUM;
2269 switch (Ty) {
2270 default:
2271 llvm_unreachable("unexpected type");
2272 break;
2273 case IceType_v4i1:
2274 NewTy = IceType_v4i32;
2275 break;
2276 case IceType_v8i1:
2277 NewTy = IceType_v8i16;
2278 break;
2279 case IceType_v16i1:
2280 NewTy = IceType_v16i8;
2281 break;
2282 }
2283 Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
2284 Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
2285 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2286 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2287 Src0 = NewSrc0;
2288 Src1 = NewSrc1;
2289 Ty = NewTy;
2290 }
2291
2292 InstIcmp::ICond Condition = Inst->getCondition();
2293
2294 // SSE2 only has signed comparison operations. Transform unsigned
2295 // inputs in a manner that allows for the use of signed comparison
2296 // operations by flipping the high order bits.
2297 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2298 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2299 Variable *T0 = makeReg(Ty);
2300 Variable *T1 = makeReg(Ty);
2301 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2302 _movp(T0, Src0);
2303 _pxor(T0, HighOrderBits);
2304 _movp(T1, Src1);
2305 _pxor(T1, HighOrderBits);
2306 Src0 = T0;
2307 Src1 = T1;
2308 }
2309
2310 // TODO: ALIGNHACK: Both operands to compare instructions need to be
2311 // in registers until stack alignment support is implemented. Once
2312 // there is support for stack alignment, LEGAL_HACK can be removed.
2313 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2314 Variable *T = makeReg(Ty);
2315 switch (Condition) {
2316 default:
2317 llvm_unreachable("unexpected condition");
2318 break;
2319 case InstIcmp::Eq: {
2320 _movp(T, Src0);
2321 _pcmpeq(T, LEGAL_HACK(Src1));
2322 } break;
2323 case InstIcmp::Ne: {
2324 _movp(T, Src0);
2325 _pcmpeq(T, LEGAL_HACK(Src1));
2326 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2327 _pxor(T, MinusOne);
2328 } break;
2329 case InstIcmp::Ugt:
2330 case InstIcmp::Sgt: {
2331 _movp(T, Src0);
2332 _pcmpgt(T, LEGAL_HACK(Src1));
2333 } break;
2334 case InstIcmp::Uge:
2335 case InstIcmp::Sge: {
2336 // !(Src1 > Src0)
2337 _movp(T, Src1);
2338 _pcmpgt(T, LEGAL_HACK(Src0));
2339 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2340 _pxor(T, MinusOne);
2341 } break;
2342 case InstIcmp::Ult:
2343 case InstIcmp::Slt: {
2344 _movp(T, Src1);
2345 _pcmpgt(T, LEGAL_HACK(Src0));
2346 } break;
2347 case InstIcmp::Ule:
2348 case InstIcmp::Sle: {
2349 // !(Src0 > Src1)
2350 _movp(T, Src0);
2351 _pcmpgt(T, LEGAL_HACK(Src1));
2352 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2353 _pxor(T, MinusOne);
2354 } break;
2355 }
2356 #undef LEGAL_HACK
2357
2358 _movp(Dest, T);
2359
2360 // The following pattern occurs often in lowered C and C++ code:
2361 //
2362 // %cmp = icmp pred <n x ty> %src0, %src1
2363 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
2364 //
2365 // We can avoid the sext operation by copying the result from pcmpgt
2366 // and pcmpeq, which is already sign extended, to the result of the
2367 // sext operation
2368 if (InstCast *NextCast =
2369 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
2370 if (NextCast->getCastKind() == InstCast::Sext &&
2371 NextCast->getSrc(0) == Dest) {
2372 _movp(NextCast->getDest(), T);
2373 // Skip over the instruction.
2374 NextCast->setDeleted();
2375 Context.advanceNext();
2376 }
2377 }
2378
2379 return;
2380 }
2381
2264 // If Src1 is an immediate, or known to be a physical register, we can 2382 // If Src1 is an immediate, or known to be a physical register, we can
2265 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into 2383 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2266 // a physical register. (Actually, either Src0 or Src1 can be chosen for 2384 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2267 // the physical register, but unfortunately we have to commit to one or 2385 // the physical register, but unfortunately we have to commit to one or
2268 // the other before register allocation.) 2386 // the other before register allocation.)
2269 bool IsSrc1ImmOrReg = false; 2387 bool IsSrc1ImmOrReg = false;
2270 if (llvm::isa<Constant>(Src1)) { 2388 if (llvm::isa<Constant>(Src1)) {
2271 IsSrc1ImmOrReg = true; 2389 IsSrc1ImmOrReg = true;
2272 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 2390 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
2273 if (Var->hasReg()) 2391 if (Var->hasReg())
(...skipping 1117 matching lines...) Expand 10 before | Expand all | Expand 10 after
3391 _br(Inst->getLabelDefault()); 3509 _br(Inst->getLabelDefault());
3392 } 3510 }
3393 3511
// Lowers an unreachable instruction into a call to the "ice_unreachable"
// helper. The helper call is created with no destination variable and with
// room for zero source operands, and is then lowered like any other call.
3394 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { 3512 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
3395 const SizeT MaxSrcs = 0; 3513 const SizeT MaxSrcs = 0;
3396 Variable *Dest = NULL; 3514 Variable *Dest = NULL;
3397 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); 3515 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
3398 lowerCall(Call); 3516 lowerCall(Call);
3399 } 3517 }
3400 3518
3519 // There is no support for loading or emitting vector constants, so the
3520 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
3521 // etc. are initialized with register operations.
3522 //
3523 // TODO(wala): Add limited support for vector constants so that
3524 // complex initialization in registers is unnecessary.
3525
// Returns a new variable of type Ty, obtained from makeReg(Ty, RegNum), that
// is cleared by xor-ing it with itself (pxor Reg, Reg).
3401 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { 3526 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
3402 // There is no support for loading or emitting vector constants, so
3403 // this value is initialized using register operations.
3404 Variable *Reg = makeReg(Ty, RegNum); 3527 Variable *Reg = makeReg(Ty, RegNum);
3405 // Insert a FakeDef, since otherwise the live range of Reg might 3528 // Insert a FakeDef, since otherwise the live range of Reg might
3406 // be overestimated. 3529 // be overestimated.
3407 Context.insert(InstFakeDef::create(Func, Reg));  3530 Context.insert(InstFakeDef::create(Func, Reg));
3408 _pxor(Reg, Reg); 3531 _pxor(Reg, Reg);
3409 return Reg; 3532 return Reg;
3410 } 3533 }
3411 3534
// Returns a new variable of type Ty, obtained from makeReg(Ty, RegNum), in
// which every element is -1 (all bits set). The value is produced by
// comparing the register with itself for equality: every lane compares
// equal, and pcmpeq writes all ones into each lane that compares equal.
3535 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
3536 Variable *MinusOnes = makeReg(Ty, RegNum);
3537 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
3538 Context.insert(InstFakeDef::create(Func, MinusOnes));
3539 _pcmpeq(MinusOnes, MinusOnes);
3540 return MinusOnes;
3541 }
3542
// Returns a vector of type Ty whose elements are each 1. The result starts
// as a vector of zeros (placed in RegNum) and a vector of minus ones is
// subtracted from it, i.e. 0 - (-1) per element.
3412 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { 3543 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
3413 // There is no support for loading or emitting vector constants, so
3414 // this value is initialized using register operations.
3415 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 3544 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
3416 Variable *MinusOne = makeReg(Ty); 3545 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3417 // Insert a FakeDef so the live range of MinusOne is not overestimated.
3418 Context.insert(InstFakeDef::create(Func, MinusOne));
3419 _pcmpeq(MinusOne, MinusOne);
3420 _psub(Dest, MinusOne); 3546 _psub(Dest, MinusOne);
3421 return Dest; 3547 return Dest;
3422 } 3548 }
3423 3549
// Returns a vector of type Ty in which only the most significant bit of each
// element is set. Ty must be one of v4i32, v4f32, v8i16 or v16i8 (asserted).
// RegNum is forwarded to the helper that creates the result variable.
3550 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
3551 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
3552 Ty == IceType_v16i8);
3553 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
// 16- and 32-bit elements: start from a vector of ones and shift each
// element left by (element width in bits - 1), which moves the single set
// bit into the top position.
3554 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
3555 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
3556 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
3557 return Reg;
3558 } else {
3559 // SSE has no left shift operation for vectors of 8 bit integers.
// Instead, a 32-bit constant with the high bit of each of its four bytes
// set (0x80808080) is moved into the register with movd and then
// replicated with pshufd using a selector of zero.
3560 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
3561 Constant *ConstantMask =
3562 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
3563 Variable *Reg = makeReg(Ty, RegNum);
3564 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
3565 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
3566 return Reg;
3567 }
3568 }
3569
3424 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 3570 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
3425 Variable *Slot, 3571 Variable *Slot,
3426 uint32_t Offset) { 3572 uint32_t Offset) {
3427 // Ensure that Loc is a stack slot. 3573 // Ensure that Loc is a stack slot.
3428 assert(Slot->getWeight() == RegWeight::Zero); 3574 assert(Slot->getWeight() == RegWeight::Zero);
3429 assert(Slot->getRegNum() == Variable::NoRegister); 3575 assert(Slot->getRegNum() == Variable::NoRegister);
3430 // Compute the location of Loc in memory. 3576 // Compute the location of Loc in memory.
3431 // TODO(wala,stichnot): lea should not be required. The address of 3577 // TODO(wala,stichnot): lea should not be required. The address of
3432 // the stack slot is known at compile time (although not until after 3578 // the stack slot is known at compile time (although not until after
3433 // addProlog()). 3579 // addProlog()).
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after
3752 for (SizeT i = 0; i < Size; ++i) { 3898 for (SizeT i = 0; i < Size; ++i) {
3753 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 3899 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3754 } 3900 }
3755 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 3901 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3756 } 3902 }
3757 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 3903 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3758 << "\n"; 3904 << "\n";
3759 } 3905 }
3760 3906
3761 } // end of namespace Ice 3907 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-icmp.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698