src/IceTargetLoweringX8632.cpp - Issue 412593002: Lower icmp operations between vector values.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 412593002: Lower icmp operations between vector values. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Address comments, add CHECK-LABEL directives to test Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

(...skipping 2243 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2254 _mov(Dest, NonDefault);	2254 _mov(Dest, NonDefault);

2255 Context.insert(Label);	2255 Context.insert(Label);

2256 }	2256 }

2257 }	2257 }

2258	2258

2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {	2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {

2260 Operand *Src0 = legalize(Inst->getSrc(0));	2260 Operand *Src0 = legalize(Inst->getSrc(0));

2261 Operand *Src1 = legalize(Inst->getSrc(1));	2261 Operand *Src1 = legalize(Inst->getSrc(1));

2262 Variable *Dest = Inst->getDest();	2262 Variable *Dest = Inst->getDest();

2263	2263

	2264 if (isVectorType(Dest->getType())) {

	2265 Type Ty = Src0->getType();

	2266 // Promote i1 vectors to 128 bit integer vector types.

	2267 if (typeElementType(Ty) == IceType_i1) {

	2268 Type NewTy = IceType_NUM;

	2269 switch (Ty) {

	2270 default:

	2271 llvm_unreachable("unexpected type");

	2272 break;

	2273 case IceType_v4i1:

	2274 NewTy = IceType_v4i32;

	2275 break;

	2276 case IceType_v8i1:

	2277 NewTy = IceType_v8i16;

	2278 break;

	2279 case IceType_v16i1:

	2280 NewTy = IceType_v16i8;

	2281 break;

	2282 }

	2283 Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());

	2284 Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());

	2285 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));

	2286 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));

	2287 Src0 = NewSrc0;

	2288 Src1 = NewSrc1;

	2289 Ty = NewTy;

	2290 }

	2291

	2292 InstIcmp::ICond Condition = Inst->getCondition();

	2293

	2294 // SSE2 only has signed comparison operations. Transform unsigned

	2295 // inputs in a manner that allows for the use of signed comparison

	2296 // operations by flipping the high order bits.

	2297 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||

	2298 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {

	2299 Variable *T0 = makeReg(Ty);

	2300 Variable *T1 = makeReg(Ty);

	2301 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);

	2302 _movp(T0, Src0);

	2303 _pxor(T0, HighOrderBits);

	2304 _movp(T1, Src1);

	2305 _pxor(T1, HighOrderBits);

	2306 Src0 = T0;

	2307 Src1 = T1;

	2308 }

	2309

	2310 // TODO: ALIGNHACK: Both operands to compare instructions need to be

	2311 // in registers until stack alignment support is implemented. Once

	2312 // there is support for stack alignment, LEGAL_HACK can be removed.

	2313 #define LEGAL_HACK(Vect) legalizeToVar((Vect))

	2314 Variable *T = makeReg(Ty);

	2315 switch (Condition) {

	2316 default:

	2317 llvm_unreachable("unexpected condition");

	2318 break;

	2319 case InstIcmp::Eq: {

	2320 _movp(T, Src0);

	2321 _pcmpeq(T, LEGAL_HACK(Src1));

	2322 } break;

	2323 case InstIcmp::Ne: {

	2324 _movp(T, Src0);

	2325 _pcmpeq(T, LEGAL_HACK(Src1));

	2326 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

	2327 _pxor(T, MinusOne);

	2328 } break;

	2329 case InstIcmp::Ugt:

	2330 case InstIcmp::Sgt: {

	2331 _movp(T, Src0);

	2332 _pcmpgt(T, LEGAL_HACK(Src1));

	2333 } break;

	2334 case InstIcmp::Uge:

	2335 case InstIcmp::Sge: {

	2336 // !(Src1 > Src0)

	2337 _movp(T, Src1);

	2338 _pcmpgt(T, LEGAL_HACK(Src0));

	2339 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

	2340 _pxor(T, MinusOne);

	2341 } break;

	2342 case InstIcmp::Ult:

	2343 case InstIcmp::Slt: {

	2344 _movp(T, Src1);

	2345 _pcmpgt(T, LEGAL_HACK(Src0));

	2346 } break;

	2347 case InstIcmp::Ule:

	2348 case InstIcmp::Sle: {

	2349 // !(Src0 > Src1)

	2350 _movp(T, Src0);

	2351 _pcmpgt(T, LEGAL_HACK(Src1));

	2352 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

	2353 _pxor(T, MinusOne);

	2354 } break;

	2355 }

	2356 #undef LEGAL_HACK

	2357

	2358 _movp(Dest, T);

	2359

	2360 // The following pattern occurs often in lowered C and C++ code:

	2361 //

	2362 // %cmp = icmp pred <n x ty> %src0, %src1

	2363 // %cmp.ext = sext <n x i1> %cmp to <n x ty>

	2364 //

	2365 // We can avoid the sext operation by copying the result from pcmpgt

	2366 // and pcmpeq, which is already sign extended, to the result of the

	2367 // sext operation

	2368 if (InstCast *NextCast =

	2369 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {

	2370 if (NextCast->getCastKind() == InstCast::Sext &&

	2371 NextCast->getSrc(0) == Dest) {

	2372 _movp(NextCast->getDest(), T);

	2373 // Skip over the instruction.

	2374 NextCast->setDeleted();

	2375 Context.advanceNext();

	2376 }

	2377 }

	2378

	2379 return;

	2380 }

	2381

2264 // If Src1 is an immediate, or known to be a physical register, we can	2382 // If Src1 is an immediate, or known to be a physical register, we can

2265 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into	2383 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into

2266 // a physical register. (Actually, either Src0 or Src1 can be chosen for	2384 // a physical register. (Actually, either Src0 or Src1 can be chosen for

2267 // the physical register, but unfortunately we have to commit to one or	2385 // the physical register, but unfortunately we have to commit to one or

2268 // the other before register allocation.)	2386 // the other before register allocation.)

2269 bool IsSrc1ImmOrReg = false;	2387 bool IsSrc1ImmOrReg = false;

2270 if (llvm::isa<Constant>(Src1)) {	2388 if (llvm::isa<Constant>(Src1)) {

2271 IsSrc1ImmOrReg = true;	2389 IsSrc1ImmOrReg = true;

2272 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {	2390 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {

2273 if (Var->hasReg())	2391 if (Var->hasReg())

2274 IsSrc1ImmOrReg = true;	2392 IsSrc1ImmOrReg = true;

2275 }	2393 }

2276	2394

2277 // Try to fuse a compare immediately followed by a conditional branch. This	2395 // Try to fuse a compare immediately followed by a conditional branch. This

2278 // is possible when the compare dest and the branch source operands are the	2396 // is possible when the compare dest and the branch source operands are the

2279 // same, and are their only uses. TODO: implement this optimization for i64.	2397 // same, and are their only uses. TODO: implement this optimization for

2280 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {	2398 // i64.
	Jim Stichnoth 2014/07/23 20:58:01: Could revert this whitespace-only change. :) wala 2014/07/23 21:02:22: On 2014/07/23 20:58:01, stichnot wrote: > Could revert this whitespace-only change. :) Done.
	2399 if (InstBr *NextBr =

	2400 llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {

2281 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&	2401 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&

2282 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {	2402 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {

2283 Operand *Src0New =	2403 Operand *Src0New =

2284 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);	2404 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);

2285 _cmp(Src0New, Src1);	2405 _cmp(Src0New, Src1);

2286 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),	2406 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),

2287 NextBr->getTargetFalse());	2407 NextBr->getTargetFalse());

2288 // Skip over the following branch instruction.	2408 // Skip over the following branch instruction.

2289 NextBr->setDeleted();	2409 NextBr->setDeleted();

2290 Context.advanceNext();	2410 Context.advanceNext();

(...skipping 1100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3391 _br(Inst->getLabelDefault());	3511 _br(Inst->getLabelDefault());

3392 }	3512 }

3393	3513

3394 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {	3514 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {

3395 const SizeT MaxSrcs = 0;	3515 const SizeT MaxSrcs = 0;

3396 Variable *Dest = NULL;	3516 Variable *Dest = NULL;

3397 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);	3517 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);

3398 lowerCall(Call);	3518 lowerCall(Call);

3399 }	3519 }

3400	3520

	3521 // There is no support for loading or emitting vector constants, so the

	3522 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,

	3523 // etc. are initialized with register operations.

	3524 //

	3525 // TODO(wala): Add limited support for vector constants so that

	3526 // complex initialization in registers is unnecessary.

	3527

3401 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {	3528 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {

3402 // There is no support for loading or emitting vector constants, so

3403 // this value is initialized using register operations.

3404 Variable *Reg = makeReg(Ty, RegNum);	3529 Variable *Reg = makeReg(Ty, RegNum);

3405 // Insert a FakeDef, since otherwise the live range of Reg might	3530 // Insert a FakeDef, since otherwise the live range of Reg might

3406 // be overestimated.	3531 // be overestimated.

3407 Context.insert(InstFakeDef::create(Func, Reg));	3532 Context.insert(InstFakeDef::create(Func, Reg));

3408 _pxor(Reg, Reg);	3533 _pxor(Reg, Reg);

3409 return Reg;	3534 return Reg;

3410 }	3535 }

3411	3536

	3537 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {

	3538 Variable *MinusOnes = makeReg(Ty, RegNum);

	3539 // Insert a FakeDef so the live range of MinusOnes is not overestimated.

	3540 Context.insert(InstFakeDef::create(Func, MinusOnes));

	3541 _pcmpeq(MinusOnes, MinusOnes);

	3542 return MinusOnes;

	3543 }

	3544

3412 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {	3545 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {

3413 // There is no support for loading or emitting vector constants, so

3414 // this value is initialized using register operations.

3415 Variable *Dest = makeVectorOfZeros(Ty, RegNum);	3546 Variable *Dest = makeVectorOfZeros(Ty, RegNum);

3416 Variable *MinusOne = makeReg(Ty);	3547 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

3417 // Insert a FakeDef so the live range of MinusOne is not overestimated.

3418 Context.insert(InstFakeDef::create(Func, MinusOne));

3419 _pcmpeq(MinusOne, MinusOne);

3420 _psub(Dest, MinusOne);	3548 _psub(Dest, MinusOne);

3421 return Dest;	3549 return Dest;

3422 }	3550 }

3423	3551

	3552 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {

	3553 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||

	3554 Ty == IceType_v16i8);

	3555 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {

	3556 Variable *Reg = makeVectorOfOnes(Ty, RegNum);

	3557 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;

	3558 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));

	3559 return Reg;

	3560 } else {

	3561 // SSE has no left shift operation for vectors of 8 bit integers.

	3562 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;

	3563 Constant *ConstantMask =

	3564 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);

	3565 Variable *Reg = makeReg(Ty, RegNum);

	3566 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));

	3567 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));

	3568 return Reg;

	3569 }

	3570 }

	3571

3424 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,	3572 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,

3425 Variable *Slot,	3573 Variable *Slot,

3426 uint32_t Offset) {	3574 uint32_t Offset) {

3427 // Ensure that Loc is a stack slot.	3575 // Ensure that Loc is a stack slot.

3428 assert(Slot->getWeight() == RegWeight::Zero);	3576 assert(Slot->getWeight() == RegWeight::Zero);

3429 assert(Slot->getRegNum() == Variable::NoRegister);	3577 assert(Slot->getRegNum() == Variable::NoRegister);

3430 // Compute the location of Loc in memory.	3578 // Compute the location of Loc in memory.

3431 // TODO(wala,stichnot): lea should not be required. The address of	3579 // TODO(wala,stichnot): lea should not be required. The address of

3432 // the stack slot is known at compile time (although not until after	3580 // the stack slot is known at compile time (although not until after

3433 // addProlog()).	3581 // addProlog()).

(...skipping 318 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3752 for (SizeT i = 0; i < Size; ++i) {	3900 for (SizeT i = 0; i < Size; ++i) {

3753 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";	3901 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";

3754 }	3902 }

3755 Str << "\t.size\t" << MangledName << ", " << Size << "\n";	3903 Str << "\t.size\t" << MangledName << ", " << Size << "\n";

3756 }	3904 }

3757 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName	3905 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName

3758 << "\n";	3906 << "\n";

3759 }	3907 }

3760	3908

3761 } // end of namespace Ice	3909 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-icmp.ll » ('j') | no next file with comments »