src/IceTargetLoweringX86BaseImpl.h - Issue 1775253003: Cache common constants before lowering.

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1775253003: Cache common constants before lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Clean up code. Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 1507 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1518 return false;	1518 return false;

1519 Type Ty = Dest->getType();	1519 Type Ty = Dest->getType();

1520 if (Src1 == -1) {	1520 if (Src1 == -1) {

1521 Variable *T = nullptr;	1521 Variable *T = nullptr;

1522 _mov(T, Src0);	1522 _mov(T, Src0);

1523 _neg(T);	1523 _neg(T);

1524 _mov(Dest, T);	1524 _mov(Dest, T);

1525 return true;	1525 return true;

1526 }	1526 }

1527 if (Src1 == 0) {	1527 if (Src1 == 0) {

1528 _mov(Dest, Ctx->getConstantZero(Ty));	1528 _mov(Dest, getConstantZero(Ty));

1529 return true;	1529 return true;

1530 }	1530 }

1531 if (Src1 == 1) {	1531 if (Src1 == 1) {

1532 Variable *T = nullptr;	1532 Variable *T = nullptr;

1533 _mov(T, Src0);	1533 _mov(T, Src0);

1534 _mov(Dest, T);	1534 _mov(Dest, T);

1535 return true;	1535 return true;

1536 }	1536 }

1537 // Don't bother with the edge case where Src1 == MININT.	1537 // Don't bother with the edge case where Src1 == MININT.

1538 if (Src1 == -Src1)	1538 if (Src1 == -Src1)

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1575 // somewhat arbitrary choice of 3.	1575 // somewhat arbitrary choice of 3.

1576 constexpr uint32_t MaxOpsForOptimizedMul = 3;	1576 constexpr uint32_t MaxOpsForOptimizedMul = 3;

1577 if (CountOps > MaxOpsForOptimizedMul)	1577 if (CountOps > MaxOpsForOptimizedMul)

1578 return false;	1578 return false;

1579 Variable *T = makeReg(Traits::WordType);	1579 Variable *T = makeReg(Traits::WordType);

1580 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) {	1580 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) {

1581 _movzx(T, Src0);	1581 _movzx(T, Src0);

1582 } else {	1582 } else {

1583 _mov(T, Src0);	1583 _mov(T, Src0);

1584 }	1584 }

1585 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1585 Constant *Zero = getConstantZero(IceType_i32);

1586 for (uint32_t i = 0; i < Count9; ++i) {	1586 for (uint32_t i = 0; i < Count9; ++i) {

1587 constexpr uint16_t Shift = 3; // log2(9-1)	1587 constexpr uint16_t Shift = 3; // log2(9-1)

1588 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));	1588 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));

1589 }	1589 }

1590 for (uint32_t i = 0; i < Count5; ++i) {	1590 for (uint32_t i = 0; i < Count5; ++i) {

1591 constexpr uint16_t Shift = 2; // log2(5-1)	1591 constexpr uint16_t Shift = 2; // log2(5-1)

1592 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));	1592 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));

1593 }	1593 }

1594 for (uint32_t i = 0; i < Count3; ++i) {	1594 for (uint32_t i = 0; i < Count3; ++i) {

1595 constexpr uint16_t Shift = 1; // log2(3-1)	1595 constexpr uint16_t Shift = 1; // log2(3-1)

1596 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));	1596 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));

1597 }	1597 }

1598 if (Count2) {	1598 if (Count2) {

1599 _shl(T, Ctx->getConstantInt(Ty, Count2));	1599 _shl(T, Ctx->getConstantInt(Ty, Count2));

1600 }	1600 }

1601 if (Src1IsNegative)	1601 if (Src1IsNegative)

1602 _neg(T);	1602 _neg(T);

1603 _mov(Dest, T);	1603 _mov(Dest, T);

1604 return true;	1604 return true;

1605 }	1605 }

1606	1606

1607 template <typename TraitsType>	1607 template <typename TraitsType>

1608 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op,	1608 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op,

1609 Operand *Src0Lo, Operand *Src0Hi,	1609 Operand *Src0Lo, Operand *Src0Hi,

1610 Operand *Src1Lo, Variable *DestLo,	1610 Operand *Src1Lo, Variable *DestLo,

1611 Variable *DestHi) {	1611 Variable *DestHi) {

1612 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.	1612 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.

1613 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;	1613 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;

1614 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1614 Constant *Zero = getConstantZero(IceType_i32);

1615 Constant *SignExtend = Ctx->getConstantInt32(0x1f);	1615 Constant *SignExtend = Ctx->getConstantInt32(0x1f);

1616 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {	1616 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {

1617 uint32_t ShiftAmount = ConstantShiftAmount->getValue();	1617 uint32_t ShiftAmount = ConstantShiftAmount->getValue();

1618 if (ShiftAmount > 32) {	1618 if (ShiftAmount > 32) {

1619 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32);	1619 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32);

1620 switch (Op) {	1620 switch (Op) {

1621 default:	1621 default:

1622 assert(0 && "non-shift op");	1622 assert(0 && "non-shift op");

1623 break;	1623 break;

1624 case InstArithmetic::Shl: {	1624 case InstArithmetic::Shl: {

(...skipping 569 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2194 Eax = Traits::RegisterSet::Reg_ax;	2194 Eax = Traits::RegisterSet::Reg_ax;

2195 Edx = Traits::RegisterSet::Reg_dx;	2195 Edx = Traits::RegisterSet::Reg_dx;

2196 break;	2196 break;

2197 case IceType_i8:	2197 case IceType_i8:

2198 Eax = Traits::RegisterSet::Reg_al;	2198 Eax = Traits::RegisterSet::Reg_al;

2199 Edx = Traits::RegisterSet::Reg_ah;	2199 Edx = Traits::RegisterSet::Reg_ah;

2200 break;	2200 break;

2201 }	2201 }

2202 T_edx = makeReg(Ty, Edx);	2202 T_edx = makeReg(Ty, Edx);

2203 _mov(T, Src0, Eax);	2203 _mov(T, Src0, Eax);

2204 _mov(T_edx, Ctx->getConstantZero(Ty));	2204 _mov(T_edx, getConstantZero(Ty));

2205 _div(T, Src1, T_edx);	2205 _div(T, Src1, T_edx);

2206 _mov(Dest, T);	2206 _mov(Dest, T);

2207 } break;	2207 } break;

2208 case InstArithmetic::Sdiv:	2208 case InstArithmetic::Sdiv:

2209 // TODO(stichnot): Enable this after doing better performance and cross	2209 // TODO(stichnot): Enable this after doing better performance and cross

2210 // testing.	2210 // testing.

2211 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {	2211 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {

2212 // Optimize division by constant power of 2, but not for Om1 or O0, just	2212 // Optimize division by constant power of 2, but not for Om1 or O0, just

2213 // to keep things simple there.	2213 // to keep things simple there.

2214 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {	2214 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2283 case IceType_i16:	2283 case IceType_i16:

2284 Eax = Traits::RegisterSet::Reg_ax;	2284 Eax = Traits::RegisterSet::Reg_ax;

2285 Edx = Traits::RegisterSet::Reg_dx;	2285 Edx = Traits::RegisterSet::Reg_dx;

2286 break;	2286 break;

2287 case IceType_i8:	2287 case IceType_i8:

2288 Eax = Traits::RegisterSet::Reg_al;	2288 Eax = Traits::RegisterSet::Reg_al;

2289 Edx = Traits::RegisterSet::Reg_ah;	2289 Edx = Traits::RegisterSet::Reg_ah;

2290 break;	2290 break;

2291 }	2291 }

2292 T_edx = makeReg(Ty, Edx);	2292 T_edx = makeReg(Ty, Edx);

2293 _mov(T_edx, Ctx->getConstantZero(Ty));	2293 _mov(T_edx, getConstantZero(Ty));

2294 _mov(T, Src0, Eax);	2294 _mov(T, Src0, Eax);

2295 _div(T_edx, Src1, T);	2295 _div(T_edx, Src1, T);

2296 _mov(Dest, T_edx);	2296 _mov(Dest, T_edx);

2297 } break;	2297 } break;

2298 case InstArithmetic::Srem: {	2298 case InstArithmetic::Srem: {

2299 // TODO(stichnot): Enable this after doing better performance and cross	2299 // TODO(stichnot): Enable this after doing better performance and cross

2300 // testing.	2300 // testing.

2301 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {	2301 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {

2302 // Optimize mod by constant power of 2, but not for Om1 or O0, just to	2302 // Optimize mod by constant power of 2, but not for Om1 or O0, just to

2303 // keep things simple there.	2303 // keep things simple there.

2304 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {	2304 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

2305 const int32_t Divisor = C->getValue();	2305 const int32_t Divisor = C->getValue();

2306 const uint32_t UDivisor = Divisor;	2306 const uint32_t UDivisor = Divisor;

2307 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {	2307 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {

2308 uint32_t LogDiv = llvm::Log2_32(UDivisor);	2308 uint32_t LogDiv = llvm::Log2_32(UDivisor);

2309 // LLVM does the following for dest=src%(1<<log):	2309 // LLVM does the following for dest=src%(1<<log):

2310 // t=src	2310 // t=src

2311 // sar t,typewidth-1 // -1 if src is negative, 0 if not	2311 // sar t,typewidth-1 // -1 if src is negative, 0 if not

2312 // shr t,typewidth-log	2312 // shr t,typewidth-log

2313 // add t,src	2313 // add t,src

2314 // and t, -(1<<log)	2314 // and t, -(1<<log)

2315 // sub t,src	2315 // sub t,src

2316 // neg t	2316 // neg t

2317 // dest=t	2317 // dest=t

2318 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);	2318 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);

2319 // If for some reason we are dividing by 1, just assign 0.	2319 // If for some reason we are dividing by 1, just assign 0.

2320 if (LogDiv == 0) {	2320 if (LogDiv == 0) {

2321 _mov(Dest, Ctx->getConstantZero(Ty));	2321 _mov(Dest, getConstantZero(Ty));

2322 return;	2322 return;

2323 }	2323 }

2324 _mov(T, Src0);	2324 _mov(T, Src0);

2325 // The initial sar is unnecessary when dividing by 2.	2325 // The initial sar is unnecessary when dividing by 2.

2326 if (LogDiv > 1)	2326 if (LogDiv > 1)

2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));	2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));

2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));	2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));

2329 _add(T, Src0);	2329 _add(T, Src0);

2330 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));	2330 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));

2331 _sub(T, Src0);	2331 _sub(T, Src0);

(...skipping 93 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2425 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br);	2425 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br);

2426 return;	2426 return;

2427 }	2427 }

2428 case BoolFolding<Traits>::PK_Arith: {	2428 case BoolFolding<Traits>::PK_Arith: {

2429 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br);	2429 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br);

2430 return;	2430 return;

2431 }	2431 }

2432 }	2432 }

2433 }	2433 }

2434 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);	2434 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);

2435 Constant *Zero = Ctx->getConstantZero(IceType_i32);	2435 Constant *Zero = getConstantZero(IceType_i32);

2436 _cmp(Src0, Zero);	2436 _cmp(Src0, Zero);

2437 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());	2437 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());

2438 }	2438 }

2439	2439

2440 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining	2440 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining

2441 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't.	2441 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't.

2442 inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) {	2442 inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) {

2443 return S0 < S1 ? S1 : S0;	2443 return S0 < S1 ? S1 : S0;

2444 }	2444 }

2445	2445

(...skipping 278 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2724 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);	2724 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);

2725 if (isVectorType(DestTy)) {	2725 if (isVectorType(DestTy)) {

2726 // onemask = materialize(1,1,...); dest = onemask & src	2726 // onemask = materialize(1,1,...); dest = onemask & src

2727 Variable *OneMask = makeVectorOfOnes(DestTy);	2727 Variable *OneMask = makeVectorOfOnes(DestTy);

2728 Variable *T = makeReg(DestTy);	2728 Variable *T = makeReg(DestTy);

2729 _movp(T, Src0RM);	2729 _movp(T, Src0RM);

2730 _pand(T, OneMask);	2730 _pand(T, OneMask);

2731 _movp(Dest, T);	2731 _movp(Dest, T);

2732 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {	2732 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {

2733 // t1=movzx src; dst.lo=t1; dst.hi=0	2733 // t1=movzx src; dst.lo=t1; dst.hi=0

2734 Constant *Zero = Ctx->getConstantZero(IceType_i32);	2734 Constant *Zero = getConstantZero(IceType_i32);

2735 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));	2735 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));

2736 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));	2736 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));

2737 Variable *Tmp = makeReg(DestLo->getType());	2737 Variable *Tmp = makeReg(DestLo->getType());

2738 if (Src0RM->getType() == IceType_i32) {	2738 if (Src0RM->getType() == IceType_i32) {

2739 _mov(Tmp, Src0RM);	2739 _mov(Tmp, Src0RM);

2740 } else {	2740 } else {

2741 _movzx(Tmp, Src0RM);	2741 _movzx(Tmp, Src0RM);

2742 }	2742 }

2743 _mov(DestLo, Tmp);	2743 _mov(DestLo, Tmp);

2744 _mov(DestHi, Zero);	2744 _mov(DestHi, Zero);

(...skipping 1161 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3906 Operand *ByteSize = Instr->getArg(0);	3906 Operand *ByteSize = Instr->getArg(0);

3907 Variable *Dest = Instr->getDest();	3907 Variable *Dest = Instr->getDest();

3908 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {	3908 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {

3909 Constant *Result;	3909 Constant *Result;

3910 switch (CI->getValue()) {	3910 switch (CI->getValue()) {

3911 default:	3911 default:

3912 // Some x86-64 processors support the cmpxchg16b instruction, which can	3912 // Some x86-64 processors support the cmpxchg16b instruction, which can

3913 // make 16-byte operations lock free (when used with the LOCK prefix).	3913 // make 16-byte operations lock free (when used with the LOCK prefix).

3914 // However, that's not supported in 32-bit mode, so just return 0 even	3914 // However, that's not supported in 32-bit mode, so just return 0 even

3915 // for large sizes.	3915 // for large sizes.

3916 Result = Ctx->getConstantZero(IceType_i32);	3916 Result = getConstantZero(IceType_i32);

3917 break;	3917 break;

3918 case 1:	3918 case 1:

3919 case 2:	3919 case 2:

3920 case 4:	3920 case 4:

3921 case 8:	3921 case 8:

3922 Result = Ctx->getConstantInt32(1);	3922 Result = Ctx->getConstantInt32(1);

3923 break;	3923 break;

3924 }	3924 }

3925 _mov(Dest, Result);	3925 _mov(Dest, Result);

3926 return;	3926 return;

(...skipping 138 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4065 lowerCall(Call);	4065 lowerCall(Call);

4066 // The popcount helpers always return 32-bit values, while the intrinsic's	4066 // The popcount helpers always return 32-bit values, while the intrinsic's

4067 // signature matches the native POPCNT instruction and fills a 64-bit reg	4067 // signature matches the native POPCNT instruction and fills a 64-bit reg

4068 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case	4068 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case

4069 // the user doesn't do that in the IR. If the user does that in the IR,	4069 // the user doesn't do that in the IR. If the user does that in the IR,

4070 // then this zero'ing instruction is dead and gets optimized out.	4070 // then this zero'ing instruction is dead and gets optimized out.

4071 if (!Traits::Is64Bit) {	4071 if (!Traits::Is64Bit) {

4072 assert(T == Dest);	4072 assert(T == Dest);

4073 if (Val->getType() == IceType_i64) {	4073 if (Val->getType() == IceType_i64) {

4074 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));	4074 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));

4075 Constant *Zero = Ctx->getConstantZero(IceType_i32);	4075 Constant *Zero = getConstantZero(IceType_i32);

4076 _mov(DestHi, Zero);	4076 _mov(DestHi, Zero);

4077 }	4077 }

4078 } else {	4078 } else {

4079 assert(Val->getType() == IceType_i64);	4079 assert(Val->getType() == IceType_i64);

4080 // T is 64 bit. It needs to be copied to dest. We need to:	4080 // T is 64 bit. It needs to be copied to dest. We need to:

4081 //	4081 //

4082 // T_1.32 = trunc T.64 to i32	4082 // T_1.32 = trunc T.64 to i32

4083 // T_2.64 = zext T_1.32 to i64	4083 // T_2.64 = zext T_1.32 to i64

4084 // Dest.<<right_size>> = T_2.<<right_size>>	4084 // Dest.<<right_size>> = T_2.<<right_size>>

4085 //	4085 //

(...skipping 570 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4656 Variable *T_Dest2 = makeReg(IceType_i32);	4656 Variable *T_Dest2 = makeReg(IceType_i32);

4657 if (Cttz) {	4657 if (Cttz) {

4658 _bsf(T_Dest2, SecondVar);	4658 _bsf(T_Dest2, SecondVar);

4659 } else {	4659 } else {

4660 _bsr(T_Dest2, SecondVar);	4660 _bsr(T_Dest2, SecondVar);

4661 _xor(T_Dest2, _31);	4661 _xor(T_Dest2, _31);

4662 }	4662 }

4663 _test(SecondVar, SecondVar);	4663 _test(SecondVar, SecondVar);

4664 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);	4664 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);

4665 _mov(DestLo, T_Dest2);	4665 _mov(DestLo, T_Dest2);

4666 _mov(DestHi, Ctx->getConstantZero(IceType_i32));	4666 _mov(DestHi, getConstantZero(IceType_i32));

4667 }	4667 }

4668	4668

4669 template <typename TraitsType>	4669 template <typename TraitsType>

4670 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest,	4670 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest,

4671 Variable *Base, Constant *Offset) {	4671 Variable *Base, Constant *Offset) {

4672 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to	4672 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to

4673 // legalize Mem properly.	4673 // legalize Mem properly.

4674 if (Offset)	4674 if (Offset)

4675 assert(!llvm::isa<ConstantRelocatable>(Offset));	4675 assert(!llvm::isa<ConstantRelocatable>(Offset));

4676	4676

(...skipping 785 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5462 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd);	5462 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd);

5463 if (Var == nullptr)	5463 if (Var == nullptr)

5464 return;	5464 return;

5465 // We use lowerStore() to copy out-args onto the stack. This creates a memory	5465 // We use lowerStore() to copy out-args onto the stack. This creates a memory

5466 // operand with the stack pointer as the base register. Don't do bounds	5466 // operand with the stack pointer as the base register. Don't do bounds

5467 // checks on that.	5467 // checks on that.

5468 if (Var->getRegNum() == getStackReg())	5468 if (Var->getRegNum() == getStackReg())

5469 return;	5469 return;

5470	5470

5471 auto *Label = InstX86Label::create(Func, this);	5471 auto *Label = InstX86Label::create(Func, this);

5472 _cmp(Opnd, Ctx->getConstantZero(IceType_i32));	5472 _cmp(Opnd, getConstantZero(IceType_i32));

5473 _br(Traits::Cond::Br_e, Label);	5473 _br(Traits::Cond::Br_e, Label);

5474 _cmp(Opnd, Ctx->getConstantInt32(1));	5474 _cmp(Opnd, Ctx->getConstantInt32(1));

5475 _br(Traits::Cond::Br_e, Label);	5475 _br(Traits::Cond::Br_e, Label);

5476 Context.insert(Label);	5476 Context.insert(Label);

5477 }	5477 }

5478	5478

5479 template <typename TraitsType>	5479 template <typename TraitsType>

5480 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) {	5480 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) {

5481 // A Load instruction can be treated the same as an Assign instruction, after	5481 // A Load instruction can be treated the same as an Assign instruction, after

5482 // the source operand is transformed into an X86OperandMem operand. Note that	5482 // the source operand is transformed into an X86OperandMem operand. Note that

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5556 return;	5556 return;

5557 }	5557 }

5558 case BoolFolding<Traits>::PK_Fcmp: {	5558 case BoolFolding<Traits>::PK_Fcmp: {

5559 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select);	5559 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select);

5560 return;	5560 return;

5561 }	5561 }

5562 }	5562 }

5563 }	5563 }

5564	5564

5565 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);	5565 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);

5566 Operand *Zero = Ctx->getConstantZero(IceType_i32);	5566 Operand *Zero = getConstantZero(IceType_i32);

5567 _cmp(CmpResult, Zero);	5567 _cmp(CmpResult, Zero);

5568 Operand *SrcT = Select->getTrueOperand();	5568 Operand *SrcT = Select->getTrueOperand();

5569 Operand *SrcF = Select->getFalseOperand();	5569 Operand *SrcF = Select->getFalseOperand();

5570 const BrCond Cond = Traits::Cond::Br_ne;	5570 const BrCond Cond = Traits::Cond::Br_ne;

5571 lowerSelectMove(Dest, Cond, SrcT, SrcF);	5571 lowerSelectMove(Dest, Cond, SrcT, SrcF);

5572 }	5572 }

5573	5573

5574 template <typename TraitsType>	5574 template <typename TraitsType>

5575 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond,	5575 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond,

5576 Operand *SrcT, Operand *SrcF) {	5576 Operand *SrcT, Operand *SrcF) {

(...skipping 623 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6200 // during phi lowering assignments	6200 // during phi lowering assignments

6201 BoolFlagSaver B(RandomizationPoolingPaused, true);	6201 BoolFlagSaver B(RandomizationPoolingPaused, true);

6202 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>(	6202 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>(

6203 this, Context.getNode(), Func);	6203 this, Context.getNode(), Func);

6204 }	6204 }

6205	6205

6206 template <typename TraitsType>	6206 template <typename TraitsType>

6207 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {	6207 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {

6208 uint32_t StackArgumentsSize = 0;	6208 uint32_t StackArgumentsSize = 0;

6209 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {	6209 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {

6210 const char *HelperName = nullptr;	6210 RuntimeHelperFuncKind HelperName = H_Num;

6211 Variable *Dest = Arith->getDest();	6211 Variable *Dest = Arith->getDest();

6212 Type DestTy = Dest->getType();	6212 Type DestTy = Dest->getType();

6213 if (!Traits::Is64Bit && DestTy == IceType_i64) {	6213 if (!Traits::Is64Bit && DestTy == IceType_i64) {

6214 switch (Arith->getOp()) {	6214 switch (Arith->getOp()) {

6215 default:	6215 default:

6216 return;	6216 return;

6217 case InstArithmetic::Udiv:	6217 case InstArithmetic::Udiv:

6218 HelperName = H_udiv_i64;	6218 HelperName = H_udiv_i64;

6219 break;	6219 break;

6220 case InstArithmetic::Sdiv:	6220 case InstArithmetic::Sdiv:

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6269 Call->addArg(Arith->getSrc(1));	6269 Call->addArg(Arith->getSrc(1));

6270 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);	6270 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);

6271 Context.insert(Call);	6271 Context.insert(Call);

6272 Arith->setDeleted();	6272 Arith->setDeleted();

6273 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {	6273 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {

6274 InstCast::OpKind CastKind = Cast->getCastKind();	6274 InstCast::OpKind CastKind = Cast->getCastKind();

6275 Operand *Src0 = Cast->getSrc(0);	6275 Operand *Src0 = Cast->getSrc(0);

6276 const Type SrcType = Src0->getType();	6276 const Type SrcType = Src0->getType();

6277 Variable *Dest = Cast->getDest();	6277 Variable *Dest = Cast->getDest();

6278 const Type DestTy = Dest->getType();	6278 const Type DestTy = Dest->getType();

6279 const char *HelperName = nullptr;	6279 RuntimeHelperFuncKind HelperName = H_Num;

6280 Variable *CallDest = Dest;	6280 Variable *CallDest = Dest;

6281 switch (CastKind) {	6281 switch (CastKind) {

6282 default:	6282 default:

6283 return;	6283 return;

6284 case InstCast::Fptosi:	6284 case InstCast::Fptosi:

6285 if (!Traits::Is64Bit && DestTy == IceType_i64) {	6285 if (!Traits::Is64Bit && DestTy == IceType_i64) {

6286 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64	6286 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64

6287 : H_fptosi_f64_i64;	6287 : H_fptosi_f64_i64;

6288 } else {	6288 } else {

6289 return;	6289 return;

(...skipping 204 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6494 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty,	6494 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty,

6495 RegNumT RegNum) {	6495 RegNumT RegNum) {

6496 Variable *Reg = makeReg(Ty, RegNum);	6496 Variable *Reg = makeReg(Ty, RegNum);

6497 switch (Ty) {	6497 switch (Ty) {

6498 case IceType_i1:	6498 case IceType_i1:

6499 case IceType_i8:	6499 case IceType_i8:

6500 case IceType_i16:	6500 case IceType_i16:

6501 case IceType_i32:	6501 case IceType_i32:

6502 case IceType_i64:	6502 case IceType_i64:

6503 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.	6503 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.

6504 _mov(Reg, Ctx->getConstantZero(Ty));	6504 _mov(Reg, getConstantZero(Ty));

6505 break;	6505 break;

6506 case IceType_f32:	6506 case IceType_f32:

6507 case IceType_f64:	6507 case IceType_f64:

6508 Context.insert<InstFakeDef>(Reg);	6508 Context.insert<InstFakeDef>(Reg);

6509 _xorps(Reg, Reg);	6509 _xorps(Reg, Reg);

6510 break;	6510 break;

6511 default:	6511 default:

6512 // All vector types use the same pxor instruction.	6512 // All vector types use the same pxor instruction.

6513 assert(isVectorType(Ty));	6513 assert(isVectorType(Ty));

6514 Context.insert<InstFakeDef>(Reg);	6514 Context.insert<InstFakeDef>(Reg);

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6566 SizeT Shift =	6566 SizeT Shift =

6567 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;	6567 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;

6568 _psll(Reg, Ctx->getConstantInt8(Shift));	6568 _psll(Reg, Ctx->getConstantInt8(Shift));

6569 return Reg;	6569 return Reg;

6570 } else {	6570 } else {

6571 // SSE has no left shift operation for vectors of 8 bit integers.	6571 // SSE has no left shift operation for vectors of 8 bit integers.

6572 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;	6572 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;

6573 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);	6573 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);

6574 Variable *Reg = makeReg(Ty, RegNum);	6574 Variable *Reg = makeReg(Ty, RegNum);

6575 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));	6575 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));

6576 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));	6576 _pshufd(Reg, Reg, getConstantZero(IceType_i8));

6577 return Reg;	6577 return Reg;

6578 }	6578 }

6579 }	6579 }

6580	6580

6581 /// Construct a mask in a register that can be and'ed with a floating-point	6581 /// Construct a mask in a register that can be and'ed with a floating-point

6582 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32	6582 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32

6583 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of	6583 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of

6584 /// ones logically right shifted one bit.	6584 /// ones logically right shifted one bit.

6585 // TODO(stichnot): Fix the wala	6585 // TODO(stichnot): Fix the wala

6586 // TODO: above, to represent vector constants in memory.	6586 // TODO: above, to represent vector constants in memory.

(...skipping 291 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6878 //	6878 //

6879 // If in the future the implementation is changed to lower undef values to	6879 // If in the future the implementation is changed to lower undef values to

6880 // uninitialized registers, a FakeDef will be needed:	6880 // uninitialized registers, a FakeDef will be needed:

6881 // Context.insert<InstFakeDef>(Reg);	6881 // Context.insert<InstFakeDef>(Reg);

6882 // This is in order to ensure that the live range of Reg is not	6882 // This is in order to ensure that the live range of Reg is not

6883 // overestimated. If the constant being lowered is a 64 bit value, then	6883 // overestimated. If the constant being lowered is a 64 bit value, then

6884 // the result should be split and the lo and hi components will need to go	6884 // the result should be split and the lo and hi components will need to go

6885 // in uninitialized registers.	6885 // in uninitialized registers.

6886 if (isVectorType(Ty))	6886 if (isVectorType(Ty))

6887 return makeVectorOfZeros(Ty, RegNum);	6887 return makeVectorOfZeros(Ty, RegNum);

6888 return Ctx->getConstantZero(Ty);	6888 return getConstantZero(Ty);

6889 }	6889 }

6890 return From;	6890 return From;

6891 }	6891 }

6892	6892

6893 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical	6893 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical

6894 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be	6894 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be

6895 /// copied into a physical register. (Actually, either Src0 or Src1 can be	6895 /// copied into a physical register. (Actually, either Src0 or Src1 can be

6896 /// chosen for the physical register, but unfortunately we have to commit to one	6896 /// chosen for the physical register, but unfortunately we have to commit to one

6897 /// or the other before register allocation.)	6897 /// or the other before register allocation.)

6898 template <typename TraitsType>	6898 template <typename TraitsType>

(...skipping 501 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7400 emitGlobal(*Var, SectionSuffix);	7400 emitGlobal(*Var, SectionSuffix);

7401 }	7401 }

7402 }	7402 }

7403 } break;	7403 } break;

7404 }	7404 }

7405 }	7405 }

7406 } // end of namespace X86NAMESPACE	7406 } // end of namespace X86NAMESPACE

7407 } // end of namespace Ice	7407 } // end of namespace Ice

7408	7408

7409 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	7409 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« src/IceTargetLoweringARM32.cpp ('K') | « src/IceTargetLoweringX8664.cpp ('k') | no next file » | no next file with comments »