Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1591)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1775253003: Cache common constants before lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Clean up code. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1507 matching lines...) Expand 10 before | Expand all | Expand 10 after
1518 return false; 1518 return false;
1519 Type Ty = Dest->getType(); 1519 Type Ty = Dest->getType();
1520 if (Src1 == -1) { 1520 if (Src1 == -1) {
1521 Variable *T = nullptr; 1521 Variable *T = nullptr;
1522 _mov(T, Src0); 1522 _mov(T, Src0);
1523 _neg(T); 1523 _neg(T);
1524 _mov(Dest, T); 1524 _mov(Dest, T);
1525 return true; 1525 return true;
1526 } 1526 }
1527 if (Src1 == 0) { 1527 if (Src1 == 0) {
1528 _mov(Dest, Ctx->getConstantZero(Ty)); 1528 _mov(Dest, getConstantZero(Ty));
1529 return true; 1529 return true;
1530 } 1530 }
1531 if (Src1 == 1) { 1531 if (Src1 == 1) {
1532 Variable *T = nullptr; 1532 Variable *T = nullptr;
1533 _mov(T, Src0); 1533 _mov(T, Src0);
1534 _mov(Dest, T); 1534 _mov(Dest, T);
1535 return true; 1535 return true;
1536 } 1536 }
1537 // Don't bother with the edge case where Src1 == MININT. 1537 // Don't bother with the edge case where Src1 == MININT.
1538 if (Src1 == -Src1) 1538 if (Src1 == -Src1)
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1575 // somewhat arbitrary choice of 3. 1575 // somewhat arbitrary choice of 3.
1576 constexpr uint32_t MaxOpsForOptimizedMul = 3; 1576 constexpr uint32_t MaxOpsForOptimizedMul = 3;
1577 if (CountOps > MaxOpsForOptimizedMul) 1577 if (CountOps > MaxOpsForOptimizedMul)
1578 return false; 1578 return false;
1579 Variable *T = makeReg(Traits::WordType); 1579 Variable *T = makeReg(Traits::WordType);
1580 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) { 1580 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) {
1581 _movzx(T, Src0); 1581 _movzx(T, Src0);
1582 } else { 1582 } else {
1583 _mov(T, Src0); 1583 _mov(T, Src0);
1584 } 1584 }
1585 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1585 Constant *Zero = getConstantZero(IceType_i32);
1586 for (uint32_t i = 0; i < Count9; ++i) { 1586 for (uint32_t i = 0; i < Count9; ++i) {
1587 constexpr uint16_t Shift = 3; // log2(9-1) 1587 constexpr uint16_t Shift = 3; // log2(9-1)
1588 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1588 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1589 } 1589 }
1590 for (uint32_t i = 0; i < Count5; ++i) { 1590 for (uint32_t i = 0; i < Count5; ++i) {
1591 constexpr uint16_t Shift = 2; // log2(5-1) 1591 constexpr uint16_t Shift = 2; // log2(5-1)
1592 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1592 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1593 } 1593 }
1594 for (uint32_t i = 0; i < Count3; ++i) { 1594 for (uint32_t i = 0; i < Count3; ++i) {
1595 constexpr uint16_t Shift = 1; // log2(3-1) 1595 constexpr uint16_t Shift = 1; // log2(3-1)
1596 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1596 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1597 } 1597 }
1598 if (Count2) { 1598 if (Count2) {
1599 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1599 _shl(T, Ctx->getConstantInt(Ty, Count2));
1600 } 1600 }
1601 if (Src1IsNegative) 1601 if (Src1IsNegative)
1602 _neg(T); 1602 _neg(T);
1603 _mov(Dest, T); 1603 _mov(Dest, T);
1604 return true; 1604 return true;
1605 } 1605 }
1606 1606
1607 template <typename TraitsType> 1607 template <typename TraitsType>
1608 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op, 1608 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op,
1609 Operand *Src0Lo, Operand *Src0Hi, 1609 Operand *Src0Lo, Operand *Src0Hi,
1610 Operand *Src1Lo, Variable *DestLo, 1610 Operand *Src1Lo, Variable *DestLo,
1611 Variable *DestHi) { 1611 Variable *DestHi) {
1612 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. 1612 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1613 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1613 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1614 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1614 Constant *Zero = getConstantZero(IceType_i32);
1615 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1615 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1616 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { 1616 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1617 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); 1617 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1618 if (ShiftAmount > 32) { 1618 if (ShiftAmount > 32) {
1619 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); 1619 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32);
1620 switch (Op) { 1620 switch (Op) {
1621 default: 1621 default:
1622 assert(0 && "non-shift op"); 1622 assert(0 && "non-shift op");
1623 break; 1623 break;
1624 case InstArithmetic::Shl: { 1624 case InstArithmetic::Shl: {
(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after
2194 Eax = Traits::RegisterSet::Reg_ax; 2194 Eax = Traits::RegisterSet::Reg_ax;
2195 Edx = Traits::RegisterSet::Reg_dx; 2195 Edx = Traits::RegisterSet::Reg_dx;
2196 break; 2196 break;
2197 case IceType_i8: 2197 case IceType_i8:
2198 Eax = Traits::RegisterSet::Reg_al; 2198 Eax = Traits::RegisterSet::Reg_al;
2199 Edx = Traits::RegisterSet::Reg_ah; 2199 Edx = Traits::RegisterSet::Reg_ah;
2200 break; 2200 break;
2201 } 2201 }
2202 T_edx = makeReg(Ty, Edx); 2202 T_edx = makeReg(Ty, Edx);
2203 _mov(T, Src0, Eax); 2203 _mov(T, Src0, Eax);
2204 _mov(T_edx, Ctx->getConstantZero(Ty)); 2204 _mov(T_edx, getConstantZero(Ty));
2205 _div(T, Src1, T_edx); 2205 _div(T, Src1, T_edx);
2206 _mov(Dest, T); 2206 _mov(Dest, T);
2207 } break; 2207 } break;
2208 case InstArithmetic::Sdiv: 2208 case InstArithmetic::Sdiv:
2209 // TODO(stichnot): Enable this after doing better performance and cross 2209 // TODO(stichnot): Enable this after doing better performance and cross
2210 // testing. 2210 // testing.
2211 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 2211 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
2212 // Optimize division by constant power of 2, but not for Om1 or O0, just 2212 // Optimize division by constant power of 2, but not for Om1 or O0, just
2213 // to keep things simple there. 2213 // to keep things simple there.
2214 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 2214 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
2283 case IceType_i16: 2283 case IceType_i16:
2284 Eax = Traits::RegisterSet::Reg_ax; 2284 Eax = Traits::RegisterSet::Reg_ax;
2285 Edx = Traits::RegisterSet::Reg_dx; 2285 Edx = Traits::RegisterSet::Reg_dx;
2286 break; 2286 break;
2287 case IceType_i8: 2287 case IceType_i8:
2288 Eax = Traits::RegisterSet::Reg_al; 2288 Eax = Traits::RegisterSet::Reg_al;
2289 Edx = Traits::RegisterSet::Reg_ah; 2289 Edx = Traits::RegisterSet::Reg_ah;
2290 break; 2290 break;
2291 } 2291 }
2292 T_edx = makeReg(Ty, Edx); 2292 T_edx = makeReg(Ty, Edx);
2293 _mov(T_edx, Ctx->getConstantZero(Ty)); 2293 _mov(T_edx, getConstantZero(Ty));
2294 _mov(T, Src0, Eax); 2294 _mov(T, Src0, Eax);
2295 _div(T_edx, Src1, T); 2295 _div(T_edx, Src1, T);
2296 _mov(Dest, T_edx); 2296 _mov(Dest, T_edx);
2297 } break; 2297 } break;
2298 case InstArithmetic::Srem: { 2298 case InstArithmetic::Srem: {
2299 // TODO(stichnot): Enable this after doing better performance and cross 2299 // TODO(stichnot): Enable this after doing better performance and cross
2300 // testing. 2300 // testing.
2301 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 2301 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
2302 // Optimize mod by constant power of 2, but not for Om1 or O0, just to 2302 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
2303 // keep things simple there. 2303 // keep things simple there.
2304 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 2304 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2305 const int32_t Divisor = C->getValue(); 2305 const int32_t Divisor = C->getValue();
2306 const uint32_t UDivisor = Divisor; 2306 const uint32_t UDivisor = Divisor;
2307 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 2307 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2308 uint32_t LogDiv = llvm::Log2_32(UDivisor); 2308 uint32_t LogDiv = llvm::Log2_32(UDivisor);
2309 // LLVM does the following for dest=src%(1<<log): 2309 // LLVM does the following for dest=src%(1<<log):
2310 // t=src 2310 // t=src
2311 // sar t,typewidth-1 // -1 if src is negative, 0 if not 2311 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2312 // shr t,typewidth-log 2312 // shr t,typewidth-log
2313 // add t,src 2313 // add t,src
2314 // and t, -(1<<log) 2314 // and t, -(1<<log)
2315 // sub t,src 2315 // sub t,src
2316 // neg t 2316 // neg t
2317 // dest=t 2317 // dest=t
2318 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); 2318 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2319 // If for some reason we are dividing by 1, just assign 0. 2319 // If for some reason we are dividing by 1, just assign 0.
2320 if (LogDiv == 0) { 2320 if (LogDiv == 0) {
2321 _mov(Dest, Ctx->getConstantZero(Ty)); 2321 _mov(Dest, getConstantZero(Ty));
2322 return; 2322 return;
2323 } 2323 }
2324 _mov(T, Src0); 2324 _mov(T, Src0);
2325 // The initial sar is unnecessary when dividing by 2. 2325 // The initial sar is unnecessary when dividing by 2.
2326 if (LogDiv > 1) 2326 if (LogDiv > 1)
2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); 2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2329 _add(T, Src0); 2329 _add(T, Src0);
2330 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 2330 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
2331 _sub(T, Src0); 2331 _sub(T, Src0);
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
2425 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br); 2425 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br);
2426 return; 2426 return;
2427 } 2427 }
2428 case BoolFolding<Traits>::PK_Arith: { 2428 case BoolFolding<Traits>::PK_Arith: {
2429 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br); 2429 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br);
2430 return; 2430 return;
2431 } 2431 }
2432 } 2432 }
2433 } 2433 }
2434 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 2434 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2435 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2435 Constant *Zero = getConstantZero(IceType_i32);
2436 _cmp(Src0, Zero); 2436 _cmp(Src0, Zero);
2437 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 2437 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
2438 } 2438 }
2439 2439
2440 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining 2440 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
2441 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. 2441 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't.
2442 inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) { 2442 inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) {
2443 return S0 < S1 ? S1 : S0; 2443 return S0 < S1 ? S1 : S0;
2444 } 2444 }
2445 2445
(...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after
2724 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem); 2724 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
2725 if (isVectorType(DestTy)) { 2725 if (isVectorType(DestTy)) {
2726 // onemask = materialize(1,1,...); dest = onemask & src 2726 // onemask = materialize(1,1,...); dest = onemask & src
2727 Variable *OneMask = makeVectorOfOnes(DestTy); 2727 Variable *OneMask = makeVectorOfOnes(DestTy);
2728 Variable *T = makeReg(DestTy); 2728 Variable *T = makeReg(DestTy);
2729 _movp(T, Src0RM); 2729 _movp(T, Src0RM);
2730 _pand(T, OneMask); 2730 _pand(T, OneMask);
2731 _movp(Dest, T); 2731 _movp(Dest, T);
2732 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { 2732 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2733 // t1=movzx src; dst.lo=t1; dst.hi=0 2733 // t1=movzx src; dst.lo=t1; dst.hi=0
2734 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2734 Constant *Zero = getConstantZero(IceType_i32);
2735 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2735 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2736 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2736 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2737 Variable *Tmp = makeReg(DestLo->getType()); 2737 Variable *Tmp = makeReg(DestLo->getType());
2738 if (Src0RM->getType() == IceType_i32) { 2738 if (Src0RM->getType() == IceType_i32) {
2739 _mov(Tmp, Src0RM); 2739 _mov(Tmp, Src0RM);
2740 } else { 2740 } else {
2741 _movzx(Tmp, Src0RM); 2741 _movzx(Tmp, Src0RM);
2742 } 2742 }
2743 _mov(DestLo, Tmp); 2743 _mov(DestLo, Tmp);
2744 _mov(DestHi, Zero); 2744 _mov(DestHi, Zero);
(...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after
3906 Operand *ByteSize = Instr->getArg(0); 3906 Operand *ByteSize = Instr->getArg(0);
3907 Variable *Dest = Instr->getDest(); 3907 Variable *Dest = Instr->getDest();
3908 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { 3908 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
3909 Constant *Result; 3909 Constant *Result;
3910 switch (CI->getValue()) { 3910 switch (CI->getValue()) {
3911 default: 3911 default:
3912 // Some x86-64 processors support the cmpxchg16b instruction, which can 3912 // Some x86-64 processors support the cmpxchg16b instruction, which can
3913 // make 16-byte operations lock free (when used with the LOCK prefix). 3913 // make 16-byte operations lock free (when used with the LOCK prefix).
3914 // However, that's not supported in 32-bit mode, so just return 0 even 3914 // However, that's not supported in 32-bit mode, so just return 0 even
3915 // for large sizes. 3915 // for large sizes.
3916 Result = Ctx->getConstantZero(IceType_i32); 3916 Result = getConstantZero(IceType_i32);
3917 break; 3917 break;
3918 case 1: 3918 case 1:
3919 case 2: 3919 case 2:
3920 case 4: 3920 case 4:
3921 case 8: 3921 case 8:
3922 Result = Ctx->getConstantInt32(1); 3922 Result = Ctx->getConstantInt32(1);
3923 break; 3923 break;
3924 } 3924 }
3925 _mov(Dest, Result); 3925 _mov(Dest, Result);
3926 return; 3926 return;
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
4065 lowerCall(Call); 4065 lowerCall(Call);
4066 // The popcount helpers always return 32-bit values, while the intrinsic's 4066 // The popcount helpers always return 32-bit values, while the intrinsic's
4067 // signature matches the native POPCNT instruction and fills a 64-bit reg 4067 // signature matches the native POPCNT instruction and fills a 64-bit reg
4068 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case 4068 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
4069 // the user doesn't do that in the IR. If the user does that in the IR, 4069 // the user doesn't do that in the IR. If the user does that in the IR,
4070 // then this zero'ing instruction is dead and gets optimized out. 4070 // then this zero'ing instruction is dead and gets optimized out.
4071 if (!Traits::Is64Bit) { 4071 if (!Traits::Is64Bit) {
4072 assert(T == Dest); 4072 assert(T == Dest);
4073 if (Val->getType() == IceType_i64) { 4073 if (Val->getType() == IceType_i64) {
4074 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4074 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4075 Constant *Zero = Ctx->getConstantZero(IceType_i32); 4075 Constant *Zero = getConstantZero(IceType_i32);
4076 _mov(DestHi, Zero); 4076 _mov(DestHi, Zero);
4077 } 4077 }
4078 } else { 4078 } else {
4079 assert(Val->getType() == IceType_i64); 4079 assert(Val->getType() == IceType_i64);
4080 // T is 64 bit. It needs to be copied to dest. We need to: 4080 // T is 64 bit. It needs to be copied to dest. We need to:
4081 // 4081 //
4082 // T_1.32 = trunc T.64 to i32 4082 // T_1.32 = trunc T.64 to i32
4083 // T_2.64 = zext T_1.32 to i64 4083 // T_2.64 = zext T_1.32 to i64
4084 // Dest.<<right_size>> = T_2.<<right_size>> 4084 // Dest.<<right_size>> = T_2.<<right_size>>
4085 // 4085 //
(...skipping 570 matching lines...) Expand 10 before | Expand all | Expand 10 after
4656 Variable *T_Dest2 = makeReg(IceType_i32); 4656 Variable *T_Dest2 = makeReg(IceType_i32);
4657 if (Cttz) { 4657 if (Cttz) {
4658 _bsf(T_Dest2, SecondVar); 4658 _bsf(T_Dest2, SecondVar);
4659 } else { 4659 } else {
4660 _bsr(T_Dest2, SecondVar); 4660 _bsr(T_Dest2, SecondVar);
4661 _xor(T_Dest2, _31); 4661 _xor(T_Dest2, _31);
4662 } 4662 }
4663 _test(SecondVar, SecondVar); 4663 _test(SecondVar, SecondVar);
4664 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); 4664 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
4665 _mov(DestLo, T_Dest2); 4665 _mov(DestLo, T_Dest2);
4666 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 4666 _mov(DestHi, getConstantZero(IceType_i32));
4667 } 4667 }
4668 4668
4669 template <typename TraitsType> 4669 template <typename TraitsType>
4670 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest, 4670 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest,
4671 Variable *Base, Constant *Offset) { 4671 Variable *Base, Constant *Offset) {
4672 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to 4672 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to
4673 // legalize Mem properly. 4673 // legalize Mem properly.
4674 if (Offset) 4674 if (Offset)
4675 assert(!llvm::isa<ConstantRelocatable>(Offset)); 4675 assert(!llvm::isa<ConstantRelocatable>(Offset));
4676 4676
(...skipping 785 matching lines...) Expand 10 before | Expand all | Expand 10 after
5462 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); 5462 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd);
5463 if (Var == nullptr) 5463 if (Var == nullptr)
5464 return; 5464 return;
5465 // We use lowerStore() to copy out-args onto the stack. This creates a memory 5465 // We use lowerStore() to copy out-args onto the stack. This creates a memory
5466 // operand with the stack pointer as the base register. Don't do bounds 5466 // operand with the stack pointer as the base register. Don't do bounds
5467 // checks on that. 5467 // checks on that.
5468 if (Var->getRegNum() == getStackReg()) 5468 if (Var->getRegNum() == getStackReg())
5469 return; 5469 return;
5470 5470
5471 auto *Label = InstX86Label::create(Func, this); 5471 auto *Label = InstX86Label::create(Func, this);
5472 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); 5472 _cmp(Opnd, getConstantZero(IceType_i32));
5473 _br(Traits::Cond::Br_e, Label); 5473 _br(Traits::Cond::Br_e, Label);
5474 _cmp(Opnd, Ctx->getConstantInt32(1)); 5474 _cmp(Opnd, Ctx->getConstantInt32(1));
5475 _br(Traits::Cond::Br_e, Label); 5475 _br(Traits::Cond::Br_e, Label);
5476 Context.insert(Label); 5476 Context.insert(Label);
5477 } 5477 }
5478 5478
5479 template <typename TraitsType> 5479 template <typename TraitsType>
5480 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) { 5480 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) {
5481 // A Load instruction can be treated the same as an Assign instruction, after 5481 // A Load instruction can be treated the same as an Assign instruction, after
5482 // the source operand is transformed into an X86OperandMem operand. Note that 5482 // the source operand is transformed into an X86OperandMem operand. Note that
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
5556 return; 5556 return;
5557 } 5557 }
5558 case BoolFolding<Traits>::PK_Fcmp: { 5558 case BoolFolding<Traits>::PK_Fcmp: {
5559 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select); 5559 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select);
5560 return; 5560 return;
5561 } 5561 }
5562 } 5562 }
5563 } 5563 }
5564 5564
5565 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); 5565 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
5566 Operand *Zero = Ctx->getConstantZero(IceType_i32); 5566 Operand *Zero = getConstantZero(IceType_i32);
5567 _cmp(CmpResult, Zero); 5567 _cmp(CmpResult, Zero);
5568 Operand *SrcT = Select->getTrueOperand(); 5568 Operand *SrcT = Select->getTrueOperand();
5569 Operand *SrcF = Select->getFalseOperand(); 5569 Operand *SrcF = Select->getFalseOperand();
5570 const BrCond Cond = Traits::Cond::Br_ne; 5570 const BrCond Cond = Traits::Cond::Br_ne;
5571 lowerSelectMove(Dest, Cond, SrcT, SrcF); 5571 lowerSelectMove(Dest, Cond, SrcT, SrcF);
5572 } 5572 }
5573 5573
5574 template <typename TraitsType> 5574 template <typename TraitsType>
5575 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond, 5575 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond,
5576 Operand *SrcT, Operand *SrcF) { 5576 Operand *SrcT, Operand *SrcF) {
(...skipping 623 matching lines...) Expand 10 before | Expand all | Expand 10 after
6200 // during phi lowering assignments 6200 // during phi lowering assignments
6201 BoolFlagSaver B(RandomizationPoolingPaused, true); 6201 BoolFlagSaver B(RandomizationPoolingPaused, true);
6202 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>( 6202 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>(
6203 this, Context.getNode(), Func); 6203 this, Context.getNode(), Func);
6204 } 6204 }
6205 6205
6206 template <typename TraitsType> 6206 template <typename TraitsType>
6207 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { 6207 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
6208 uint32_t StackArgumentsSize = 0; 6208 uint32_t StackArgumentsSize = 0;
6209 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { 6209 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
6210 const char *HelperName = nullptr; 6210 RuntimeHelperFuncKind HelperName = H_Num;
6211 Variable *Dest = Arith->getDest(); 6211 Variable *Dest = Arith->getDest();
6212 Type DestTy = Dest->getType(); 6212 Type DestTy = Dest->getType();
6213 if (!Traits::Is64Bit && DestTy == IceType_i64) { 6213 if (!Traits::Is64Bit && DestTy == IceType_i64) {
6214 switch (Arith->getOp()) { 6214 switch (Arith->getOp()) {
6215 default: 6215 default:
6216 return; 6216 return;
6217 case InstArithmetic::Udiv: 6217 case InstArithmetic::Udiv:
6218 HelperName = H_udiv_i64; 6218 HelperName = H_udiv_i64;
6219 break; 6219 break;
6220 case InstArithmetic::Sdiv: 6220 case InstArithmetic::Sdiv:
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
6269 Call->addArg(Arith->getSrc(1)); 6269 Call->addArg(Arith->getSrc(1));
6270 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); 6270 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
6271 Context.insert(Call); 6271 Context.insert(Call);
6272 Arith->setDeleted(); 6272 Arith->setDeleted();
6273 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 6273 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
6274 InstCast::OpKind CastKind = Cast->getCastKind(); 6274 InstCast::OpKind CastKind = Cast->getCastKind();
6275 Operand *Src0 = Cast->getSrc(0); 6275 Operand *Src0 = Cast->getSrc(0);
6276 const Type SrcType = Src0->getType(); 6276 const Type SrcType = Src0->getType();
6277 Variable *Dest = Cast->getDest(); 6277 Variable *Dest = Cast->getDest();
6278 const Type DestTy = Dest->getType(); 6278 const Type DestTy = Dest->getType();
6279 const char *HelperName = nullptr; 6279 RuntimeHelperFuncKind HelperName = H_Num;
6280 Variable *CallDest = Dest; 6280 Variable *CallDest = Dest;
6281 switch (CastKind) { 6281 switch (CastKind) {
6282 default: 6282 default:
6283 return; 6283 return;
6284 case InstCast::Fptosi: 6284 case InstCast::Fptosi:
6285 if (!Traits::Is64Bit && DestTy == IceType_i64) { 6285 if (!Traits::Is64Bit && DestTy == IceType_i64) {
6286 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 6286 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
6287 : H_fptosi_f64_i64; 6287 : H_fptosi_f64_i64;
6288 } else { 6288 } else {
6289 return; 6289 return;
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after
6494 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty, 6494 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty,
6495 RegNumT RegNum) { 6495 RegNumT RegNum) {
6496 Variable *Reg = makeReg(Ty, RegNum); 6496 Variable *Reg = makeReg(Ty, RegNum);
6497 switch (Ty) { 6497 switch (Ty) {
6498 case IceType_i1: 6498 case IceType_i1:
6499 case IceType_i8: 6499 case IceType_i8:
6500 case IceType_i16: 6500 case IceType_i16:
6501 case IceType_i32: 6501 case IceType_i32:
6502 case IceType_i64: 6502 case IceType_i64:
6503 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. 6503 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
6504 _mov(Reg, Ctx->getConstantZero(Ty)); 6504 _mov(Reg, getConstantZero(Ty));
6505 break; 6505 break;
6506 case IceType_f32: 6506 case IceType_f32:
6507 case IceType_f64: 6507 case IceType_f64:
6508 Context.insert<InstFakeDef>(Reg); 6508 Context.insert<InstFakeDef>(Reg);
6509 _xorps(Reg, Reg); 6509 _xorps(Reg, Reg);
6510 break; 6510 break;
6511 default: 6511 default:
6512 // All vector types use the same pxor instruction. 6512 // All vector types use the same pxor instruction.
6513 assert(isVectorType(Ty)); 6513 assert(isVectorType(Ty));
6514 Context.insert<InstFakeDef>(Reg); 6514 Context.insert<InstFakeDef>(Reg);
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
6566 SizeT Shift = 6566 SizeT Shift =
6567 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; 6567 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
6568 _psll(Reg, Ctx->getConstantInt8(Shift)); 6568 _psll(Reg, Ctx->getConstantInt8(Shift));
6569 return Reg; 6569 return Reg;
6570 } else { 6570 } else {
6571 // SSE has no left shift operation for vectors of 8 bit integers. 6571 // SSE has no left shift operation for vectors of 8 bit integers.
6572 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 6572 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
6573 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 6573 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
6574 Variable *Reg = makeReg(Ty, RegNum); 6574 Variable *Reg = makeReg(Ty, RegNum);
6575 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 6575 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
6576 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 6576 _pshufd(Reg, Reg, getConstantZero(IceType_i8));
6577 return Reg; 6577 return Reg;
6578 } 6578 }
6579 } 6579 }
6580 6580
6581 /// Construct a mask in a register that can be and'ed with a floating-point 6581 /// Construct a mask in a register that can be and'ed with a floating-point
6582 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 6582 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
6583 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of 6583 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of
6584 /// ones logically right shifted one bit. 6584 /// ones logically right shifted one bit.
6585 // TODO(stichnot): Fix the wala 6585 // TODO(stichnot): Fix the wala
6586 // TODO: above, to represent vector constants in memory. 6586 // TODO: above, to represent vector constants in memory.
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after
6878 // 6878 //
6879 // If in the future the implementation is changed to lower undef values to 6879 // If in the future the implementation is changed to lower undef values to
6880 // uninitialized registers, a FakeDef will be needed: 6880 // uninitialized registers, a FakeDef will be needed:
6881 // Context.insert<InstFakeDef>(Reg); 6881 // Context.insert<InstFakeDef>(Reg);
6882 // This is in order to ensure that the live range of Reg is not 6882 // This is in order to ensure that the live range of Reg is not
6883 // overestimated. If the constant being lowered is a 64 bit value, then 6883 // overestimated. If the constant being lowered is a 64 bit value, then
6884 // the result should be split and the lo and hi components will need to go 6884 // the result should be split and the lo and hi components will need to go
6885 // in uninitialized registers. 6885 // in uninitialized registers.
6886 if (isVectorType(Ty)) 6886 if (isVectorType(Ty))
6887 return makeVectorOfZeros(Ty, RegNum); 6887 return makeVectorOfZeros(Ty, RegNum);
6888 return Ctx->getConstantZero(Ty); 6888 return getConstantZero(Ty);
6889 } 6889 }
6890 return From; 6890 return From;
6891 } 6891 }
6892 6892
6893 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical 6893 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical
6894 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be 6894 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
6895 /// copied into a physical register. (Actually, either Src0 or Src1 can be 6895 /// copied into a physical register. (Actually, either Src0 or Src1 can be
6896 /// chosen for the physical register, but unfortunately we have to commit to one 6896 /// chosen for the physical register, but unfortunately we have to commit to one
6897 /// or the other before register allocation.) 6897 /// or the other before register allocation.)
6898 template <typename TraitsType> 6898 template <typename TraitsType>
(...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after
7400 emitGlobal(*Var, SectionSuffix); 7400 emitGlobal(*Var, SectionSuffix);
7401 } 7401 }
7402 } 7402 }
7403 } break; 7403 } break;
7404 } 7404 }
7405 } 7405 }
7406 } // end of namespace X86NAMESPACE 7406 } // end of namespace X86NAMESPACE
7407 } // end of namespace Ice 7407 } // end of namespace Ice
7408 7408
7409 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7409 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« src/IceTargetLoweringARM32.cpp ('K') | « src/IceTargetLoweringX8664.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698