| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1507 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1518 return false; | 1518 return false; |
| 1519 Type Ty = Dest->getType(); | 1519 Type Ty = Dest->getType(); |
| 1520 if (Src1 == -1) { | 1520 if (Src1 == -1) { |
| 1521 Variable *T = nullptr; | 1521 Variable *T = nullptr; |
| 1522 _mov(T, Src0); | 1522 _mov(T, Src0); |
| 1523 _neg(T); | 1523 _neg(T); |
| 1524 _mov(Dest, T); | 1524 _mov(Dest, T); |
| 1525 return true; | 1525 return true; |
| 1526 } | 1526 } |
| 1527 if (Src1 == 0) { | 1527 if (Src1 == 0) { |
| 1528 _mov(Dest, Ctx->getConstantZero(Ty)); | 1528 _mov(Dest, getConstantZero(Ty)); |
| 1529 return true; | 1529 return true; |
| 1530 } | 1530 } |
| 1531 if (Src1 == 1) { | 1531 if (Src1 == 1) { |
| 1532 Variable *T = nullptr; | 1532 Variable *T = nullptr; |
| 1533 _mov(T, Src0); | 1533 _mov(T, Src0); |
| 1534 _mov(Dest, T); | 1534 _mov(Dest, T); |
| 1535 return true; | 1535 return true; |
| 1536 } | 1536 } |
| 1537 // Don't bother with the edge case where Src1 == MININT. | 1537 // Don't bother with the edge case where Src1 == MININT. |
| 1538 if (Src1 == -Src1) | 1538 if (Src1 == -Src1) |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1575 // somewhat arbitrary choice of 3. | 1575 // somewhat arbitrary choice of 3. |
| 1576 constexpr uint32_t MaxOpsForOptimizedMul = 3; | 1576 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
| 1577 if (CountOps > MaxOpsForOptimizedMul) | 1577 if (CountOps > MaxOpsForOptimizedMul) |
| 1578 return false; | 1578 return false; |
| 1579 Variable *T = makeReg(Traits::WordType); | 1579 Variable *T = makeReg(Traits::WordType); |
| 1580 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) { | 1580 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) { |
| 1581 _movzx(T, Src0); | 1581 _movzx(T, Src0); |
| 1582 } else { | 1582 } else { |
| 1583 _mov(T, Src0); | 1583 _mov(T, Src0); |
| 1584 } | 1584 } |
| 1585 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1585 Constant *Zero = getConstantZero(IceType_i32); |
| 1586 for (uint32_t i = 0; i < Count9; ++i) { | 1586 for (uint32_t i = 0; i < Count9; ++i) { |
| 1587 constexpr uint16_t Shift = 3; // log2(9-1) | 1587 constexpr uint16_t Shift = 3; // log2(9-1) |
| 1588 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1588 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1589 } | 1589 } |
| 1590 for (uint32_t i = 0; i < Count5; ++i) { | 1590 for (uint32_t i = 0; i < Count5; ++i) { |
| 1591 constexpr uint16_t Shift = 2; // log2(5-1) | 1591 constexpr uint16_t Shift = 2; // log2(5-1) |
| 1592 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1592 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1593 } | 1593 } |
| 1594 for (uint32_t i = 0; i < Count3; ++i) { | 1594 for (uint32_t i = 0; i < Count3; ++i) { |
| 1595 constexpr uint16_t Shift = 1; // log2(3-1) | 1595 constexpr uint16_t Shift = 1; // log2(3-1) |
| 1596 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1596 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1597 } | 1597 } |
| 1598 if (Count2) { | 1598 if (Count2) { |
| 1599 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1599 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
| 1600 } | 1600 } |
| 1601 if (Src1IsNegative) | 1601 if (Src1IsNegative) |
| 1602 _neg(T); | 1602 _neg(T); |
| 1603 _mov(Dest, T); | 1603 _mov(Dest, T); |
| 1604 return true; | 1604 return true; |
| 1605 } | 1605 } |
| 1606 | 1606 |
| 1607 template <typename TraitsType> | 1607 template <typename TraitsType> |
| 1608 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op, | 1608 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op, |
| 1609 Operand *Src0Lo, Operand *Src0Hi, | 1609 Operand *Src0Lo, Operand *Src0Hi, |
| 1610 Operand *Src1Lo, Variable *DestLo, | 1610 Operand *Src1Lo, Variable *DestLo, |
| 1611 Variable *DestHi) { | 1611 Variable *DestHi) { |
| 1612 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | 1612 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. |
| 1613 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1613 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1614 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1614 Constant *Zero = getConstantZero(IceType_i32); |
| 1615 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | 1615 Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
| 1616 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { | 1616 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| 1617 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); | 1617 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| 1618 if (ShiftAmount > 32) { | 1618 if (ShiftAmount > 32) { |
| 1619 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); | 1619 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); |
| 1620 switch (Op) { | 1620 switch (Op) { |
| 1621 default: | 1621 default: |
| 1622 assert(0 && "non-shift op"); | 1622 assert(0 && "non-shift op"); |
| 1623 break; | 1623 break; |
| 1624 case InstArithmetic::Shl: { | 1624 case InstArithmetic::Shl: { |
| (...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2194 Eax = Traits::RegisterSet::Reg_ax; | 2194 Eax = Traits::RegisterSet::Reg_ax; |
| 2195 Edx = Traits::RegisterSet::Reg_dx; | 2195 Edx = Traits::RegisterSet::Reg_dx; |
| 2196 break; | 2196 break; |
| 2197 case IceType_i8: | 2197 case IceType_i8: |
| 2198 Eax = Traits::RegisterSet::Reg_al; | 2198 Eax = Traits::RegisterSet::Reg_al; |
| 2199 Edx = Traits::RegisterSet::Reg_ah; | 2199 Edx = Traits::RegisterSet::Reg_ah; |
| 2200 break; | 2200 break; |
| 2201 } | 2201 } |
| 2202 T_edx = makeReg(Ty, Edx); | 2202 T_edx = makeReg(Ty, Edx); |
| 2203 _mov(T, Src0, Eax); | 2203 _mov(T, Src0, Eax); |
| 2204 _mov(T_edx, Ctx->getConstantZero(Ty)); | 2204 _mov(T_edx, getConstantZero(Ty)); |
| 2205 _div(T, Src1, T_edx); | 2205 _div(T, Src1, T_edx); |
| 2206 _mov(Dest, T); | 2206 _mov(Dest, T); |
| 2207 } break; | 2207 } break; |
| 2208 case InstArithmetic::Sdiv: | 2208 case InstArithmetic::Sdiv: |
| 2209 // TODO(stichnot): Enable this after doing better performance and cross | 2209 // TODO(stichnot): Enable this after doing better performance and cross |
| 2210 // testing. | 2210 // testing. |
| 2211 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 2211 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 2212 // Optimize division by constant power of 2, but not for Om1 or O0, just | 2212 // Optimize division by constant power of 2, but not for Om1 or O0, just |
| 2213 // to keep things simple there. | 2213 // to keep things simple there. |
| 2214 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 2214 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2283 case IceType_i16: | 2283 case IceType_i16: |
| 2284 Eax = Traits::RegisterSet::Reg_ax; | 2284 Eax = Traits::RegisterSet::Reg_ax; |
| 2285 Edx = Traits::RegisterSet::Reg_dx; | 2285 Edx = Traits::RegisterSet::Reg_dx; |
| 2286 break; | 2286 break; |
| 2287 case IceType_i8: | 2287 case IceType_i8: |
| 2288 Eax = Traits::RegisterSet::Reg_al; | 2288 Eax = Traits::RegisterSet::Reg_al; |
| 2289 Edx = Traits::RegisterSet::Reg_ah; | 2289 Edx = Traits::RegisterSet::Reg_ah; |
| 2290 break; | 2290 break; |
| 2291 } | 2291 } |
| 2292 T_edx = makeReg(Ty, Edx); | 2292 T_edx = makeReg(Ty, Edx); |
| 2293 _mov(T_edx, Ctx->getConstantZero(Ty)); | 2293 _mov(T_edx, getConstantZero(Ty)); |
| 2294 _mov(T, Src0, Eax); | 2294 _mov(T, Src0, Eax); |
| 2295 _div(T_edx, Src1, T); | 2295 _div(T_edx, Src1, T); |
| 2296 _mov(Dest, T_edx); | 2296 _mov(Dest, T_edx); |
| 2297 } break; | 2297 } break; |
| 2298 case InstArithmetic::Srem: { | 2298 case InstArithmetic::Srem: { |
| 2299 // TODO(stichnot): Enable this after doing better performance and cross | 2299 // TODO(stichnot): Enable this after doing better performance and cross |
| 2300 // testing. | 2300 // testing. |
| 2301 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 2301 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 2302 // Optimize mod by constant power of 2, but not for Om1 or O0, just to | 2302 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
| 2303 // keep things simple there. | 2303 // keep things simple there. |
| 2304 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 2304 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 2305 const int32_t Divisor = C->getValue(); | 2305 const int32_t Divisor = C->getValue(); |
| 2306 const uint32_t UDivisor = Divisor; | 2306 const uint32_t UDivisor = Divisor; |
| 2307 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 2307 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 2308 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 2308 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 2309 // LLVM does the following for dest=src%(1<<log): | 2309 // LLVM does the following for dest=src%(1<<log): |
| 2310 // t=src | 2310 // t=src |
| 2311 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 2311 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 2312 // shr t,typewidth-log | 2312 // shr t,typewidth-log |
| 2313 // add t,src | 2313 // add t,src |
| 2314 // and t, -(1<<log) | 2314 // and t, -(1<<log) |
| 2315 // sub t,src | 2315 // sub t,src |
| 2316 // neg t | 2316 // neg t |
| 2317 // dest=t | 2317 // dest=t |
| 2318 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); | 2318 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
| 2319 // If for some reason we are dividing by 1, just assign 0. | 2319 // If for some reason we are dividing by 1, just assign 0. |
| 2320 if (LogDiv == 0) { | 2320 if (LogDiv == 0) { |
| 2321 _mov(Dest, Ctx->getConstantZero(Ty)); | 2321 _mov(Dest, getConstantZero(Ty)); |
| 2322 return; | 2322 return; |
| 2323 } | 2323 } |
| 2324 _mov(T, Src0); | 2324 _mov(T, Src0); |
| 2325 // The initial sar is unnecessary when dividing by 2. | 2325 // The initial sar is unnecessary when dividing by 2. |
| 2326 if (LogDiv > 1) | 2326 if (LogDiv > 1) |
| 2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | 2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
| 2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| 2329 _add(T, Src0); | 2329 _add(T, Src0); |
| 2330 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 2330 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
| 2331 _sub(T, Src0); | 2331 _sub(T, Src0); |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2425 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br); | 2425 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br); |
| 2426 return; | 2426 return; |
| 2427 } | 2427 } |
| 2428 case BoolFolding<Traits>::PK_Arith: { | 2428 case BoolFolding<Traits>::PK_Arith: { |
| 2429 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br); | 2429 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br); |
| 2430 return; | 2430 return; |
| 2431 } | 2431 } |
| 2432 } | 2432 } |
| 2433 } | 2433 } |
| 2434 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2434 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| 2435 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2435 Constant *Zero = getConstantZero(IceType_i32); |
| 2436 _cmp(Src0, Zero); | 2436 _cmp(Src0, Zero); |
| 2437 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 2437 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| 2438 } | 2438 } |
| 2439 | 2439 |
| 2440 // Compile-time max(S0, S1). lowerCall relies on it when defining its | 2440 // Compile-time max(S0, S1). lowerCall relies on it when defining its |
| 2441 // OperandList; std::max() is supposed to work there, but it doesn't. | 2441 // OperandList; std::max() is supposed to work there, but it doesn't. |
| 2442 inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) { | 2442 inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) { |
| 2443 return (S1 > S0) ? S1 : S0; | 2443 return (S1 > S0) ? S1 : S0; |
| 2444 } | 2444 } |
| 2445 | 2445 |
| (...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2724 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem); | 2724 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem); |
| 2725 if (isVectorType(DestTy)) { | 2725 if (isVectorType(DestTy)) { |
| 2726 // onemask = materialize(1,1,...); dest = onemask & src | 2726 // onemask = materialize(1,1,...); dest = onemask & src |
| 2727 Variable *OneMask = makeVectorOfOnes(DestTy); | 2727 Variable *OneMask = makeVectorOfOnes(DestTy); |
| 2728 Variable *T = makeReg(DestTy); | 2728 Variable *T = makeReg(DestTy); |
| 2729 _movp(T, Src0RM); | 2729 _movp(T, Src0RM); |
| 2730 _pand(T, OneMask); | 2730 _pand(T, OneMask); |
| 2731 _movp(Dest, T); | 2731 _movp(Dest, T); |
| 2732 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { | 2732 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 2733 // t1=movzx src; dst.lo=t1; dst.hi=0 | 2733 // t1=movzx src; dst.lo=t1; dst.hi=0 |
| 2734 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2734 Constant *Zero = getConstantZero(IceType_i32); |
| 2735 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2735 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2736 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2736 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2737 Variable *Tmp = makeReg(DestLo->getType()); | 2737 Variable *Tmp = makeReg(DestLo->getType()); |
| 2738 if (Src0RM->getType() == IceType_i32) { | 2738 if (Src0RM->getType() == IceType_i32) { |
| 2739 _mov(Tmp, Src0RM); | 2739 _mov(Tmp, Src0RM); |
| 2740 } else { | 2740 } else { |
| 2741 _movzx(Tmp, Src0RM); | 2741 _movzx(Tmp, Src0RM); |
| 2742 } | 2742 } |
| 2743 _mov(DestLo, Tmp); | 2743 _mov(DestLo, Tmp); |
| 2744 _mov(DestHi, Zero); | 2744 _mov(DestHi, Zero); |
| (...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3906 Operand *ByteSize = Instr->getArg(0); | 3906 Operand *ByteSize = Instr->getArg(0); |
| 3907 Variable *Dest = Instr->getDest(); | 3907 Variable *Dest = Instr->getDest(); |
| 3908 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { | 3908 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { |
| 3909 Constant *Result; | 3909 Constant *Result; |
| 3910 switch (CI->getValue()) { | 3910 switch (CI->getValue()) { |
| 3911 default: | 3911 default: |
| 3912 // Some x86-64 processors support the cmpxchg16b instruction, which can | 3912 // Some x86-64 processors support the cmpxchg16b instruction, which can |
| 3913 // make 16-byte operations lock free (when used with the LOCK prefix). | 3913 // make 16-byte operations lock free (when used with the LOCK prefix). |
| 3914 // However, that's not supported in 32-bit mode, so just return 0 even | 3914 // However, that's not supported in 32-bit mode, so just return 0 even |
| 3915 // for large sizes. | 3915 // for large sizes. |
| 3916 Result = Ctx->getConstantZero(IceType_i32); | 3916 Result = getConstantZero(IceType_i32); |
| 3917 break; | 3917 break; |
| 3918 case 1: | 3918 case 1: |
| 3919 case 2: | 3919 case 2: |
| 3920 case 4: | 3920 case 4: |
| 3921 case 8: | 3921 case 8: |
| 3922 Result = Ctx->getConstantInt32(1); | 3922 Result = Ctx->getConstantInt32(1); |
| 3923 break; | 3923 break; |
| 3924 } | 3924 } |
| 3925 _mov(Dest, Result); | 3925 _mov(Dest, Result); |
| 3926 return; | 3926 return; |
| (...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4065 lowerCall(Call); | 4065 lowerCall(Call); |
| 4066 // The popcount helpers always return 32-bit values, while the intrinsic's | 4066 // The popcount helpers always return 32-bit values, while the intrinsic's |
| 4067 // signature matches the native POPCNT instruction and fills a 64-bit reg | 4067 // signature matches the native POPCNT instruction and fills a 64-bit reg |
| 4068 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case | 4068 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case |
| 4069 // the user doesn't do that in the IR. If the user does that in the IR, | 4069 // the user doesn't do that in the IR. If the user does that in the IR, |
| 4070 // then this zero'ing instruction is dead and gets optimized out. | 4070 // then this zero'ing instruction is dead and gets optimized out. |
| 4071 if (!Traits::Is64Bit) { | 4071 if (!Traits::Is64Bit) { |
| 4072 assert(T == Dest); | 4072 assert(T == Dest); |
| 4073 if (Val->getType() == IceType_i64) { | 4073 if (Val->getType() == IceType_i64) { |
| 4074 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4074 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 4075 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 4075 Constant *Zero = getConstantZero(IceType_i32); |
| 4076 _mov(DestHi, Zero); | 4076 _mov(DestHi, Zero); |
| 4077 } | 4077 } |
| 4078 } else { | 4078 } else { |
| 4079 assert(Val->getType() == IceType_i64); | 4079 assert(Val->getType() == IceType_i64); |
| 4080 // T is 64 bit. It needs to be copied to dest. We need to: | 4080 // T is 64 bit. It needs to be copied to dest. We need to: |
| 4081 // | 4081 // |
| 4082 // T_1.32 = trunc T.64 to i32 | 4082 // T_1.32 = trunc T.64 to i32 |
| 4083 // T_2.64 = zext T_1.32 to i64 | 4083 // T_2.64 = zext T_1.32 to i64 |
| 4084 // Dest.<<right_size>> = T_2.<<right_size>> | 4084 // Dest.<<right_size>> = T_2.<<right_size>> |
| 4085 // | 4085 // |
| (...skipping 570 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4656 Variable *T_Dest2 = makeReg(IceType_i32); | 4656 Variable *T_Dest2 = makeReg(IceType_i32); |
| 4657 if (Cttz) { | 4657 if (Cttz) { |
| 4658 _bsf(T_Dest2, SecondVar); | 4658 _bsf(T_Dest2, SecondVar); |
| 4659 } else { | 4659 } else { |
| 4660 _bsr(T_Dest2, SecondVar); | 4660 _bsr(T_Dest2, SecondVar); |
| 4661 _xor(T_Dest2, _31); | 4661 _xor(T_Dest2, _31); |
| 4662 } | 4662 } |
| 4663 _test(SecondVar, SecondVar); | 4663 _test(SecondVar, SecondVar); |
| 4664 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 4664 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 4665 _mov(DestLo, T_Dest2); | 4665 _mov(DestLo, T_Dest2); |
| 4666 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 4666 _mov(DestHi, getConstantZero(IceType_i32)); |
| 4667 } | 4667 } |
| 4668 | 4668 |
| 4669 template <typename TraitsType> | 4669 template <typename TraitsType> |
| 4670 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest, | 4670 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest, |
| 4671 Variable *Base, Constant *Offset) { | 4671 Variable *Base, Constant *Offset) { |
| 4672 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to | 4672 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to |
| 4673 // legalize Mem properly. | 4673 // legalize Mem properly. |
| 4674 if (Offset) | 4674 if (Offset) |
| 4675 assert(!llvm::isa<ConstantRelocatable>(Offset)); | 4675 assert(!llvm::isa<ConstantRelocatable>(Offset)); |
| 4676 | 4676 |
| (...skipping 785 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5462 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); | 5462 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); |
| 5463 if (Var == nullptr) | 5463 if (Var == nullptr) |
| 5464 return; | 5464 return; |
| 5465 // We use lowerStore() to copy out-args onto the stack. This creates a memory | 5465 // We use lowerStore() to copy out-args onto the stack. This creates a memory |
| 5466 // operand with the stack pointer as the base register. Don't do bounds | 5466 // operand with the stack pointer as the base register. Don't do bounds |
| 5467 // checks on that. | 5467 // checks on that. |
| 5468 if (Var->getRegNum() == getStackReg()) | 5468 if (Var->getRegNum() == getStackReg()) |
| 5469 return; | 5469 return; |
| 5470 | 5470 |
| 5471 auto *Label = InstX86Label::create(Func, this); | 5471 auto *Label = InstX86Label::create(Func, this); |
| 5472 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); | 5472 _cmp(Opnd, getConstantZero(IceType_i32)); |
| 5473 _br(Traits::Cond::Br_e, Label); | 5473 _br(Traits::Cond::Br_e, Label); |
| 5474 _cmp(Opnd, Ctx->getConstantInt32(1)); | 5474 _cmp(Opnd, Ctx->getConstantInt32(1)); |
| 5475 _br(Traits::Cond::Br_e, Label); | 5475 _br(Traits::Cond::Br_e, Label); |
| 5476 Context.insert(Label); | 5476 Context.insert(Label); |
| 5477 } | 5477 } |
| 5478 | 5478 |
| 5479 template <typename TraitsType> | 5479 template <typename TraitsType> |
| 5480 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) { | 5480 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) { |
| 5481 // A Load instruction can be treated the same as an Assign instruction, after | 5481 // A Load instruction can be treated the same as an Assign instruction, after |
| 5482 // the source operand is transformed into an X86OperandMem operand. Note that | 5482 // the source operand is transformed into an X86OperandMem operand. Note that |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5556 return; | 5556 return; |
| 5557 } | 5557 } |
| 5558 case BoolFolding<Traits>::PK_Fcmp: { | 5558 case BoolFolding<Traits>::PK_Fcmp: { |
| 5559 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select); | 5559 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select); |
| 5560 return; | 5560 return; |
| 5561 } | 5561 } |
| 5562 } | 5562 } |
| 5563 } | 5563 } |
| 5564 | 5564 |
| 5565 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); | 5565 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); |
| 5566 Operand *Zero = Ctx->getConstantZero(IceType_i32); | 5566 Operand *Zero = getConstantZero(IceType_i32); |
| 5567 _cmp(CmpResult, Zero); | 5567 _cmp(CmpResult, Zero); |
| 5568 Operand *SrcT = Select->getTrueOperand(); | 5568 Operand *SrcT = Select->getTrueOperand(); |
| 5569 Operand *SrcF = Select->getFalseOperand(); | 5569 Operand *SrcF = Select->getFalseOperand(); |
| 5570 const BrCond Cond = Traits::Cond::Br_ne; | 5570 const BrCond Cond = Traits::Cond::Br_ne; |
| 5571 lowerSelectMove(Dest, Cond, SrcT, SrcF); | 5571 lowerSelectMove(Dest, Cond, SrcT, SrcF); |
| 5572 } | 5572 } |
| 5573 | 5573 |
| 5574 template <typename TraitsType> | 5574 template <typename TraitsType> |
| 5575 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond, | 5575 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond, |
| 5576 Operand *SrcT, Operand *SrcF) { | 5576 Operand *SrcT, Operand *SrcF) { |
| (...skipping 623 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6200 // during phi lowering assignments | 6200 // during phi lowering assignments |
| 6201 BoolFlagSaver B(RandomizationPoolingPaused, true); | 6201 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 6202 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>( | 6202 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>( |
| 6203 this, Context.getNode(), Func); | 6203 this, Context.getNode(), Func); |
| 6204 } | 6204 } |
| 6205 | 6205 |
| 6206 template <typename TraitsType> | 6206 template <typename TraitsType> |
| 6207 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { | 6207 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { |
| 6208 uint32_t StackArgumentsSize = 0; | 6208 uint32_t StackArgumentsSize = 0; |
| 6209 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | 6209 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { |
| 6210 const char *HelperName = nullptr; | 6210 RuntimeHelperFuncKind HelperName = H_Num; |
| 6211 Variable *Dest = Arith->getDest(); | 6211 Variable *Dest = Arith->getDest(); |
| 6212 Type DestTy = Dest->getType(); | 6212 Type DestTy = Dest->getType(); |
| 6213 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 6213 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 6214 switch (Arith->getOp()) { | 6214 switch (Arith->getOp()) { |
| 6215 default: | 6215 default: |
| 6216 return; | 6216 return; |
| 6217 case InstArithmetic::Udiv: | 6217 case InstArithmetic::Udiv: |
| 6218 HelperName = H_udiv_i64; | 6218 HelperName = H_udiv_i64; |
| 6219 break; | 6219 break; |
| 6220 case InstArithmetic::Sdiv: | 6220 case InstArithmetic::Sdiv: |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6269 Call->addArg(Arith->getSrc(1)); | 6269 Call->addArg(Arith->getSrc(1)); |
| 6270 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 6270 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); |
| 6271 Context.insert(Call); | 6271 Context.insert(Call); |
| 6272 Arith->setDeleted(); | 6272 Arith->setDeleted(); |
| 6273 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 6273 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 6274 InstCast::OpKind CastKind = Cast->getCastKind(); | 6274 InstCast::OpKind CastKind = Cast->getCastKind(); |
| 6275 Operand *Src0 = Cast->getSrc(0); | 6275 Operand *Src0 = Cast->getSrc(0); |
| 6276 const Type SrcType = Src0->getType(); | 6276 const Type SrcType = Src0->getType(); |
| 6277 Variable *Dest = Cast->getDest(); | 6277 Variable *Dest = Cast->getDest(); |
| 6278 const Type DestTy = Dest->getType(); | 6278 const Type DestTy = Dest->getType(); |
| 6279 const char *HelperName = nullptr; | 6279 RuntimeHelperFuncKind HelperName = H_Num; |
| 6280 Variable *CallDest = Dest; | 6280 Variable *CallDest = Dest; |
| 6281 switch (CastKind) { | 6281 switch (CastKind) { |
| 6282 default: | 6282 default: |
| 6283 return; | 6283 return; |
| 6284 case InstCast::Fptosi: | 6284 case InstCast::Fptosi: |
| 6285 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 6285 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 6286 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 6286 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
| 6287 : H_fptosi_f64_i64; | 6287 : H_fptosi_f64_i64; |
| 6288 } else { | 6288 } else { |
| 6289 return; | 6289 return; |
| (...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6494 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty, | 6494 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty, |
| 6495 RegNumT RegNum) { | 6495 RegNumT RegNum) { |
| 6496 Variable *Reg = makeReg(Ty, RegNum); | 6496 Variable *Reg = makeReg(Ty, RegNum); |
| 6497 switch (Ty) { | 6497 switch (Ty) { |
| 6498 case IceType_i1: | 6498 case IceType_i1: |
| 6499 case IceType_i8: | 6499 case IceType_i8: |
| 6500 case IceType_i16: | 6500 case IceType_i16: |
| 6501 case IceType_i32: | 6501 case IceType_i32: |
| 6502 case IceType_i64: | 6502 case IceType_i64: |
| 6503 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. | 6503 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. |
| 6504 _mov(Reg, Ctx->getConstantZero(Ty)); | 6504 _mov(Reg, getConstantZero(Ty)); |
| 6505 break; | 6505 break; |
| 6506 case IceType_f32: | 6506 case IceType_f32: |
| 6507 case IceType_f64: | 6507 case IceType_f64: |
| 6508 Context.insert<InstFakeDef>(Reg); | 6508 Context.insert<InstFakeDef>(Reg); |
| 6509 _xorps(Reg, Reg); | 6509 _xorps(Reg, Reg); |
| 6510 break; | 6510 break; |
| 6511 default: | 6511 default: |
| 6512 // All vector types use the same pxor instruction. | 6512 // All vector types use the same pxor instruction. |
| 6513 assert(isVectorType(Ty)); | 6513 assert(isVectorType(Ty)); |
| 6514 Context.insert<InstFakeDef>(Reg); | 6514 Context.insert<InstFakeDef>(Reg); |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6566 SizeT Shift = | 6566 SizeT Shift = |
| 6567 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | 6567 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; |
| 6568 _psll(Reg, Ctx->getConstantInt8(Shift)); | 6568 _psll(Reg, Ctx->getConstantInt8(Shift)); |
| 6569 return Reg; | 6569 return Reg; |
| 6570 } else { | 6570 } else { |
| 6571 // SSE has no left shift operation for vectors of 8 bit integers. | 6571 // SSE has no left shift operation for vectors of 8 bit integers. |
| 6572 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 6572 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 6573 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 6573 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| 6574 Variable *Reg = makeReg(Ty, RegNum); | 6574 Variable *Reg = makeReg(Ty, RegNum); |
| 6575 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 6575 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 6576 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 6576 _pshufd(Reg, Reg, getConstantZero(IceType_i8)); |
| 6577 return Reg; | 6577 return Reg; |
| 6578 } | 6578 } |
| 6579 } | 6579 } |
| 6580 | 6580 |
| 6581 /// Construct a mask in a register that can be and'ed with a floating-point | 6581 /// Construct a mask in a register that can be and'ed with a floating-point |
| 6582 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 | 6582 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
| 6583 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of | 6583 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of |
| 6584 /// ones logically right shifted one bit. | 6584 /// ones logically right shifted one bit. |
| 6585 // TODO(stichnot): Fix the wala TODO above, to represent vector constants in | 6585 // TODO(stichnot): Fix the wala TODO above, to represent vector constants in |
| 6586 // memory. | 6586 // memory. |
| (...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6878 // | 6878 // |
| 6879 // If in the future the implementation is changed to lower undef values to | 6879 // If in the future the implementation is changed to lower undef values to |
| 6880 // uninitialized registers, a FakeDef will be needed: | 6880 // uninitialized registers, a FakeDef will be needed: |
| 6881 // Context.insert<InstFakeDef>(Reg); | 6881 // Context.insert<InstFakeDef>(Reg); |
| 6882 // This is in order to ensure that the live range of Reg is not | 6882 // This is in order to ensure that the live range of Reg is not |
| 6883 // overestimated. If the constant being lowered is a 64 bit value, then | 6883 // overestimated. If the constant being lowered is a 64 bit value, then |
| 6884 // the result should be split and the lo and hi components will need to go | 6884 // the result should be split and the lo and hi components will need to go |
| 6885 // in uninitialized registers. | 6885 // in uninitialized registers. |
| 6886 if (isVectorType(Ty)) | 6886 if (isVectorType(Ty)) |
| 6887 return makeVectorOfZeros(Ty, RegNum); | 6887 return makeVectorOfZeros(Ty, RegNum); |
| 6888 return Ctx->getConstantZero(Ty); | 6888 return getConstantZero(Ty); |
| 6889 } | 6889 } |
| 6890 return From; | 6890 return From; |
| 6891 } | 6891 } |
| 6892 | 6892 |
| 6893 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical | 6893 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical |
| 6894 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be | 6894 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be |
| 6895 /// copied into a physical register. (Actually, either Src0 or Src1 can be | 6895 /// copied into a physical register. (Actually, either Src0 or Src1 can be |
| 6896 /// chosen for the physical register, but unfortunately we have to commit to one | 6896 /// chosen for the physical register, but unfortunately we have to commit to one |
| 6897 /// or the other before register allocation.) | 6897 /// or the other before register allocation.) |
| 6898 template <typename TraitsType> | 6898 template <typename TraitsType> |
| (...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 7400 emitGlobal(*Var, SectionSuffix); | 7400 emitGlobal(*Var, SectionSuffix); |
| 7401 } | 7401 } |
| 7402 } | 7402 } |
| 7403 } break; | 7403 } break; |
| 7404 } | 7404 } |
| 7405 } | 7405 } |
| 7406 } // end of namespace X86NAMESPACE | 7406 } // end of namespace X86NAMESPACE |
| 7407 } // end of namespace Ice | 7407 } // end of namespace Ice |
| 7408 | 7408 |
| 7409 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7409 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |