OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1507 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1518 return false; | 1518 return false; |
1519 Type Ty = Dest->getType(); | 1519 Type Ty = Dest->getType(); |
1520 if (Src1 == -1) { | 1520 if (Src1 == -1) { |
1521 Variable *T = nullptr; | 1521 Variable *T = nullptr; |
1522 _mov(T, Src0); | 1522 _mov(T, Src0); |
1523 _neg(T); | 1523 _neg(T); |
1524 _mov(Dest, T); | 1524 _mov(Dest, T); |
1525 return true; | 1525 return true; |
1526 } | 1526 } |
1527 if (Src1 == 0) { | 1527 if (Src1 == 0) { |
1528 _mov(Dest, Ctx->getConstantZero(Ty)); | 1528 _mov(Dest, getConstantZero(Ty)); |
1529 return true; | 1529 return true; |
1530 } | 1530 } |
1531 if (Src1 == 1) { | 1531 if (Src1 == 1) { |
1532 Variable *T = nullptr; | 1532 Variable *T = nullptr; |
1533 _mov(T, Src0); | 1533 _mov(T, Src0); |
1534 _mov(Dest, T); | 1534 _mov(Dest, T); |
1535 return true; | 1535 return true; |
1536 } | 1536 } |
1537 // Don't bother with the edge case where Src1 == MININT. | 1537 // Don't bother with the edge case where Src1 == MININT. |
1538 if (Src1 == -Src1) | 1538 if (Src1 == -Src1) |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1575 // somewhat arbitrary choice of 3. | 1575 // somewhat arbitrary choice of 3. |
1576 constexpr uint32_t MaxOpsForOptimizedMul = 3; | 1576 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
1577 if (CountOps > MaxOpsForOptimizedMul) | 1577 if (CountOps > MaxOpsForOptimizedMul) |
1578 return false; | 1578 return false; |
1579 Variable *T = makeReg(Traits::WordType); | 1579 Variable *T = makeReg(Traits::WordType); |
1580 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) { | 1580 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) { |
1581 _movzx(T, Src0); | 1581 _movzx(T, Src0); |
1582 } else { | 1582 } else { |
1583 _mov(T, Src0); | 1583 _mov(T, Src0); |
1584 } | 1584 } |
1585 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1585 Constant *Zero = getConstantZero(IceType_i32); |
1586 for (uint32_t i = 0; i < Count9; ++i) { | 1586 for (uint32_t i = 0; i < Count9; ++i) { |
1587 constexpr uint16_t Shift = 3; // log2(9-1) | 1587 constexpr uint16_t Shift = 3; // log2(9-1) |
1588 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1588 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1589 } | 1589 } |
1590 for (uint32_t i = 0; i < Count5; ++i) { | 1590 for (uint32_t i = 0; i < Count5; ++i) { |
1591 constexpr uint16_t Shift = 2; // log2(5-1) | 1591 constexpr uint16_t Shift = 2; // log2(5-1) |
1592 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1592 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1593 } | 1593 } |
1594 for (uint32_t i = 0; i < Count3; ++i) { | 1594 for (uint32_t i = 0; i < Count3; ++i) { |
1595 constexpr uint16_t Shift = 1; // log2(3-1) | 1595 constexpr uint16_t Shift = 1; // log2(3-1) |
1596 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1596 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1597 } | 1597 } |
1598 if (Count2) { | 1598 if (Count2) { |
1599 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1599 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
1600 } | 1600 } |
1601 if (Src1IsNegative) | 1601 if (Src1IsNegative) |
1602 _neg(T); | 1602 _neg(T); |
1603 _mov(Dest, T); | 1603 _mov(Dest, T); |
1604 return true; | 1604 return true; |
1605 } | 1605 } |
1606 | 1606 |
1607 template <typename TraitsType> | 1607 template <typename TraitsType> |
1608 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op, | 1608 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op, |
1609 Operand *Src0Lo, Operand *Src0Hi, | 1609 Operand *Src0Lo, Operand *Src0Hi, |
1610 Operand *Src1Lo, Variable *DestLo, | 1610 Operand *Src1Lo, Variable *DestLo, |
1611 Variable *DestHi) { | 1611 Variable *DestHi) { |
1612 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | 1612 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. |
1613 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1613 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
1614 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1614 Constant *Zero = getConstantZero(IceType_i32); |
1615 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | 1615 Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
1616 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { | 1616 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
1617 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); | 1617 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
1618 if (ShiftAmount > 32) { | 1618 if (ShiftAmount > 32) { |
1619 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); | 1619 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); |
1620 switch (Op) { | 1620 switch (Op) { |
1621 default: | 1621 default: |
1622 assert(0 && "non-shift op"); | 1622 assert(0 && "non-shift op"); |
1623 break; | 1623 break; |
1624 case InstArithmetic::Shl: { | 1624 case InstArithmetic::Shl: { |
(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2194 Eax = Traits::RegisterSet::Reg_ax; | 2194 Eax = Traits::RegisterSet::Reg_ax; |
2195 Edx = Traits::RegisterSet::Reg_dx; | 2195 Edx = Traits::RegisterSet::Reg_dx; |
2196 break; | 2196 break; |
2197 case IceType_i8: | 2197 case IceType_i8: |
2198 Eax = Traits::RegisterSet::Reg_al; | 2198 Eax = Traits::RegisterSet::Reg_al; |
2199 Edx = Traits::RegisterSet::Reg_ah; | 2199 Edx = Traits::RegisterSet::Reg_ah; |
2200 break; | 2200 break; |
2201 } | 2201 } |
2202 T_edx = makeReg(Ty, Edx); | 2202 T_edx = makeReg(Ty, Edx); |
2203 _mov(T, Src0, Eax); | 2203 _mov(T, Src0, Eax); |
2204 _mov(T_edx, Ctx->getConstantZero(Ty)); | 2204 _mov(T_edx, getConstantZero(Ty)); |
2205 _div(T, Src1, T_edx); | 2205 _div(T, Src1, T_edx); |
2206 _mov(Dest, T); | 2206 _mov(Dest, T); |
2207 } break; | 2207 } break; |
2208 case InstArithmetic::Sdiv: | 2208 case InstArithmetic::Sdiv: |
2209 // TODO(stichnot): Enable this after doing better performance and cross | 2209 // TODO(stichnot): Enable this after doing better performance and cross |
2210 // testing. | 2210 // testing. |
2211 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 2211 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
2212 // Optimize division by constant power of 2, but not for Om1 or O0, just | 2212 // Optimize division by constant power of 2, but not for Om1 or O0, just |
2213 // to keep things simple there. | 2213 // to keep things simple there. |
2214 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 2214 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2283 case IceType_i16: | 2283 case IceType_i16: |
2284 Eax = Traits::RegisterSet::Reg_ax; | 2284 Eax = Traits::RegisterSet::Reg_ax; |
2285 Edx = Traits::RegisterSet::Reg_dx; | 2285 Edx = Traits::RegisterSet::Reg_dx; |
2286 break; | 2286 break; |
2287 case IceType_i8: | 2287 case IceType_i8: |
2288 Eax = Traits::RegisterSet::Reg_al; | 2288 Eax = Traits::RegisterSet::Reg_al; |
2289 Edx = Traits::RegisterSet::Reg_ah; | 2289 Edx = Traits::RegisterSet::Reg_ah; |
2290 break; | 2290 break; |
2291 } | 2291 } |
2292 T_edx = makeReg(Ty, Edx); | 2292 T_edx = makeReg(Ty, Edx); |
2293 _mov(T_edx, Ctx->getConstantZero(Ty)); | 2293 _mov(T_edx, getConstantZero(Ty)); |
2294 _mov(T, Src0, Eax); | 2294 _mov(T, Src0, Eax); |
2295 _div(T_edx, Src1, T); | 2295 _div(T_edx, Src1, T); |
2296 _mov(Dest, T_edx); | 2296 _mov(Dest, T_edx); |
2297 } break; | 2297 } break; |
2298 case InstArithmetic::Srem: { | 2298 case InstArithmetic::Srem: { |
2299 // TODO(stichnot): Enable this after doing better performance and cross | 2299 // TODO(stichnot): Enable this after doing better performance and cross |
2300 // testing. | 2300 // testing. |
2301 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 2301 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
2302 // Optimize mod by constant power of 2, but not for Om1 or O0, just to | 2302 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
2303 // keep things simple there. | 2303 // keep things simple there. |
2304 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 2304 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
2305 const int32_t Divisor = C->getValue(); | 2305 const int32_t Divisor = C->getValue(); |
2306 const uint32_t UDivisor = Divisor; | 2306 const uint32_t UDivisor = Divisor; |
2307 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 2307 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
2308 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 2308 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
2309 // LLVM does the following for dest=src%(1<<log): | 2309 // LLVM does the following for dest=src%(1<<log): |
2310 // t=src | 2310 // t=src |
2311 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 2311 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
2312 // shr t,typewidth-log | 2312 // shr t,typewidth-log |
2313 // add t,src | 2313 // add t,src |
2314 // and t, -(1<<log) | 2314 // and t, -(1<<log) |
2315 // sub t,src | 2315 // sub t,src |
2316 // neg t | 2316 // neg t |
2317 // dest=t | 2317 // dest=t |
2318 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); | 2318 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
2319 // If for some reason we are dividing by 1, just assign 0. | 2319 // If for some reason we are dividing by 1, just assign 0. |
2320 if (LogDiv == 0) { | 2320 if (LogDiv == 0) { |
2321 _mov(Dest, Ctx->getConstantZero(Ty)); | 2321 _mov(Dest, getConstantZero(Ty)); |
2322 return; | 2322 return; |
2323 } | 2323 } |
2324 _mov(T, Src0); | 2324 _mov(T, Src0); |
2325 // The initial sar is unnecessary when dividing by 2. | 2325 // The initial sar is unnecessary when dividing by 2. |
2326 if (LogDiv > 1) | 2326 if (LogDiv > 1) |
2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | 2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
2329 _add(T, Src0); | 2329 _add(T, Src0); |
2330 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 2330 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
2331 _sub(T, Src0); | 2331 _sub(T, Src0); |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2425 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br); | 2425 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br); |
2426 return; | 2426 return; |
2427 } | 2427 } |
2428 case BoolFolding<Traits>::PK_Arith: { | 2428 case BoolFolding<Traits>::PK_Arith: { |
2429 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br); | 2429 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br); |
2430 return; | 2430 return; |
2431 } | 2431 } |
2432 } | 2432 } |
2433 } | 2433 } |
2434 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2434 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
2435 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2435 Constant *Zero = getConstantZero(IceType_i32); |
2436 _cmp(Src0, Zero); | 2436 _cmp(Src0, Zero); |
2437 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 2437 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
2438 } | 2438 } |
2439 | 2439 |
2440 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining | 2440 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining |
2441 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. | 2441 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. |
2442 inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) { | 2442 inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) { |
2443 return S0 < S1 ? S1 : S0; | 2443 return S0 < S1 ? S1 : S0; |
2444 } | 2444 } |
2445 | 2445 |
(...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2724 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem); | 2724 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem); |
2725 if (isVectorType(DestTy)) { | 2725 if (isVectorType(DestTy)) { |
2726 // onemask = materialize(1,1,...); dest = onemask & src | 2726 // onemask = materialize(1,1,...); dest = onemask & src |
2727 Variable *OneMask = makeVectorOfOnes(DestTy); | 2727 Variable *OneMask = makeVectorOfOnes(DestTy); |
2728 Variable *T = makeReg(DestTy); | 2728 Variable *T = makeReg(DestTy); |
2729 _movp(T, Src0RM); | 2729 _movp(T, Src0RM); |
2730 _pand(T, OneMask); | 2730 _pand(T, OneMask); |
2731 _movp(Dest, T); | 2731 _movp(Dest, T); |
2732 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { | 2732 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { |
2733 // t1=movzx src; dst.lo=t1; dst.hi=0 | 2733 // t1=movzx src; dst.lo=t1; dst.hi=0 |
2734 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2734 Constant *Zero = getConstantZero(IceType_i32); |
2735 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2735 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2736 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2736 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2737 Variable *Tmp = makeReg(DestLo->getType()); | 2737 Variable *Tmp = makeReg(DestLo->getType()); |
2738 if (Src0RM->getType() == IceType_i32) { | 2738 if (Src0RM->getType() == IceType_i32) { |
2739 _mov(Tmp, Src0RM); | 2739 _mov(Tmp, Src0RM); |
2740 } else { | 2740 } else { |
2741 _movzx(Tmp, Src0RM); | 2741 _movzx(Tmp, Src0RM); |
2742 } | 2742 } |
2743 _mov(DestLo, Tmp); | 2743 _mov(DestLo, Tmp); |
2744 _mov(DestHi, Zero); | 2744 _mov(DestHi, Zero); |
(...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3906 Operand *ByteSize = Instr->getArg(0); | 3906 Operand *ByteSize = Instr->getArg(0); |
3907 Variable *Dest = Instr->getDest(); | 3907 Variable *Dest = Instr->getDest(); |
3908 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { | 3908 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { |
3909 Constant *Result; | 3909 Constant *Result; |
3910 switch (CI->getValue()) { | 3910 switch (CI->getValue()) { |
3911 default: | 3911 default: |
3912 // Some x86-64 processors support the cmpxchg16b instruction, which can | 3912 // Some x86-64 processors support the cmpxchg16b instruction, which can |
3913 // make 16-byte operations lock free (when used with the LOCK prefix). | 3913 // make 16-byte operations lock free (when used with the LOCK prefix). |
3914 // However, that's not supported in 32-bit mode, so just return 0 even | 3914 // However, that's not supported in 32-bit mode, so just return 0 even |
3915 // for large sizes. | 3915 // for large sizes. |
3916 Result = Ctx->getConstantZero(IceType_i32); | 3916 Result = getConstantZero(IceType_i32); |
3917 break; | 3917 break; |
3918 case 1: | 3918 case 1: |
3919 case 2: | 3919 case 2: |
3920 case 4: | 3920 case 4: |
3921 case 8: | 3921 case 8: |
3922 Result = Ctx->getConstantInt32(1); | 3922 Result = Ctx->getConstantInt32(1); |
3923 break; | 3923 break; |
3924 } | 3924 } |
3925 _mov(Dest, Result); | 3925 _mov(Dest, Result); |
3926 return; | 3926 return; |
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4065 lowerCall(Call); | 4065 lowerCall(Call); |
4066 // The popcount helpers always return 32-bit values, while the intrinsic's | 4066 // The popcount helpers always return 32-bit values, while the intrinsic's |
4067 // signature matches the native POPCNT instruction and fills a 64-bit reg | 4067 // signature matches the native POPCNT instruction and fills a 64-bit reg |
4068 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case | 4068 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case |
4069 // the user doesn't do that in the IR. If the user does that in the IR, | 4069 // the user doesn't do that in the IR. If the user does that in the IR, |
4070 // then this zero'ing instruction is dead and gets optimized out. | 4070 // then this zero'ing instruction is dead and gets optimized out. |
4071 if (!Traits::Is64Bit) { | 4071 if (!Traits::Is64Bit) { |
4072 assert(T == Dest); | 4072 assert(T == Dest); |
4073 if (Val->getType() == IceType_i64) { | 4073 if (Val->getType() == IceType_i64) { |
4074 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4074 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
4075 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 4075 Constant *Zero = getConstantZero(IceType_i32); |
4076 _mov(DestHi, Zero); | 4076 _mov(DestHi, Zero); |
4077 } | 4077 } |
4078 } else { | 4078 } else { |
4079 assert(Val->getType() == IceType_i64); | 4079 assert(Val->getType() == IceType_i64); |
4080 // T is 64 bit. It needs to be copied to dest. We need to: | 4080 // T is 64 bit. It needs to be copied to dest. We need to: |
4081 // | 4081 // |
4082 // T_1.32 = trunc T.64 to i32 | 4082 // T_1.32 = trunc T.64 to i32 |
4083 // T_2.64 = zext T_1.32 to i64 | 4083 // T_2.64 = zext T_1.32 to i64 |
4084 // Dest.<<right_size>> = T_2.<<right_size>> | 4084 // Dest.<<right_size>> = T_2.<<right_size>> |
4085 // | 4085 // |
(...skipping 570 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4656 Variable *T_Dest2 = makeReg(IceType_i32); | 4656 Variable *T_Dest2 = makeReg(IceType_i32); |
4657 if (Cttz) { | 4657 if (Cttz) { |
4658 _bsf(T_Dest2, SecondVar); | 4658 _bsf(T_Dest2, SecondVar); |
4659 } else { | 4659 } else { |
4660 _bsr(T_Dest2, SecondVar); | 4660 _bsr(T_Dest2, SecondVar); |
4661 _xor(T_Dest2, _31); | 4661 _xor(T_Dest2, _31); |
4662 } | 4662 } |
4663 _test(SecondVar, SecondVar); | 4663 _test(SecondVar, SecondVar); |
4664 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 4664 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
4665 _mov(DestLo, T_Dest2); | 4665 _mov(DestLo, T_Dest2); |
4666 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 4666 _mov(DestHi, getConstantZero(IceType_i32)); |
4667 } | 4667 } |
4668 | 4668 |
4669 template <typename TraitsType> | 4669 template <typename TraitsType> |
4670 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest, | 4670 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest, |
4671 Variable *Base, Constant *Offset) { | 4671 Variable *Base, Constant *Offset) { |
4672 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to | 4672 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to |
4673 // legalize Mem properly. | 4673 // legalize Mem properly. |
4674 if (Offset) | 4674 if (Offset) |
4675 assert(!llvm::isa<ConstantRelocatable>(Offset)); | 4675 assert(!llvm::isa<ConstantRelocatable>(Offset)); |
4676 | 4676 |
(...skipping 785 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5462 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); | 5462 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); |
5463 if (Var == nullptr) | 5463 if (Var == nullptr) |
5464 return; | 5464 return; |
5465 // We use lowerStore() to copy out-args onto the stack. This creates a memory | 5465 // We use lowerStore() to copy out-args onto the stack. This creates a memory |
5466 // operand with the stack pointer as the base register. Don't do bounds | 5466 // operand with the stack pointer as the base register. Don't do bounds |
5467 // checks on that. | 5467 // checks on that. |
5468 if (Var->getRegNum() == getStackReg()) | 5468 if (Var->getRegNum() == getStackReg()) |
5469 return; | 5469 return; |
5470 | 5470 |
5471 auto *Label = InstX86Label::create(Func, this); | 5471 auto *Label = InstX86Label::create(Func, this); |
5472 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); | 5472 _cmp(Opnd, getConstantZero(IceType_i32)); |
5473 _br(Traits::Cond::Br_e, Label); | 5473 _br(Traits::Cond::Br_e, Label); |
5474 _cmp(Opnd, Ctx->getConstantInt32(1)); | 5474 _cmp(Opnd, Ctx->getConstantInt32(1)); |
5475 _br(Traits::Cond::Br_e, Label); | 5475 _br(Traits::Cond::Br_e, Label); |
5476 Context.insert(Label); | 5476 Context.insert(Label); |
5477 } | 5477 } |
5478 | 5478 |
5479 template <typename TraitsType> | 5479 template <typename TraitsType> |
5480 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) { | 5480 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) { |
5481 // A Load instruction can be treated the same as an Assign instruction, after | 5481 // A Load instruction can be treated the same as an Assign instruction, after |
5482 // the source operand is transformed into an X86OperandMem operand. Note that | 5482 // the source operand is transformed into an X86OperandMem operand. Note that |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5556 return; | 5556 return; |
5557 } | 5557 } |
5558 case BoolFolding<Traits>::PK_Fcmp: { | 5558 case BoolFolding<Traits>::PK_Fcmp: { |
5559 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select); | 5559 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select); |
5560 return; | 5560 return; |
5561 } | 5561 } |
5562 } | 5562 } |
5563 } | 5563 } |
5564 | 5564 |
5565 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); | 5565 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); |
5566 Operand *Zero = Ctx->getConstantZero(IceType_i32); | 5566 Operand *Zero = getConstantZero(IceType_i32); |
5567 _cmp(CmpResult, Zero); | 5567 _cmp(CmpResult, Zero); |
5568 Operand *SrcT = Select->getTrueOperand(); | 5568 Operand *SrcT = Select->getTrueOperand(); |
5569 Operand *SrcF = Select->getFalseOperand(); | 5569 Operand *SrcF = Select->getFalseOperand(); |
5570 const BrCond Cond = Traits::Cond::Br_ne; | 5570 const BrCond Cond = Traits::Cond::Br_ne; |
5571 lowerSelectMove(Dest, Cond, SrcT, SrcF); | 5571 lowerSelectMove(Dest, Cond, SrcT, SrcF); |
5572 } | 5572 } |
5573 | 5573 |
5574 template <typename TraitsType> | 5574 template <typename TraitsType> |
5575 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond, | 5575 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond, |
5576 Operand *SrcT, Operand *SrcF) { | 5576 Operand *SrcT, Operand *SrcF) { |
(...skipping 623 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6200 // during phi lowering assignments | 6200 // during phi lowering assignments |
6201 BoolFlagSaver B(RandomizationPoolingPaused, true); | 6201 BoolFlagSaver B(RandomizationPoolingPaused, true); |
6202 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>( | 6202 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>( |
6203 this, Context.getNode(), Func); | 6203 this, Context.getNode(), Func); |
6204 } | 6204 } |
6205 | 6205 |
6206 template <typename TraitsType> | 6206 template <typename TraitsType> |
6207 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { | 6207 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { |
6208 uint32_t StackArgumentsSize = 0; | 6208 uint32_t StackArgumentsSize = 0; |
6209 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | 6209 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { |
6210 const char *HelperName = nullptr; | 6210 RuntimeHelperFuncKind HelperName = H_Num; |
6211 Variable *Dest = Arith->getDest(); | 6211 Variable *Dest = Arith->getDest(); |
6212 Type DestTy = Dest->getType(); | 6212 Type DestTy = Dest->getType(); |
6213 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 6213 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
6214 switch (Arith->getOp()) { | 6214 switch (Arith->getOp()) { |
6215 default: | 6215 default: |
6216 return; | 6216 return; |
6217 case InstArithmetic::Udiv: | 6217 case InstArithmetic::Udiv: |
6218 HelperName = H_udiv_i64; | 6218 HelperName = H_udiv_i64; |
6219 break; | 6219 break; |
6220 case InstArithmetic::Sdiv: | 6220 case InstArithmetic::Sdiv: |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6269 Call->addArg(Arith->getSrc(1)); | 6269 Call->addArg(Arith->getSrc(1)); |
6270 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 6270 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); |
6271 Context.insert(Call); | 6271 Context.insert(Call); |
6272 Arith->setDeleted(); | 6272 Arith->setDeleted(); |
6273 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 6273 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
6274 InstCast::OpKind CastKind = Cast->getCastKind(); | 6274 InstCast::OpKind CastKind = Cast->getCastKind(); |
6275 Operand *Src0 = Cast->getSrc(0); | 6275 Operand *Src0 = Cast->getSrc(0); |
6276 const Type SrcType = Src0->getType(); | 6276 const Type SrcType = Src0->getType(); |
6277 Variable *Dest = Cast->getDest(); | 6277 Variable *Dest = Cast->getDest(); |
6278 const Type DestTy = Dest->getType(); | 6278 const Type DestTy = Dest->getType(); |
6279 const char *HelperName = nullptr; | 6279 RuntimeHelperFuncKind HelperName = H_Num; |
6280 Variable *CallDest = Dest; | 6280 Variable *CallDest = Dest; |
6281 switch (CastKind) { | 6281 switch (CastKind) { |
6282 default: | 6282 default: |
6283 return; | 6283 return; |
6284 case InstCast::Fptosi: | 6284 case InstCast::Fptosi: |
6285 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 6285 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
6286 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 6286 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
6287 : H_fptosi_f64_i64; | 6287 : H_fptosi_f64_i64; |
6288 } else { | 6288 } else { |
6289 return; | 6289 return; |
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6494 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty, | 6494 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty, |
6495 RegNumT RegNum) { | 6495 RegNumT RegNum) { |
6496 Variable *Reg = makeReg(Ty, RegNum); | 6496 Variable *Reg = makeReg(Ty, RegNum); |
6497 switch (Ty) { | 6497 switch (Ty) { |
6498 case IceType_i1: | 6498 case IceType_i1: |
6499 case IceType_i8: | 6499 case IceType_i8: |
6500 case IceType_i16: | 6500 case IceType_i16: |
6501 case IceType_i32: | 6501 case IceType_i32: |
6502 case IceType_i64: | 6502 case IceType_i64: |
6503 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. | 6503 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. |
6504 _mov(Reg, Ctx->getConstantZero(Ty)); | 6504 _mov(Reg, getConstantZero(Ty)); |
6505 break; | 6505 break; |
6506 case IceType_f32: | 6506 case IceType_f32: |
6507 case IceType_f64: | 6507 case IceType_f64: |
6508 Context.insert<InstFakeDef>(Reg); | 6508 Context.insert<InstFakeDef>(Reg); |
6509 _xorps(Reg, Reg); | 6509 _xorps(Reg, Reg); |
6510 break; | 6510 break; |
6511 default: | 6511 default: |
6512 // All vector types use the same pxor instruction. | 6512 // All vector types use the same pxor instruction. |
6513 assert(isVectorType(Ty)); | 6513 assert(isVectorType(Ty)); |
6514 Context.insert<InstFakeDef>(Reg); | 6514 Context.insert<InstFakeDef>(Reg); |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6566 SizeT Shift = | 6566 SizeT Shift = |
6567 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | 6567 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; |
6568 _psll(Reg, Ctx->getConstantInt8(Shift)); | 6568 _psll(Reg, Ctx->getConstantInt8(Shift)); |
6569 return Reg; | 6569 return Reg; |
6570 } else { | 6570 } else { |
6571 // SSE has no left shift operation for vectors of 8 bit integers. | 6571 // SSE has no left shift operation for vectors of 8 bit integers. |
6572 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 6572 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
6573 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 6573 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
6574 Variable *Reg = makeReg(Ty, RegNum); | 6574 Variable *Reg = makeReg(Ty, RegNum); |
6575 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 6575 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
6576 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 6576 _pshufd(Reg, Reg, getConstantZero(IceType_i8)); |
6577 return Reg; | 6577 return Reg; |
6578 } | 6578 } |
6579 } | 6579 } |
6580 | 6580 |
6581 /// Construct a mask in a register that can be and'ed with a floating-point | 6581 /// Construct a mask in a register that can be and'ed with a floating-point |
6582 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 | 6582 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
6583 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of | 6583 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of |
6584 /// ones logically right shifted one bit. | 6584 /// ones logically right shifted one bit. |
6585 // TODO(stichnot): Fix the wala TODO above, to represent vector constants in | 6585 // TODO(stichnot): Fix the wala TODO above, to represent vector constants in |
6586 // memory. | 6586 // memory. |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6878 // | 6878 // |
6879 // If in the future the implementation is changed to lower undef values to | 6879 // If in the future the implementation is changed to lower undef values to |
6880 // uninitialized registers, a FakeDef will be needed: | 6880 // uninitialized registers, a FakeDef will be needed: |
6881 // Context.insert<InstFakeDef>(Reg); | 6881 // Context.insert<InstFakeDef>(Reg); |
6882 // This is in order to ensure that the live range of Reg is not | 6882 // This is in order to ensure that the live range of Reg is not |
6883 // overestimated. If the constant being lowered is a 64 bit value, then | 6883 // overestimated. If the constant being lowered is a 64 bit value, then |
6884 // the result should be split and the lo and hi components will need to go | 6884 // the result should be split and the lo and hi components will need to go |
6885 // in uninitialized registers. | 6885 // in uninitialized registers. |
6886 if (isVectorType(Ty)) | 6886 if (isVectorType(Ty)) |
6887 return makeVectorOfZeros(Ty, RegNum); | 6887 return makeVectorOfZeros(Ty, RegNum); |
6888 return Ctx->getConstantZero(Ty); | 6888 return getConstantZero(Ty); |
6889 } | 6889 } |
6890 return From; | 6890 return From; |
6891 } | 6891 } |
6892 | 6892 |
6893 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical | 6893 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical |
6894 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be | 6894 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be |
6895 /// copied into a physical register. (Actually, either Src0 or Src1 can be | 6895 /// copied into a physical register. (Actually, either Src0 or Src1 can be |
6896 /// chosen for the physical register, but unfortunately we have to commit to one | 6896 /// chosen for the physical register, but unfortunately we have to commit to one |
6897 /// or the other before register allocation.) | 6897 /// or the other before register allocation.) |
6898 template <typename TraitsType> | 6898 template <typename TraitsType> |
(...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7400 emitGlobal(*Var, SectionSuffix); | 7400 emitGlobal(*Var, SectionSuffix); |
7401 } | 7401 } |
7402 } | 7402 } |
7403 } break; | 7403 } break; |
7404 } | 7404 } |
7405 } | 7405 } |
7406 } // end of namespace X86NAMESPACE | 7406 } // end of namespace X86NAMESPACE |
7407 } // end of namespace Ice | 7407 } // end of namespace Ice |
7408 | 7408 |
7409 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7409 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |