| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1381 matching lines...) | (...skipping 1381 matching lines...) |
| 1392 // bit-manipulation problems below. | 1392 // bit-manipulation problems below. |
| 1393 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); | 1393 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); |
| 1394 | 1394 |
| 1395 // LLVM enforces power of 2 alignment. | 1395 // LLVM enforces power of 2 alignment. |
| 1396 assert(llvm::isPowerOf2_32(AlignmentParam)); | 1396 assert(llvm::isPowerOf2_32(AlignmentParam)); |
| 1397 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); | 1397 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); |
| 1398 | 1398 |
| 1399 const uint32_t Alignment = | 1399 const uint32_t Alignment = |
| 1400 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); | 1400 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
| 1401 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; | 1401 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; |
| 1402 const bool OptM1 = getFlags().getOptLevel() == Opt_m1; | 1402 const bool OptM1 = Func->getOptLevel() == Opt_m1; |
| 1403 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); | 1403 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); |
| 1404 const bool UseFramePointer = | 1404 const bool UseFramePointer = |
| 1405 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; | 1405 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; |
| 1406 | 1406 |
| 1407 if (UseFramePointer) | 1407 if (UseFramePointer) |
| 1408 setHasFramePointer(); | 1408 setHasFramePointer(); |
| 1409 | 1409 |
| 1410 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType); | 1410 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType); |
| 1411 if (OverAligned) { | 1411 if (OverAligned) { |
| 1412 _and(esp, Ctx->getConstantInt32(-Alignment)); | 1412 _and(esp, Ctx->getConstantInt32(-Alignment)); |
| (...skipping 109 matching lines...) | (...skipping 109 matching lines...) |
| 1522 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1522 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
| 1523 /// narrower) for certain constants. The lea instruction can be used to multiply | 1523 /// narrower) for certain constants. The lea instruction can be used to multiply |
| 1524 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of | 1524 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of |
| 1525 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1525 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
| 1526 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1526 /// lea-based multiplies by 5, combined with left-shifting by 2. |
| 1527 template <typename TraitsType> | 1527 template <typename TraitsType> |
| 1528 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1528 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1529 int32_t Src1) { | 1529 int32_t Src1) { |
| 1530 // Disable this optimization for Om1 and O0, just to keep things simple | 1530 // Disable this optimization for Om1 and O0, just to keep things simple |
| 1531 // there. | 1531 // there. |
| 1532 if (getFlags().getOptLevel() < Opt_1) | 1532 if (Func->getOptLevel() < Opt_1) |
| 1533 return false; | 1533 return false; |
| 1534 Type Ty = Dest->getType(); | 1534 Type Ty = Dest->getType(); |
| 1535 if (Src1 == -1) { | 1535 if (Src1 == -1) { |
| 1536 Variable *T = nullptr; | 1536 Variable *T = nullptr; |
| 1537 _mov(T, Src0); | 1537 _mov(T, Src0); |
| 1538 _neg(T); | 1538 _neg(T); |
| 1539 _mov(Dest, T); | 1539 _mov(Dest, T); |
| 1540 return true; | 1540 return true; |
| 1541 } | 1541 } |
| 1542 if (Src1 == 0) { | 1542 if (Src1 == 0) { |
| (...skipping 673 matching lines...) | (...skipping 673 matching lines...) |
| 2216 } | 2216 } |
| 2217 T_edx = makeReg(Ty, Edx); | 2217 T_edx = makeReg(Ty, Edx); |
| 2218 _mov(T, Src0, Eax); | 2218 _mov(T, Src0, Eax); |
| 2219 _mov(T_edx, Ctx->getConstantZero(Ty)); | 2219 _mov(T_edx, Ctx->getConstantZero(Ty)); |
| 2220 _div(T, Src1, T_edx); | 2220 _div(T, Src1, T_edx); |
| 2221 _mov(Dest, T); | 2221 _mov(Dest, T); |
| 2222 } break; | 2222 } break; |
| 2223 case InstArithmetic::Sdiv: | 2223 case InstArithmetic::Sdiv: |
| 2224 // TODO(stichnot): Enable this after doing better performance and cross | 2224 // TODO(stichnot): Enable this after doing better performance and cross |
| 2225 // testing. | 2225 // testing. |
| 2226 if (false && getFlags().getOptLevel() >= Opt_1) { | 2226 if (false && Func->getOptLevel() >= Opt_1) { |
| 2227 // Optimize division by constant power of 2, but not for Om1 or O0, just | 2227 // Optimize division by constant power of 2, but not for Om1 or O0, just |
| 2228 // to keep things simple there. | 2228 // to keep things simple there. |
| 2229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 2229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 2230 const int32_t Divisor = C->getValue(); | 2230 const int32_t Divisor = C->getValue(); |
| 2231 const uint32_t UDivisor = Divisor; | 2231 const uint32_t UDivisor = Divisor; |
| 2232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 2232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 2233 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 2233 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 2234 // LLVM does the following for dest=src/(1<<log): | 2234 // LLVM does the following for dest=src/(1<<log): |
| 2235 // t=src | 2235 // t=src |
| 2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| (...skipping 69 matching lines...) | (...skipping 69 matching lines...) |
| 2306 } | 2306 } |
| 2307 T_edx = makeReg(Ty, Edx); | 2307 T_edx = makeReg(Ty, Edx); |
| 2308 _mov(T_edx, Ctx->getConstantZero(Ty)); | 2308 _mov(T_edx, Ctx->getConstantZero(Ty)); |
| 2309 _mov(T, Src0, Eax); | 2309 _mov(T, Src0, Eax); |
| 2310 _div(T_edx, Src1, T); | 2310 _div(T_edx, Src1, T); |
| 2311 _mov(Dest, T_edx); | 2311 _mov(Dest, T_edx); |
| 2312 } break; | 2312 } break; |
| 2313 case InstArithmetic::Srem: { | 2313 case InstArithmetic::Srem: { |
| 2314 // TODO(stichnot): Enable this after doing better performance and cross | 2314 // TODO(stichnot): Enable this after doing better performance and cross |
| 2315 // testing. | 2315 // testing. |
| 2316 if (false && getFlags().getOptLevel() >= Opt_1) { | 2316 if (false && Func->getOptLevel() >= Opt_1) { |
| 2317 // Optimize mod by constant power of 2, but not for Om1 or O0, just to | 2317 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
| 2318 // keep things simple there. | 2318 // keep things simple there. |
| 2319 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 2319 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 2320 const int32_t Divisor = C->getValue(); | 2320 const int32_t Divisor = C->getValue(); |
| 2321 const uint32_t UDivisor = Divisor; | 2321 const uint32_t UDivisor = Divisor; |
| 2322 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 2322 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 2323 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 2323 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 2324 // LLVM does the following for dest=src%(1<<log): | 2324 // LLVM does the following for dest=src%(1<<log): |
| 2325 // t=src | 2325 // t=src |
| 2326 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 2326 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| (...skipping 1972 matching lines...) | (...skipping 1972 matching lines...) |
| 4299 constexpr bool Locked = true; | 4299 constexpr bool Locked = true; |
| 4300 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 4300 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 4301 _mov(DestPrev, T_eax); | 4301 _mov(DestPrev, T_eax); |
| 4302 } | 4302 } |
| 4303 | 4303 |
| 4304 template <typename TraitsType> | 4304 template <typename TraitsType> |
| 4305 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest, | 4305 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
| 4306 Operand *PtrToMem, | 4306 Operand *PtrToMem, |
| 4307 Operand *Expected, | 4307 Operand *Expected, |
| 4308 Operand *Desired) { | 4308 Operand *Desired) { |
| 4309 if (getFlags().getOptLevel() == Opt_m1) | 4309 if (Func->getOptLevel() == Opt_m1) |
| 4310 return false; | 4310 return false; |
| 4311 // Peek ahead a few instructions and see how Dest is used. | 4311 // Peek ahead a few instructions and see how Dest is used. |
| 4312 // It's very common to have: | 4312 // It's very common to have: |
| 4313 // | 4313 // |
| 4314 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | 4314 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) |
| 4315 // [%y_phi = ...] // list of phi stores | 4315 // [%y_phi = ...] // list of phi stores |
| 4316 // %p = icmp eq i32 %x, %expected | 4316 // %p = icmp eq i32 %x, %expected |
| 4317 // br i1 %p, label %l1, label %l2 | 4317 // br i1 %p, label %l1, label %l2 |
| 4318 // | 4318 // |
| 4319 // which we can optimize into: | 4319 // which we can optimize into: |
| (...skipping 2689 matching lines...) | (...skipping 2689 matching lines...) |
| 7009 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 7009 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
| 7010 if (!llvm::isPowerOf2_32(Size)) | 7010 if (!llvm::isPowerOf2_32(Size)) |
| 7011 ++TyIndex; | 7011 ++TyIndex; |
| 7012 uint32_t MaxIndex = MaxSize == NoSizeLimit | 7012 uint32_t MaxIndex = MaxSize == NoSizeLimit |
| 7013 ? llvm::array_lengthof(TypeForSize) - 1 | 7013 ? llvm::array_lengthof(TypeForSize) - 1 |
| 7014 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 7014 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
| 7015 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 7015 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
| 7016 } | 7016 } |
| 7017 | 7017 |
| 7018 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() { | 7018 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() { |
| 7019 if (getFlags().getOptLevel() == Opt_m1) | 7019 if (Func->getOptLevel() == Opt_m1) |
| 7020 return; | 7020 return; |
| 7021 markRedefinitions(); | 7021 markRedefinitions(); |
| 7022 Context.availabilityUpdate(); | 7022 Context.availabilityUpdate(); |
| 7023 } | 7023 } |
| 7024 | 7024 |
| 7025 template <typename TraitsType> | 7025 template <typename TraitsType> |
| 7026 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation( | 7026 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation( |
| 7027 llvm::SmallVectorImpl<RegNumT> &Permutation, | 7027 llvm::SmallVectorImpl<RegNumT> &Permutation, |
| 7028 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const { | 7028 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const { |
| 7029 Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters, | 7029 Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters, |
| (...skipping 380 matching lines...) | (...skipping 380 matching lines...) |
| 7410 void TargetDataX86<TraitsType>::lowerGlobals( | 7410 void TargetDataX86<TraitsType>::lowerGlobals( |
| 7411 const VariableDeclarationList &Vars, const std::string &SectionSuffix) { | 7411 const VariableDeclarationList &Vars, const std::string &SectionSuffix) { |
| 7412 const bool IsPIC = getFlags().getUseNonsfi(); | 7412 const bool IsPIC = getFlags().getUseNonsfi(); |
| 7413 switch (getFlags().getOutFileType()) { | 7413 switch (getFlags().getOutFileType()) { |
| 7414 case FT_Elf: { | 7414 case FT_Elf: { |
| 7415 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | 7415 ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| 7416 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC); | 7416 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC); |
| 7417 } break; | 7417 } break; |
| 7418 case FT_Asm: | 7418 case FT_Asm: |
| 7419 case FT_Iasm: { | 7419 case FT_Iasm: { |
| 7420 const std::string TranslateOnly = getFlags().getTranslateOnly(); | |
| 7421 OstreamLocker L(Ctx); | 7420 OstreamLocker L(Ctx); |
| 7422 for (const VariableDeclaration *Var : Vars) { | 7421 for (const VariableDeclaration *Var : Vars) { |
| 7423 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { | 7422 if (getFlags().matchTranslateOnly(Var->getName(), 0)) { |
| 7424 emitGlobal(*Var, SectionSuffix); | 7423 emitGlobal(*Var, SectionSuffix); |
| 7425 } | 7424 } |
| 7426 } | 7425 } |
| 7427 } break; | 7426 } break; |
| 7428 } | 7427 } |
| 7429 } | 7428 } |
| 7430 } // end of namespace X86NAMESPACE | 7429 } // end of namespace X86NAMESPACE |
| 7431 } // end of namespace Ice | 7430 } // end of namespace Ice |
| 7432 | 7431 |
| 7433 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7432 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |