OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1381 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1392 // bit-manipulation problems below. | 1392 // bit-manipulation problems below. |
1393 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); | 1393 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); |
1394 | 1394 |
1395 // LLVM enforces power of 2 alignment. | 1395 // LLVM enforces power of 2 alignment. |
1396 assert(llvm::isPowerOf2_32(AlignmentParam)); | 1396 assert(llvm::isPowerOf2_32(AlignmentParam)); |
1397 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); | 1397 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); |
1398 | 1398 |
1399 const uint32_t Alignment = | 1399 const uint32_t Alignment = |
1400 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); | 1400 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
1401 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; | 1401 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; |
1402 const bool OptM1 = getFlags().getOptLevel() == Opt_m1; | 1402 const bool OptM1 = Func->getOptLevel() == Opt_m1; |
1403 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); | 1403 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); |
1404 const bool UseFramePointer = | 1404 const bool UseFramePointer = |
1405 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; | 1405 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; |
1406 | 1406 |
1407 if (UseFramePointer) | 1407 if (UseFramePointer) |
1408 setHasFramePointer(); | 1408 setHasFramePointer(); |
1409 | 1409 |
1410 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType); | 1410 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType); |
1411 if (OverAligned) { | 1411 if (OverAligned) { |
1412 _and(esp, Ctx->getConstantInt32(-Alignment)); | 1412 _and(esp, Ctx->getConstantInt32(-Alignment)); |
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1522 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1522 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
1523 /// narrower) for certain constants. The lea instruction can be used to multiply | 1523 /// narrower) for certain constants. The lea instruction can be used to multiply |
1524 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of | 1524 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of |
1525 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1525 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
1526 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1526 /// lea-based multiplies by 5, combined with left-shifting by 2. |
1527 template <typename TraitsType> | 1527 template <typename TraitsType> |
1528 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1528 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
1529 int32_t Src1) { | 1529 int32_t Src1) { |
1530 // Disable this optimization for Om1 and O0, just to keep things simple | 1530 // Disable this optimization for Om1 and O0, just to keep things simple |
1531 // there. | 1531 // there. |
1532 if (getFlags().getOptLevel() < Opt_1) | 1532 if (Func->getOptLevel() < Opt_1) |
1533 return false; | 1533 return false; |
1534 Type Ty = Dest->getType(); | 1534 Type Ty = Dest->getType(); |
1535 if (Src1 == -1) { | 1535 if (Src1 == -1) { |
1536 Variable *T = nullptr; | 1536 Variable *T = nullptr; |
1537 _mov(T, Src0); | 1537 _mov(T, Src0); |
1538 _neg(T); | 1538 _neg(T); |
1539 _mov(Dest, T); | 1539 _mov(Dest, T); |
1540 return true; | 1540 return true; |
1541 } | 1541 } |
1542 if (Src1 == 0) { | 1542 if (Src1 == 0) { |
(...skipping 673 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2216 } | 2216 } |
2217 T_edx = makeReg(Ty, Edx); | 2217 T_edx = makeReg(Ty, Edx); |
2218 _mov(T, Src0, Eax); | 2218 _mov(T, Src0, Eax); |
2219 _mov(T_edx, Ctx->getConstantZero(Ty)); | 2219 _mov(T_edx, Ctx->getConstantZero(Ty)); |
2220 _div(T, Src1, T_edx); | 2220 _div(T, Src1, T_edx); |
2221 _mov(Dest, T); | 2221 _mov(Dest, T); |
2222 } break; | 2222 } break; |
2223 case InstArithmetic::Sdiv: | 2223 case InstArithmetic::Sdiv: |
2224 // TODO(stichnot): Enable this after doing better performance and cross | 2224 // TODO(stichnot): Enable this after doing better performance and cross |
2225 // testing. | 2225 // testing. |
2226 if (false && getFlags().getOptLevel() >= Opt_1) { | 2226 if (false && Func->getOptLevel() >= Opt_1) { |
2227 // Optimize division by constant power of 2, but not for Om1 or O0, just | 2227 // Optimize division by constant power of 2, but not for Om1 or O0, just |
2228 // to keep things simple there. | 2228 // to keep things simple there. |
2229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 2229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
2230 const int32_t Divisor = C->getValue(); | 2230 const int32_t Divisor = C->getValue(); |
2231 const uint32_t UDivisor = Divisor; | 2231 const uint32_t UDivisor = Divisor; |
2232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 2232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
2233 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 2233 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
2234 // LLVM does the following for dest=src/(1<<log): | 2234 // LLVM does the following for dest=src/(1<<log): |
2235 // t=src | 2235 // t=src |
2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2306 } | 2306 } |
2307 T_edx = makeReg(Ty, Edx); | 2307 T_edx = makeReg(Ty, Edx); |
2308 _mov(T_edx, Ctx->getConstantZero(Ty)); | 2308 _mov(T_edx, Ctx->getConstantZero(Ty)); |
2309 _mov(T, Src0, Eax); | 2309 _mov(T, Src0, Eax); |
2310 _div(T_edx, Src1, T); | 2310 _div(T_edx, Src1, T); |
2311 _mov(Dest, T_edx); | 2311 _mov(Dest, T_edx); |
2312 } break; | 2312 } break; |
2313 case InstArithmetic::Srem: { | 2313 case InstArithmetic::Srem: { |
2314 // TODO(stichnot): Enable this after doing better performance and cross | 2314 // TODO(stichnot): Enable this after doing better performance and cross |
2315 // testing. | 2315 // testing. |
2316 if (false && getFlags().getOptLevel() >= Opt_1) { | 2316 if (false && Func->getOptLevel() >= Opt_1) { |
2317 // Optimize mod by constant power of 2, but not for Om1 or O0, just to | 2317 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
2318 // keep things simple there. | 2318 // keep things simple there. |
2319 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 2319 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
2320 const int32_t Divisor = C->getValue(); | 2320 const int32_t Divisor = C->getValue(); |
2321 const uint32_t UDivisor = Divisor; | 2321 const uint32_t UDivisor = Divisor; |
2322 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 2322 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
2323 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 2323 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
2324 // LLVM does the following for dest=src%(1<<log): | 2324 // LLVM does the following for dest=src%(1<<log): |
2325 // t=src | 2325 // t=src |
2326 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 2326 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
(...skipping 1972 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4299 constexpr bool Locked = true; | 4299 constexpr bool Locked = true; |
4300 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 4300 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
4301 _mov(DestPrev, T_eax); | 4301 _mov(DestPrev, T_eax); |
4302 } | 4302 } |
4303 | 4303 |
4304 template <typename TraitsType> | 4304 template <typename TraitsType> |
4305 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest, | 4305 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
4306 Operand *PtrToMem, | 4306 Operand *PtrToMem, |
4307 Operand *Expected, | 4307 Operand *Expected, |
4308 Operand *Desired) { | 4308 Operand *Desired) { |
4309 if (getFlags().getOptLevel() == Opt_m1) | 4309 if (Func->getOptLevel() == Opt_m1) |
4310 return false; | 4310 return false; |
4311 // Peek ahead a few instructions and see how Dest is used. | 4311 // Peek ahead a few instructions and see how Dest is used. |
4312 // It's very common to have: | 4312 // It's very common to have: |
4313 // | 4313 // |
4314 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | 4314 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) |
4315 // [%y_phi = ...] // list of phi stores | 4315 // [%y_phi = ...] // list of phi stores |
4316 // %p = icmp eq i32 %x, %expected | 4316 // %p = icmp eq i32 %x, %expected |
4317 // br i1 %p, label %l1, label %l2 | 4317 // br i1 %p, label %l1, label %l2 |
4318 // | 4318 // |
4319 // which we can optimize into: | 4319 // which we can optimize into: |
(...skipping 2689 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7009 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 7009 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
7010 if (!llvm::isPowerOf2_32(Size)) | 7010 if (!llvm::isPowerOf2_32(Size)) |
7011 ++TyIndex; | 7011 ++TyIndex; |
7012 uint32_t MaxIndex = MaxSize == NoSizeLimit | 7012 uint32_t MaxIndex = MaxSize == NoSizeLimit |
7013 ? llvm::array_lengthof(TypeForSize) - 1 | 7013 ? llvm::array_lengthof(TypeForSize) - 1 |
7014 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 7014 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
7015 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 7015 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
7016 } | 7016 } |
7017 | 7017 |
// postLower(): hook that runs two bookkeeping steps after lowering.
// At optimization level Opt_m1 it returns immediately and does nothing.
// Otherwise it calls markRedefinitions() and then
// Context.availabilityUpdate(), in that order.
// The OLD and NEW columns differ only in where the optimization level is read
// from: OLD queries getFlags(), NEW queries Func.
7018 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() { | 7018 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() { |
7019 if (getFlags().getOptLevel() == Opt_m1) | 7019 if (Func->getOptLevel() == Opt_m1) |
7020 return; | 7020 return; |
7021 markRedefinitions(); | 7021 markRedefinitions(); |
7022 Context.availabilityUpdate(); | 7022 Context.availabilityUpdate(); |
7023 } | 7023 } |
7024 | 7024 |
7025 template <typename TraitsType> | 7025 template <typename TraitsType> |
7026 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation( | 7026 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation( |
7027 llvm::SmallVectorImpl<RegNumT> &Permutation, | 7027 llvm::SmallVectorImpl<RegNumT> &Permutation, |
7028 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const { | 7028 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const { |
7029 Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters, | 7029 Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters, |
(...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7410 void TargetDataX86<TraitsType>::lowerGlobals( | 7410 void TargetDataX86<TraitsType>::lowerGlobals( |
7411 const VariableDeclarationList &Vars, const std::string &SectionSuffix) { | 7411 const VariableDeclarationList &Vars, const std::string &SectionSuffix) { |
  // Emits the global variable declarations in Vars, dispatching on the
  // configured output file type. SectionSuffix is forwarded unchanged to the
  // writer / emitter in every branch.
  // IsPIC mirrors the UseNonsfi flag and is only consumed by the ELF branch.
7412 const bool IsPIC = getFlags().getUseNonsfi(); | 7412 const bool IsPIC = getFlags().getUseNonsfi(); |
7413 switch (getFlags().getOutFileType()) { | 7413 switch (getFlags().getOutFileType()) { |
  // ELF output: hand the entire list to the object writer in a single call,
  // using Traits::FK_Abs as the fixup kind. No per-variable filtering here.
7414 case FT_Elf: { | 7414 case FT_Elf: { |
7415 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | 7415 ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
7416 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC); | 7416 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC); |
7417 } break; | 7417 } break; |
  // Textual output (FT_Asm and FT_Iasm share one body): emit each variable
  // individually, but only those whose name passes the translate-only filter.
  // OLD copies the TranslateOnly flag into a local string and calls the static
  // GlobalContext::matchSymbolName(); NEW drops that local and asks the flags
  // object directly through matchTranslateOnly(). From here on the NEW line
  // numbers are one lower than the OLD ones because of the removed line.
  // NOTE(review): the meaning of the literal 0 passed as the second argument
  // of matchTranslateOnly() is not visible here -- confirm against its
  // declaration.
7418 case FT_Asm: | 7418 case FT_Asm: |
7419 case FT_Iasm: { | 7419 case FT_Iasm: { |
7420 const std::string TranslateOnly = getFlags().getTranslateOnly(); | |
  // An OstreamLocker on Ctx is held for the whole loop; presumably this
  // serializes access to the output stream -- confirm.
7421 OstreamLocker L(Ctx); | 7420 OstreamLocker L(Ctx); |
7422 for (const VariableDeclaration *Var : Vars) { | 7421 for (const VariableDeclaration *Var : Vars) { |
7423 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { | 7422 if (getFlags().matchTranslateOnly(Var->getName(), 0)) { |
7424 emitGlobal(*Var, SectionSuffix); | 7423 emitGlobal(*Var, SectionSuffix); |
7425 } | 7424 } |
7426 } | 7425 } |
7427 } break; | 7426 } break; |
7428 } | 7427 } |
7429 } | 7428 } |
7430 } // end of namespace X86NAMESPACE | 7429 } // end of namespace X86NAMESPACE |
7431 } // end of namespace Ice | 7430 } // end of namespace Ice |
7432 | 7431 |
7433 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7432 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |