Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(421)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1900543002: Subzero: Allow per-method controls. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: More cleanup Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1381 matching lines...) Expand 10 before | Expand all | Expand 10 after
1392 // bit-manipulation problems below. 1392 // bit-manipulation problems below.
1393 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); 1393 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
1394 1394
1395 // LLVM enforces power of 2 alignment. 1395 // LLVM enforces power of 2 alignment.
1396 assert(llvm::isPowerOf2_32(AlignmentParam)); 1396 assert(llvm::isPowerOf2_32(AlignmentParam));
1397 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); 1397 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
1398 1398
1399 const uint32_t Alignment = 1399 const uint32_t Alignment =
1400 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); 1400 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
1401 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; 1401 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;
1402 const bool OptM1 = getFlags().getOptLevel() == Opt_m1; 1402 const bool OptM1 = Func->getOptLevel() == Opt_m1;
1403 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); 1403 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
1404 const bool UseFramePointer = 1404 const bool UseFramePointer =
1405 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; 1405 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
1406 1406
1407 if (UseFramePointer) 1407 if (UseFramePointer)
1408 setHasFramePointer(); 1408 setHasFramePointer();
1409 1409
1410 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType); 1410 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
1411 if (OverAligned) { 1411 if (OverAligned) {
1412 _and(esp, Ctx->getConstantInt32(-Alignment)); 1412 _and(esp, Ctx->getConstantInt32(-Alignment));
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
1522 /// Strength-reduce scalar integer multiplication by a constant (for i32 or 1522 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
1523 /// narrower) for certain constants. The lea instruction can be used to multiply 1523 /// narrower) for certain constants. The lea instruction can be used to multiply
1524 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of 1524 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of
1525 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 1525 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
1526 /// lea-based multiplies by 5, combined with left-shifting by 2. 1526 /// lea-based multiplies by 5, combined with left-shifting by 2.
1527 template <typename TraitsType> 1527 template <typename TraitsType>
1528 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, 1528 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1529 int32_t Src1) { 1529 int32_t Src1) {
1530 // Disable this optimization for Om1 and O0, just to keep things simple 1530 // Disable this optimization for Om1 and O0, just to keep things simple
1531 // there. 1531 // there.
1532 if (getFlags().getOptLevel() < Opt_1) 1532 if (Func->getOptLevel() < Opt_1)
1533 return false; 1533 return false;
1534 Type Ty = Dest->getType(); 1534 Type Ty = Dest->getType();
1535 if (Src1 == -1) { 1535 if (Src1 == -1) {
1536 Variable *T = nullptr; 1536 Variable *T = nullptr;
1537 _mov(T, Src0); 1537 _mov(T, Src0);
1538 _neg(T); 1538 _neg(T);
1539 _mov(Dest, T); 1539 _mov(Dest, T);
1540 return true; 1540 return true;
1541 } 1541 }
1542 if (Src1 == 0) { 1542 if (Src1 == 0) {
(...skipping 673 matching lines...) Expand 10 before | Expand all | Expand 10 after
2216 } 2216 }
2217 T_edx = makeReg(Ty, Edx); 2217 T_edx = makeReg(Ty, Edx);
2218 _mov(T, Src0, Eax); 2218 _mov(T, Src0, Eax);
2219 _mov(T_edx, Ctx->getConstantZero(Ty)); 2219 _mov(T_edx, Ctx->getConstantZero(Ty));
2220 _div(T, Src1, T_edx); 2220 _div(T, Src1, T_edx);
2221 _mov(Dest, T); 2221 _mov(Dest, T);
2222 } break; 2222 } break;
2223 case InstArithmetic::Sdiv: 2223 case InstArithmetic::Sdiv:
2224 // TODO(stichnot): Enable this after doing better performance and cross 2224 // TODO(stichnot): Enable this after doing better performance and cross
2225 // testing. 2225 // testing.
2226 if (false && getFlags().getOptLevel() >= Opt_1) { 2226 if (false && Func->getOptLevel() >= Opt_1) {
2227 // Optimize division by constant power of 2, but not for Om1 or O0, just 2227 // Optimize division by constant power of 2, but not for Om1 or O0, just
2228 // to keep things simple there. 2228 // to keep things simple there.
2229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 2229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2230 const int32_t Divisor = C->getValue(); 2230 const int32_t Divisor = C->getValue();
2231 const uint32_t UDivisor = Divisor; 2231 const uint32_t UDivisor = Divisor;
2232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 2232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2233 uint32_t LogDiv = llvm::Log2_32(UDivisor); 2233 uint32_t LogDiv = llvm::Log2_32(UDivisor);
2234 // LLVM does the following for dest=src/(1<<log): 2234 // LLVM does the following for dest=src/(1<<log):
2235 // t=src 2235 // t=src
2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not 2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
2306 } 2306 }
2307 T_edx = makeReg(Ty, Edx); 2307 T_edx = makeReg(Ty, Edx);
2308 _mov(T_edx, Ctx->getConstantZero(Ty)); 2308 _mov(T_edx, Ctx->getConstantZero(Ty));
2309 _mov(T, Src0, Eax); 2309 _mov(T, Src0, Eax);
2310 _div(T_edx, Src1, T); 2310 _div(T_edx, Src1, T);
2311 _mov(Dest, T_edx); 2311 _mov(Dest, T_edx);
2312 } break; 2312 } break;
2313 case InstArithmetic::Srem: { 2313 case InstArithmetic::Srem: {
2314 // TODO(stichnot): Enable this after doing better performance and cross 2314 // TODO(stichnot): Enable this after doing better performance and cross
2315 // testing. 2315 // testing.
2316 if (false && getFlags().getOptLevel() >= Opt_1) { 2316 if (false && Func->getOptLevel() >= Opt_1) {
2317 // Optimize mod by constant power of 2, but not for Om1 or O0, just to 2317 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
2318 // keep things simple there. 2318 // keep things simple there.
2319 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 2319 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2320 const int32_t Divisor = C->getValue(); 2320 const int32_t Divisor = C->getValue();
2321 const uint32_t UDivisor = Divisor; 2321 const uint32_t UDivisor = Divisor;
2322 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 2322 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2323 uint32_t LogDiv = llvm::Log2_32(UDivisor); 2323 uint32_t LogDiv = llvm::Log2_32(UDivisor);
2324 // LLVM does the following for dest=src%(1<<log): 2324 // LLVM does the following for dest=src%(1<<log):
2325 // t=src 2325 // t=src
2326 // sar t,typewidth-1 // -1 if src is negative, 0 if not 2326 // sar t,typewidth-1 // -1 if src is negative, 0 if not
(...skipping 1972 matching lines...) Expand 10 before | Expand all | Expand 10 after
4299 constexpr bool Locked = true; 4299 constexpr bool Locked = true;
4300 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 4300 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
4301 _mov(DestPrev, T_eax); 4301 _mov(DestPrev, T_eax);
4302 } 4302 }
4303 4303
4304 template <typename TraitsType> 4304 template <typename TraitsType>
4305 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest, 4305 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
4306 Operand *PtrToMem, 4306 Operand *PtrToMem,
4307 Operand *Expected, 4307 Operand *Expected,
4308 Operand *Desired) { 4308 Operand *Desired) {
4309 if (getFlags().getOptLevel() == Opt_m1) 4309 if (Func->getOptLevel() == Opt_m1)
4310 return false; 4310 return false;
4311 // Peek ahead a few instructions and see how Dest is used. 4311 // Peek ahead a few instructions and see how Dest is used.
4312 // It's very common to have: 4312 // It's very common to have:
4313 // 4313 //
4314 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) 4314 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
4315 // [%y_phi = ...] // list of phi stores 4315 // [%y_phi = ...] // list of phi stores
4316 // %p = icmp eq i32 %x, %expected 4316 // %p = icmp eq i32 %x, %expected
4317 // br i1 %p, label %l1, label %l2 4317 // br i1 %p, label %l1, label %l2
4318 // 4318 //
4319 // which we can optimize into: 4319 // which we can optimize into:
(...skipping 2689 matching lines...) Expand 10 before | Expand all | Expand 10 after
7009 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); 7009 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
7010 if (!llvm::isPowerOf2_32(Size)) 7010 if (!llvm::isPowerOf2_32(Size))
7011 ++TyIndex; 7011 ++TyIndex;
7012 uint32_t MaxIndex = MaxSize == NoSizeLimit 7012 uint32_t MaxIndex = MaxSize == NoSizeLimit
7013 ? llvm::array_lengthof(TypeForSize) - 1 7013 ? llvm::array_lengthof(TypeForSize) - 1
7014 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); 7014 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
7015 return TypeForSize[std::min(TyIndex, MaxIndex)]; 7015 return TypeForSize[std::min(TyIndex, MaxIndex)];
7016 } 7016 }
7017 7017
7018 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() { 7018 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() {
7019 if (getFlags().getOptLevel() == Opt_m1) 7019 if (Func->getOptLevel() == Opt_m1)
7020 return; 7020 return;
7021 markRedefinitions(); 7021 markRedefinitions();
7022 Context.availabilityUpdate(); 7022 Context.availabilityUpdate();
7023 } 7023 }
7024 7024
7025 template <typename TraitsType> 7025 template <typename TraitsType>
7026 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation( 7026 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(
7027 llvm::SmallVectorImpl<RegNumT> &Permutation, 7027 llvm::SmallVectorImpl<RegNumT> &Permutation,
7028 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const { 7028 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
7029 Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters, 7029 Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters,
(...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after
7410 void TargetDataX86<TraitsType>::lowerGlobals( 7410 void TargetDataX86<TraitsType>::lowerGlobals(
7411 const VariableDeclarationList &Vars, const std::string &SectionSuffix) { 7411 const VariableDeclarationList &Vars, const std::string &SectionSuffix) {
7412 const bool IsPIC = getFlags().getUseNonsfi(); 7412 const bool IsPIC = getFlags().getUseNonsfi();
7413 switch (getFlags().getOutFileType()) { 7413 switch (getFlags().getOutFileType()) {
7414 case FT_Elf: { 7414 case FT_Elf: {
7415 ELFObjectWriter *Writer = Ctx->getObjectWriter(); 7415 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7416 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC); 7416 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC);
7417 } break; 7417 } break;
7418 case FT_Asm: 7418 case FT_Asm:
7419 case FT_Iasm: { 7419 case FT_Iasm: {
7420 const std::string TranslateOnly = getFlags().getTranslateOnly();
7421 OstreamLocker L(Ctx); 7420 OstreamLocker L(Ctx);
7422 for (const VariableDeclaration *Var : Vars) { 7421 for (const VariableDeclaration *Var : Vars) {
7423 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { 7422 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
7424 emitGlobal(*Var, SectionSuffix); 7423 emitGlobal(*Var, SectionSuffix);
7425 } 7424 }
7426 } 7425 }
7427 } break; 7426 } break;
7428 } 7427 }
7429 } 7428 }
7430 } // end of namespace X86NAMESPACE 7429 } // end of namespace X86NAMESPACE
7431 } // end of namespace Ice 7430 } // end of namespace Ice
7432 7431
7433 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7432 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698