src/IceTargetLoweringX86BaseImpl.h - Issue 1900543002: Subzero: Allow per-method controls.

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1900543002: Subzero: Allow per-method controls. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: More cleanup Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 1381 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1392 // bit-manipulation problems below.	1392 // bit-manipulation problems below.

1393 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());	1393 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());

1394	1394

1395 // LLVM enforces power of 2 alignment.	1395 // LLVM enforces power of 2 alignment.

1396 assert(llvm::isPowerOf2_32(AlignmentParam));	1396 assert(llvm::isPowerOf2_32(AlignmentParam));

1397 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));	1397 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

1398	1398

1399 const uint32_t Alignment =	1399 const uint32_t Alignment =

1400 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);	1400 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);

1401 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;	1401 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;

1402 const bool OptM1 = getFlags().getOptLevel() == Opt_m1;	1402 const bool OptM1 = Func->getOptLevel() == Opt_m1;

1403 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();	1403 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();

1404 const bool UseFramePointer =	1404 const bool UseFramePointer =

1405 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;	1405 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

1406	1406

1407 if (UseFramePointer)	1407 if (UseFramePointer)

1408 setHasFramePointer();	1408 setHasFramePointer();

1409	1409

1410 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);	1410 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);

1411 if (OverAligned) {	1411 if (OverAligned) {

1412 _and(esp, Ctx->getConstantInt32(-Alignment));	1412 _and(esp, Ctx->getConstantInt32(-Alignment));

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1522 /// Strength-reduce scalar integer multiplication by a constant (for i32 or	1522 /// Strength-reduce scalar integer multiplication by a constant (for i32 or

1523 /// narrower) for certain constants. The lea instruction can be used to multiply	1523 /// narrower) for certain constants. The lea instruction can be used to multiply

1524 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of	1524 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of

1525 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2	1525 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2

1526 /// lea-based multiplies by 5, combined with left-shifting by 2.	1526 /// lea-based multiplies by 5, combined with left-shifting by 2.

1527 template <typename TraitsType>	1527 template <typename TraitsType>

1528 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,	1528 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,

1529 int32_t Src1) {	1529 int32_t Src1) {

1530 // Disable this optimization for Om1 and O0, just to keep things simple	1530 // Disable this optimization for Om1 and O0, just to keep things simple

1531 // there.	1531 // there.

1532 if (getFlags().getOptLevel() < Opt_1)	1532 if (Func->getOptLevel() < Opt_1)

1533 return false;	1533 return false;

1534 Type Ty = Dest->getType();	1534 Type Ty = Dest->getType();

1535 if (Src1 == -1) {	1535 if (Src1 == -1) {

1536 Variable *T = nullptr;	1536 Variable *T = nullptr;

1537 _mov(T, Src0);	1537 _mov(T, Src0);

1538 _neg(T);	1538 _neg(T);

1539 _mov(Dest, T);	1539 _mov(Dest, T);

1540 return true;	1540 return true;

1541 }	1541 }

1542 if (Src1 == 0) {	1542 if (Src1 == 0) {

(...skipping 673 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2216 }	2216 }

2217 T_edx = makeReg(Ty, Edx);	2217 T_edx = makeReg(Ty, Edx);

2218 _mov(T, Src0, Eax);	2218 _mov(T, Src0, Eax);

2219 _mov(T_edx, Ctx->getConstantZero(Ty));	2219 _mov(T_edx, Ctx->getConstantZero(Ty));

2220 _div(T, Src1, T_edx);	2220 _div(T, Src1, T_edx);

2221 _mov(Dest, T);	2221 _mov(Dest, T);

2222 } break;	2222 } break;

2223 case InstArithmetic::Sdiv:	2223 case InstArithmetic::Sdiv:

2224 // TODO(stichnot): Enable this after doing better performance and cross	2224 // TODO(stichnot): Enable this after doing better performance and cross

2225 // testing.	2225 // testing.

2226 if (false && getFlags().getOptLevel() >= Opt_1) {	2226 if (false && Func->getOptLevel() >= Opt_1) {

2227 // Optimize division by constant power of 2, but not for Om1 or O0, just	2227 // Optimize division by constant power of 2, but not for Om1 or O0, just

2228 // to keep things simple there.	2228 // to keep things simple there.

2229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {	2229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

2230 const int32_t Divisor = C->getValue();	2230 const int32_t Divisor = C->getValue();

2231 const uint32_t UDivisor = Divisor;	2231 const uint32_t UDivisor = Divisor;

2232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {	2232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {

2233 uint32_t LogDiv = llvm::Log2_32(UDivisor);	2233 uint32_t LogDiv = llvm::Log2_32(UDivisor);

2234 // LLVM does the following for dest=src/(1<<log):	2234 // LLVM does the following for dest=src/(1<<log):

2235 // t=src	2235 // t=src

2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not	2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not

(...skipping 69 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2306 }	2306 }

2307 T_edx = makeReg(Ty, Edx);	2307 T_edx = makeReg(Ty, Edx);

2308 _mov(T_edx, Ctx->getConstantZero(Ty));	2308 _mov(T_edx, Ctx->getConstantZero(Ty));

2309 _mov(T, Src0, Eax);	2309 _mov(T, Src0, Eax);

2310 _div(T_edx, Src1, T);	2310 _div(T_edx, Src1, T);

2311 _mov(Dest, T_edx);	2311 _mov(Dest, T_edx);

2312 } break;	2312 } break;

2313 case InstArithmetic::Srem: {	2313 case InstArithmetic::Srem: {

2314 // TODO(stichnot): Enable this after doing better performance and cross	2314 // TODO(stichnot): Enable this after doing better performance and cross

2315 // testing.	2315 // testing.

2316 if (false && getFlags().getOptLevel() >= Opt_1) {	2316 if (false && Func->getOptLevel() >= Opt_1) {

2317 // Optimize mod by constant power of 2, but not for Om1 or O0, just to	2317 // Optimize mod by constant power of 2, but not for Om1 or O0, just to

2318 // keep things simple there.	2318 // keep things simple there.

2319 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {	2319 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

2320 const int32_t Divisor = C->getValue();	2320 const int32_t Divisor = C->getValue();

2321 const uint32_t UDivisor = Divisor;	2321 const uint32_t UDivisor = Divisor;

2322 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {	2322 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {

2323 uint32_t LogDiv = llvm::Log2_32(UDivisor);	2323 uint32_t LogDiv = llvm::Log2_32(UDivisor);

2324 // LLVM does the following for dest=src%(1<<log):	2324 // LLVM does the following for dest=src%(1<<log):

2325 // t=src	2325 // t=src

2326 // sar t,typewidth-1 // -1 if src is negative, 0 if not	2326 // sar t,typewidth-1 // -1 if src is negative, 0 if not

(...skipping 1972 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4299 constexpr bool Locked = true;	4299 constexpr bool Locked = true;

4300 _cmpxchg(Addr, T_eax, DesiredReg, Locked);	4300 _cmpxchg(Addr, T_eax, DesiredReg, Locked);

4301 _mov(DestPrev, T_eax);	4301 _mov(DestPrev, T_eax);

4302 }	4302 }

4303	4303

4304 template <typename TraitsType>	4304 template <typename TraitsType>

4305 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest,	4305 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest,

4306 Operand *PtrToMem,	4306 Operand *PtrToMem,

4307 Operand *Expected,	4307 Operand *Expected,

4308 Operand *Desired) {	4308 Operand *Desired) {

4309 if (getFlags().getOptLevel() == Opt_m1)	4309 if (Func->getOptLevel() == Opt_m1)

4310 return false;	4310 return false;

4311 // Peek ahead a few instructions and see how Dest is used.	4311 // Peek ahead a few instructions and see how Dest is used.

4312 // It's very common to have:	4312 // It's very common to have:

4313 //	4313 //

4314 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)	4314 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)

4315 // [%y_phi = ...] // list of phi stores	4315 // [%y_phi = ...] // list of phi stores

4316 // %p = icmp eq i32 %x, %expected	4316 // %p = icmp eq i32 %x, %expected

4317 // br i1 %p, label %l1, label %l2	4317 // br i1 %p, label %l1, label %l2

4318 //	4318 //

4319 // which we can optimize into:	4319 // which we can optimize into:

(...skipping 2689 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7009 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);	7009 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);

7010 if (!llvm::isPowerOf2_32(Size))	7010 if (!llvm::isPowerOf2_32(Size))

7011 ++TyIndex;	7011 ++TyIndex;

7012 uint32_t MaxIndex = MaxSize == NoSizeLimit	7012 uint32_t MaxIndex = MaxSize == NoSizeLimit

7013 ? llvm::array_lengthof(TypeForSize) - 1	7013 ? llvm::array_lengthof(TypeForSize) - 1

7014 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);	7014 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);

7015 return TypeForSize[std::min(TyIndex, MaxIndex)];	7015 return TypeForSize[std::min(TyIndex, MaxIndex)];

7016 }	7016 }

7017	7017

7018 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() {	7018 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() {

7019 if (getFlags().getOptLevel() == Opt_m1)	7019 if (Func->getOptLevel() == Opt_m1)

7020 return;	7020 return;

7021 markRedefinitions();	7021 markRedefinitions();

7022 Context.availabilityUpdate();	7022 Context.availabilityUpdate();

7023 }	7023 }

7024	7024

7025 template <typename TraitsType>	7025 template <typename TraitsType>

7026 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(	7026 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(

7027 llvm::SmallVectorImpl<RegNumT> &Permutation,	7027 llvm::SmallVectorImpl<RegNumT> &Permutation,

7028 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {	7028 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {

7029 Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters,	7029 Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters,

(...skipping 380 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7410 void TargetDataX86<TraitsType>::lowerGlobals(	7410 void TargetDataX86<TraitsType>::lowerGlobals(

7411 const VariableDeclarationList &Vars, const std::string &SectionSuffix) {	7411 const VariableDeclarationList &Vars, const std::string &SectionSuffix) {

7412 const bool IsPIC = getFlags().getUseNonsfi();	7412 const bool IsPIC = getFlags().getUseNonsfi();

7413 switch (getFlags().getOutFileType()) {	7413 switch (getFlags().getOutFileType()) {

7414 case FT_Elf: {	7414 case FT_Elf: {

7415 ELFObjectWriter *Writer = Ctx->getObjectWriter();	7415 ELFObjectWriter *Writer = Ctx->getObjectWriter();

7416 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC);	7416 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC);

7417 } break;	7417 } break;

7418 case FT_Asm:	7418 case FT_Asm:

7419 case FT_Iasm: {	7419 case FT_Iasm: {

7420 const std::string TranslateOnly = getFlags().getTranslateOnly();

7421 OstreamLocker L(Ctx);	7420 OstreamLocker L(Ctx);

7422 for (const VariableDeclaration *Var : Vars) {	7421 for (const VariableDeclaration *Var : Vars) {

7423 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {	7422 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {

7424 emitGlobal(*Var, SectionSuffix);	7423 emitGlobal(*Var, SectionSuffix);

7425 }	7424 }

7426 }	7425 }

7427 } break;	7426 } break;

7428 }	7427 }

7429 }	7428 }

7430 } // end of namespace X86NAMESPACE	7429 } // end of namespace X86NAMESPACE

7431 } // end of namespace Ice	7430 } // end of namespace Ice

7432	7431

7433 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	7432 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« src/IceStringPool.h ('K') | « src/IceTargetLoweringMIPS32.cpp ('k') | src/PNaClTranslator.cpp » ('j') | no next file with comments »