src/IceTargetLoweringX86BaseImpl.h - Issue 1361803002: Subzero: Improve handling of alloca instructions of constant size.

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1361803002: Subzero: Improve handling of alloca instructions of constant size. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Add a couple of basic tests Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 773 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
784 /// This assumes Arg is an argument passed on the stack. This sets the frame	784 /// This assumes Arg is an argument passed on the stack. This sets the frame

785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an	785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an

786 /// I64 arg that has been split into Lo and Hi components, it calls itself	786 /// I64 arg that has been split into Lo and Hi components, it calls itself

787 /// recursively on the components, taking care to handle Lo first because of the	787 /// recursively on the components, taking care to handle Lo first because of the

788 /// little-endian architecture. Lastly, this function generates an instruction	788 /// little-endian architecture. Lastly, this function generates an instruction

789 /// to copy Arg into its assigned register if applicable.	789 /// to copy Arg into its assigned register if applicable.

790 template <class Machine>	790 template <class Machine>

791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,	791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,

792 Variable *FramePtr,	792 Variable *FramePtr,

793 size_t BasicFrameOffset,	793 size_t BasicFrameOffset,

	794 size_t StackAdjBytes,

794 size_t &InArgsSizeBytes) {	795 size_t &InArgsSizeBytes) {

795 if (!Traits::Is64Bit) {	796 if (!Traits::Is64Bit) {

796 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {	797 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {

797 Variable *Lo = Arg64On32->getLo();	798 Variable *Lo = Arg64On32->getLo();

798 Variable *Hi = Arg64On32->getHi();	799 Variable *Hi = Arg64On32->getHi();

799 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);	800 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes,

800 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);	801 InArgsSizeBytes);

	802 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes,

	803 InArgsSizeBytes);

801 return;	804 return;

802 }	805 }

803 }	806 }

804 Type Ty = Arg->getType();	807 Type Ty = Arg->getType();

805 if (isVectorType(Ty)) {	808 if (isVectorType(Ty)) {

806 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);	809 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);

807 }	810 }

808 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);	811 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);

809 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);	812 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

810 if (Arg->hasReg()) {	813 if (Arg->hasReg()) {

811 assert(Ty != IceType_i64 || Traits::Is64Bit);	814 assert(Ty != IceType_i64 || Traits::Is64Bit);

812 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(	815 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(

813 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));	816 Func, Ty, FramePtr,

	817 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes));

814 if (isVectorType(Arg->getType())) {	818 if (isVectorType(Arg->getType())) {

815 _movp(Arg, Mem);	819 _movp(Arg, Mem);

816 } else {	820 } else {

817 _mov(Arg, Mem);	821 _mov(Arg, Mem);

818 }	822 }

819 // This argument-copying instruction uses an explicit Traits::X86OperandMem	823 // This argument-copying instruction uses an explicit Traits::X86OperandMem

820 // operand instead of a Variable, so its fill-from-stack operation has to	824 // operand instead of a Variable, so its fill-from-stack operation has to

821 // be tracked separately for statistics.	825 // be tracked separately for statistics.

822 Ctx->statsUpdateFills();	826 Ctx->statsUpdateFills();

823 }	827 }

(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
898	902

899 template <class Machine>	903 template <class Machine>

900 llvm::SmallBitVector	904 llvm::SmallBitVector

901 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,	905 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,

902 RegSetMask Exclude) const {	906 RegSetMask Exclude) const {

903 return Traits::getRegisterSet(Include, Exclude);	907 return Traits::getRegisterSet(Include, Exclude);

904 }	908 }

905	909

906 template <class Machine>	910 template <class Machine>

907 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {	911 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {

908 IsEbpBasedFrame = true;	912 if (!Inst->getKnownFrameOffset())

	913 IsEbpBasedFrame = true;

909 // Conservatively require the stack to be aligned. Some stack adjustment	914 // Conservatively require the stack to be aligned. Some stack adjustment

910 // operations implemented below assume that the stack is aligned before the	915 // operations implemented below assume that the stack is aligned before the

911 // alloca. All the alloca code ensures that the stack alignment is preserved	916 // alloca. All the alloca code ensures that the stack alignment is preserved

912 // after the alloca. The stack alignment restriction can be relaxed in some	917 // after the alloca. The stack alignment restriction can be relaxed in some

913 // cases.	918 // cases.

914 NeedsStackAlignment = true;	919 NeedsStackAlignment = true;

915	920

916 // TODO(stichnot): minimize the number of adjustments of esp, etc.	921 // TODO(stichnot): minimize the number of adjustments of esp, etc.

917 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);	922 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);

918 Operand *TotalSize = legalize(Inst->getSizeInBytes());	923 Operand *TotalSize = legalize(Inst->getSizeInBytes());

919 Variable *Dest = Inst->getDest();	924 Variable *Dest = Inst->getDest();

920 uint32_t AlignmentParam = Inst->getAlignInBytes();	925 uint32_t AlignmentParam = Inst->getAlignInBytes();

921 // For default align=0, set it to the real value 1, to avoid any	926 // For default align=0, set it to the real value 1, to avoid any

922 // bit-manipulation problems below.	927 // bit-manipulation problems below.

923 AlignmentParam = std::max(AlignmentParam, 1u);	928 AlignmentParam = std::max(AlignmentParam, 1u);

924	929

925 // LLVM enforces power of 2 alignment.	930 // LLVM enforces power of 2 alignment.

926 assert(llvm::isPowerOf2_32(AlignmentParam));	931 assert(llvm::isPowerOf2_32(AlignmentParam));

927 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));	932 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

928	933

929 uint32_t Alignment =	934 uint32_t Alignment =

930 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);	935 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);

931 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {	936 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {

932 _and(esp, Ctx->getConstantInt32(-Alignment));	937 _and(esp, Ctx->getConstantInt32(-Alignment));

933 }	938 }

934 if (const auto *ConstantTotalSize =	939 if (const auto *ConstantTotalSize =

935 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {	940 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {

936 uint32_t Value = ConstantTotalSize->getValue();	941 uint32_t Value = ConstantTotalSize->getValue();

937 Value = Utils::applyAlignment(Value, Alignment);	942 Value = Utils::applyAlignment(Value, Alignment);

938 _sub(esp, Ctx->getConstantInt32(Value));	943 if (Inst->getKnownFrameOffset()) {

	944 _adjust_stack(Value);

	945 FixedAllocaSizeBytes += Value;

	946 } else {

	947 _sub(esp, Ctx->getConstantInt32(Value));

	948 }

939 } else {	949 } else {

940 // Non-constant sizes need to be adjusted to the next highest multiple of	950 // Non-constant sizes need to be adjusted to the next highest multiple of

941 // the required alignment at runtime.	951 // the required alignment at runtime.

942 Variable *T = makeReg(IceType_i32);	952 Variable *T = makeReg(IceType_i32);

943 _mov(T, TotalSize);	953 _mov(T, TotalSize);

944 _add(T, Ctx->getConstantInt32(Alignment - 1));	954 _add(T, Ctx->getConstantInt32(Alignment - 1));

945 _and(T, Ctx->getConstantInt32(-Alignment));	955 _and(T, Ctx->getConstantInt32(-Alignment));

946 _sub(esp, T);	956 _sub(esp, T);

947 }	957 }

948 _mov(Dest, esp);	958 _mov(Dest, esp);

(...skipping 4525 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5474 }	5484 }

5475 // the offset is not eligible for blinding or pooling, return the original	5485 // the offset is not eligible for blinding or pooling, return the original

5476 // mem operand	5486 // mem operand

5477 return MemOperand;	5487 return MemOperand;

5478 }	5488 }

5479	5489

5480 } // end of namespace X86Internal	5490 } // end of namespace X86Internal

5481 } // end of namespace Ice	5491 } // end of namespace Ice

5482	5492

5483 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	5493 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/align-spill-locations.ll » ('j') | no next file with comments »