Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(208)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1361803002: Subzero: Improve handling of alloca instructions of constant size. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Add a couple of basic tests Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/align-spill-locations.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 773 matching lines...) Expand 10 before | Expand all | Expand 10 after
784 /// This assumes Arg is an argument passed on the stack. This sets the frame 784 /// This assumes Arg is an argument passed on the stack. This sets the frame
785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an 785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
786 /// I64 arg that has been split into Lo and Hi components, it calls itself 786 /// I64 arg that has been split into Lo and Hi components, it calls itself
787 /// recursively on the components, taking care to handle Lo first because of the 787 /// recursively on the components, taking care to handle Lo first because of the
788 /// little-endian architecture. Lastly, this function generates an instruction 788 /// little-endian architecture. Lastly, this function generates an instruction
789 /// to copy Arg into its assigned register if applicable. 789 /// to copy Arg into its assigned register if applicable.
790 template <class Machine> 790 template <class Machine>
791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
792 Variable *FramePtr, 792 Variable *FramePtr,
793 size_t BasicFrameOffset, 793 size_t BasicFrameOffset,
794 size_t StackAdjBytes,
794 size_t &InArgsSizeBytes) { 795 size_t &InArgsSizeBytes) {
795 if (!Traits::Is64Bit) { 796 if (!Traits::Is64Bit) {
796 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { 797 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
797 Variable *Lo = Arg64On32->getLo(); 798 Variable *Lo = Arg64On32->getLo();
798 Variable *Hi = Arg64On32->getHi(); 799 Variable *Hi = Arg64On32->getHi();
799 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 800 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes,
800 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 801 InArgsSizeBytes);
802 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes,
803 InArgsSizeBytes);
801 return; 804 return;
802 } 805 }
803 } 806 }
804 Type Ty = Arg->getType(); 807 Type Ty = Arg->getType();
805 if (isVectorType(Ty)) { 808 if (isVectorType(Ty)) {
806 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); 809 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
807 } 810 }
808 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 811 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
809 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 812 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
810 if (Arg->hasReg()) { 813 if (Arg->hasReg()) {
811 assert(Ty != IceType_i64 || Traits::Is64Bit); 814 assert(Ty != IceType_i64 || Traits::Is64Bit);
812 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( 815 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
813 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 816 Func, Ty, FramePtr,
817 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes));
814 if (isVectorType(Arg->getType())) { 818 if (isVectorType(Arg->getType())) {
815 _movp(Arg, Mem); 819 _movp(Arg, Mem);
816 } else { 820 } else {
817 _mov(Arg, Mem); 821 _mov(Arg, Mem);
818 } 822 }
819 // This argument-copying instruction uses an explicit Traits::X86OperandMem 823 // This argument-copying instruction uses an explicit Traits::X86OperandMem
820 // operand instead of a Variable, so its fill-from-stack operation has to 824 // operand instead of a Variable, so its fill-from-stack operation has to
821 // be tracked separately for statistics. 825 // be tracked separately for statistics.
822 Ctx->statsUpdateFills(); 826 Ctx->statsUpdateFills();
823 } 827 }
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
898 902
899 template <class Machine> 903 template <class Machine>
900 llvm::SmallBitVector 904 llvm::SmallBitVector
901 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 905 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
902 RegSetMask Exclude) const { 906 RegSetMask Exclude) const {
903 return Traits::getRegisterSet(Include, Exclude); 907 return Traits::getRegisterSet(Include, Exclude);
904 } 908 }
905 909
906 template <class Machine> 910 template <class Machine>
907 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 911 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
908 IsEbpBasedFrame = true; 912 if (!Inst->getKnownFrameOffset())
913 IsEbpBasedFrame = true;
909 // Conservatively require the stack to be aligned. Some stack adjustment 914 // Conservatively require the stack to be aligned. Some stack adjustment
910 // operations implemented below assume that the stack is aligned before the 915 // operations implemented below assume that the stack is aligned before the
911 // alloca. All the alloca code ensures that the stack alignment is preserved 916 // alloca. All the alloca code ensures that the stack alignment is preserved
912 // after the alloca. The stack alignment restriction can be relaxed in some 917 // after the alloca. The stack alignment restriction can be relaxed in some
913 // cases. 918 // cases.
914 NeedsStackAlignment = true; 919 NeedsStackAlignment = true;
915 920
916 // TODO(stichnot): minimize the number of adjustments of esp, etc. 921 // TODO(stichnot): minimize the number of adjustments of esp, etc.
917 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); 922 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
918 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 923 Operand *TotalSize = legalize(Inst->getSizeInBytes());
919 Variable *Dest = Inst->getDest(); 924 Variable *Dest = Inst->getDest();
920 uint32_t AlignmentParam = Inst->getAlignInBytes(); 925 uint32_t AlignmentParam = Inst->getAlignInBytes();
921 // For default align=0, set it to the real value 1, to avoid any 926 // For default align=0, set it to the real value 1, to avoid any
922 // bit-manipulation problems below. 927 // bit-manipulation problems below.
923 AlignmentParam = std::max(AlignmentParam, 1u); 928 AlignmentParam = std::max(AlignmentParam, 1u);
924 929
925 // LLVM enforces power of 2 alignment. 930 // LLVM enforces power of 2 alignment.
926 assert(llvm::isPowerOf2_32(AlignmentParam)); 931 assert(llvm::isPowerOf2_32(AlignmentParam));
927 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); 932 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
928 933
929 uint32_t Alignment = 934 uint32_t Alignment =
930 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); 935 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
931 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { 936 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
932 _and(esp, Ctx->getConstantInt32(-Alignment)); 937 _and(esp, Ctx->getConstantInt32(-Alignment));
933 } 938 }
934 if (const auto *ConstantTotalSize = 939 if (const auto *ConstantTotalSize =
935 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 940 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
936 uint32_t Value = ConstantTotalSize->getValue(); 941 uint32_t Value = ConstantTotalSize->getValue();
937 Value = Utils::applyAlignment(Value, Alignment); 942 Value = Utils::applyAlignment(Value, Alignment);
938 _sub(esp, Ctx->getConstantInt32(Value)); 943 if (Inst->getKnownFrameOffset()) {
944 _adjust_stack(Value);
945 FixedAllocaSizeBytes += Value;
946 } else {
947 _sub(esp, Ctx->getConstantInt32(Value));
948 }
939 } else { 949 } else {
940 // Non-constant sizes need to be adjusted to the next highest multiple of 950 // Non-constant sizes need to be adjusted to the next highest multiple of
941 // the required alignment at runtime. 951 // the required alignment at runtime.
942 Variable *T = makeReg(IceType_i32); 952 Variable *T = makeReg(IceType_i32);
943 _mov(T, TotalSize); 953 _mov(T, TotalSize);
944 _add(T, Ctx->getConstantInt32(Alignment - 1)); 954 _add(T, Ctx->getConstantInt32(Alignment - 1));
945 _and(T, Ctx->getConstantInt32(-Alignment)); 955 _and(T, Ctx->getConstantInt32(-Alignment));
946 _sub(esp, T); 956 _sub(esp, T);
947 } 957 }
948 _mov(Dest, esp); 958 _mov(Dest, esp);
(...skipping 4525 matching lines...) Expand 10 before | Expand all | Expand 10 after
5474 } 5484 }
5475 // the offset is not eligible for blinding or pooling, return the original 5485 // the offset is not eligible for blinding or pooling, return the original
5476 // mem operand 5486 // mem operand
5477 return MemOperand; 5487 return MemOperand;
5478 } 5488 }
5479 5489
5480 } // end of namespace X86Internal 5490 } // end of namespace X86Internal
5481 } // end of namespace Ice 5491 } // end of namespace Ice
5482 5492
5483 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5493 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/align-spill-locations.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698