| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 773 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 784 /// This assumes Arg is an argument passed on the stack. This sets the frame | 784 /// This assumes Arg is an argument passed on the stack. This sets the frame |
| 785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
| 786 /// I64 arg that has been split into Lo and Hi components, it calls itself | 786 /// I64 arg that has been split into Lo and Hi components, it calls itself |
| 787 /// recursively on the components, taking care to handle Lo first because of the | 787 /// recursively on the components, taking care to handle Lo first because of the |
| 788 /// little-endian architecture. Lastly, this function generates an instruction | 788 /// little-endian architecture. Lastly, this function generates an instruction |
| 789 /// to copy Arg into its assigned register if applicable. | 789 /// to copy Arg into its assigned register if applicable. |
| 790 template <class Machine> | 790 template <class Machine> |
| 791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 792 Variable *FramePtr, | 792 Variable *FramePtr, |
| 793 size_t BasicFrameOffset, | 793 size_t BasicFrameOffset, |
| 794 size_t StackAdjBytes, |
| 794 size_t &InArgsSizeBytes) { | 795 size_t &InArgsSizeBytes) { |
| 795 if (!Traits::Is64Bit) { | 796 if (!Traits::Is64Bit) { |
| 796 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { | 797 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
| 797 Variable *Lo = Arg64On32->getLo(); | 798 Variable *Lo = Arg64On32->getLo(); |
| 798 Variable *Hi = Arg64On32->getHi(); | 799 Variable *Hi = Arg64On32->getHi(); |
| 799 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 800 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes, |
| 800 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 801 InArgsSizeBytes); |
| 802 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes, |
| 803 InArgsSizeBytes); |
| 801 return; | 804 return; |
| 802 } | 805 } |
| 803 } | 806 } |
| 804 Type Ty = Arg->getType(); | 807 Type Ty = Arg->getType(); |
| 805 if (isVectorType(Ty)) { | 808 if (isVectorType(Ty)) { |
| 806 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); | 809 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
| 807 } | 810 } |
| 808 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 811 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 809 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 812 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 810 if (Arg->hasReg()) { | 813 if (Arg->hasReg()) { |
| 811 assert(Ty != IceType_i64 || Traits::Is64Bit); | 814 assert(Ty != IceType_i64 || Traits::Is64Bit); |
| 812 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( | 815 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( |
| 813 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | 816 Func, Ty, FramePtr, |
| 817 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes)); |
| 814 if (isVectorType(Arg->getType())) { | 818 if (isVectorType(Arg->getType())) { |
| 815 _movp(Arg, Mem); | 819 _movp(Arg, Mem); |
| 816 } else { | 820 } else { |
| 817 _mov(Arg, Mem); | 821 _mov(Arg, Mem); |
| 818 } | 822 } |
| 819 // This argument-copying instruction uses an explicit Traits::X86OperandMem | 823 // This argument-copying instruction uses an explicit Traits::X86OperandMem |
| 820 // operand instead of a Variable, so its fill-from-stack operation has to | 824 // operand instead of a Variable, so its fill-from-stack operation has to |
| 821 // be tracked separately for statistics. | 825 // be tracked separately for statistics. |
| 822 Ctx->statsUpdateFills(); | 826 Ctx->statsUpdateFills(); |
| 823 } | 827 } |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 898 | 902 |
| 899 template <class Machine> | 903 template <class Machine> |
| 900 llvm::SmallBitVector | 904 llvm::SmallBitVector |
| 901 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | 905 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, |
| 902 RegSetMask Exclude) const { | 906 RegSetMask Exclude) const { |
| 903 return Traits::getRegisterSet(Include, Exclude); | 907 return Traits::getRegisterSet(Include, Exclude); |
| 904 } | 908 } |
| 905 | 909 |
| 906 template <class Machine> | 910 template <class Machine> |
| 907 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { | 911 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { |
| 908 IsEbpBasedFrame = true; | 912 if (!Inst->getKnownFrameOffset()) |
| 913 IsEbpBasedFrame = true; |
| 909 // Conservatively require the stack to be aligned. Some stack adjustment | 914 // Conservatively require the stack to be aligned. Some stack adjustment |
| 910 // operations implemented below assume that the stack is aligned before the | 915 // operations implemented below assume that the stack is aligned before the |
| 911 // alloca. All the alloca code ensures that the stack alignment is preserved | 916 // alloca. All the alloca code ensures that the stack alignment is preserved |
| 912 // after the alloca. The stack alignment restriction can be relaxed in some | 917 // after the alloca. The stack alignment restriction can be relaxed in some |
| 913 // cases. | 918 // cases. |
| 914 NeedsStackAlignment = true; | 919 NeedsStackAlignment = true; |
| 915 | 920 |
| 916 // TODO(stichnot): minimize the number of adjustments of esp, etc. | 921 // TODO(stichnot): minimize the number of adjustments of esp, etc. |
| 917 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 922 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 918 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 923 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
| 919 Variable *Dest = Inst->getDest(); | 924 Variable *Dest = Inst->getDest(); |
| 920 uint32_t AlignmentParam = Inst->getAlignInBytes(); | 925 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
| 921 // For default align=0, set it to the real value 1, to avoid any | 926 // For default align=0, set it to the real value 1, to avoid any |
| 922 // bit-manipulation problems below. | 927 // bit-manipulation problems below. |
| 923 AlignmentParam = std::max(AlignmentParam, 1u); | 928 AlignmentParam = std::max(AlignmentParam, 1u); |
| 924 | 929 |
| 925 // LLVM enforces power of 2 alignment. | 930 // LLVM enforces power of 2 alignment. |
| 926 assert(llvm::isPowerOf2_32(AlignmentParam)); | 931 assert(llvm::isPowerOf2_32(AlignmentParam)); |
| 927 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); | 932 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); |
| 928 | 933 |
| 929 uint32_t Alignment = | 934 uint32_t Alignment = |
| 930 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); | 935 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
| 931 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { | 936 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { |
| 932 _and(esp, Ctx->getConstantInt32(-Alignment)); | 937 _and(esp, Ctx->getConstantInt32(-Alignment)); |
| 933 } | 938 } |
| 934 if (const auto *ConstantTotalSize = | 939 if (const auto *ConstantTotalSize = |
| 935 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 940 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| 936 uint32_t Value = ConstantTotalSize->getValue(); | 941 uint32_t Value = ConstantTotalSize->getValue(); |
| 937 Value = Utils::applyAlignment(Value, Alignment); | 942 Value = Utils::applyAlignment(Value, Alignment); |
| 938 _sub(esp, Ctx->getConstantInt32(Value)); | 943 if (Inst->getKnownFrameOffset()) { |
| 944 _adjust_stack(Value); |
| 945 FixedAllocaSizeBytes += Value; |
| 946 } else { |
| 947 _sub(esp, Ctx->getConstantInt32(Value)); |
| 948 } |
| 939 } else { | 949 } else { |
| 940 // Non-constant sizes need to be adjusted to the next highest multiple of | 950 // Non-constant sizes need to be adjusted to the next highest multiple of |
| 941 // the required alignment at runtime. | 951 // the required alignment at runtime. |
| 942 Variable *T = makeReg(IceType_i32); | 952 Variable *T = makeReg(IceType_i32); |
| 943 _mov(T, TotalSize); | 953 _mov(T, TotalSize); |
| 944 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 954 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 945 _and(T, Ctx->getConstantInt32(-Alignment)); | 955 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 946 _sub(esp, T); | 956 _sub(esp, T); |
| 947 } | 957 } |
| 948 _mov(Dest, esp); | 958 _mov(Dest, esp); |
| (...skipping 4525 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5474 } | 5484 } |
| 5475 // the offset is not eligible for blinding or pooling, return the original | 5485 // the offset is not eligible for blinding or pooling, return the original |
| 5476 // mem operand | 5486 // mem operand |
| 5477 return MemOperand; | 5487 return MemOperand; |
| 5478 } | 5488 } |
| 5479 | 5489 |
| 5480 } // end of namespace X86Internal | 5490 } // end of namespace X86Internal |
| 5481 } // end of namespace Ice | 5491 } // end of namespace Ice |
| 5482 | 5492 |
| 5483 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5493 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |