OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
124 } | 124 } |
125 | 125 |
126 // The maximum number of arguments to pass in XMM registers | 126 // The maximum number of arguments to pass in XMM registers |
127 const uint32_t X86_MAX_XMM_ARGS = 4; | 127 const uint32_t X86_MAX_XMM_ARGS = 4; |
128 // The number of bits in a byte | 128 // The number of bits in a byte |
129 const uint32_t X86_CHAR_BIT = 8; | 129 const uint32_t X86_CHAR_BIT = 8; |
130 // Stack alignment | 130 // Stack alignment |
131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; | 131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; |
132 // Size of the return address on the stack | 132 // Size of the return address on the stack |
133 const uint32_t X86_RET_IP_SIZE_BYTES = 4; | 133 const uint32_t X86_RET_IP_SIZE_BYTES = 4; |
134 // The base 2 logarithm of the width in bytes of the smallest stack slot | |
135 const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2; | |
136 // The base 2 logarithm of the width in bytes of the largest stack slot | |
137 const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4; | |
138 // The number of different NOP instructions | 134 // The number of different NOP instructions |
139 const uint32_t X86_NUM_NOP_VARIANTS = 5; | 135 const uint32_t X86_NUM_NOP_VARIANTS = 5; |
140 | 136 |
141 // Value is in bytes. Return Value adjusted to the next highest multiple | 137 // Value is in bytes. Return Value adjusted to the next highest multiple |
142 // of the stack alignment. | 138 // of the stack alignment. |
143 uint32_t applyStackAlignment(uint32_t Value) { | 139 uint32_t applyStackAlignment(uint32_t Value) { |
144 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
145 } | 141 } |
146 | 142 |
147 // In some cases, there are x-macros tables for both high-level and | 143 // In some cases, there are x-macros tables for both high-level and |
(...skipping 545 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
693 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | 689 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
694 RegisterArg->setRegNum(RegNum); | 690 RegisterArg->setRegNum(RegNum); |
695 RegisterArg->setIsArg(); | 691 RegisterArg->setIsArg(); |
696 Arg->setIsArg(false); | 692 Arg->setIsArg(false); |
697 | 693 |
698 Args[I] = RegisterArg; | 694 Args[I] = RegisterArg; |
699 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 695 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
700 } | 696 } |
701 } | 697 } |
702 | 698 |
703 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const { | |
704 // Sort the variables into buckets according to the log of their width | |
705 // in bytes. | |
706 const SizeT NumBuckets = | |
707 X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1; | |
708 VarList Buckets[NumBuckets]; | |
709 | |
710 for (Variable *Var : Source) { | |
711 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType()); | |
712 SizeT LogNaturalAlignment = llvm::findFirstSet(NaturalAlignment); | |
713 assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE); | |
714 assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE); | |
715 SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE; | |
716 Buckets[BucketIndex].push_back(Var); | |
717 } | |
718 | |
719 for (SizeT I = 0, E = NumBuckets; I < E; ++I) { | |
720 VarList &List = Buckets[NumBuckets - I - 1]; | |
721 Dest.insert(Dest.end(), List.begin(), List.end()); | |
722 } | |
723 } | |
724 | |
725 // Helper function for addProlog(). | 699 // Helper function for addProlog(). |
726 // | 700 // |
727 // This assumes Arg is an argument passed on the stack. This sets the | 701 // This assumes Arg is an argument passed on the stack. This sets the |
728 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 702 // frame offset for Arg and updates InArgsSizeBytes according to Arg's |
729 // width. For an I64 arg that has been split into Lo and Hi components, | 703 // width. For an I64 arg that has been split into Lo and Hi components, |
730 // it calls itself recursively on the components, taking care to handle | 704 // it calls itself recursively on the components, taking care to handle |
731 // Lo first because of the little-endian architecture. Lastly, this | 705 // Lo first because of the little-endian architecture. Lastly, this |
732 // function generates an instruction to copy Arg into its assigned | 706 // function generates an instruction to copy Arg into its assigned |
733 // register if applicable. | 707 // register if applicable. |
734 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 708 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
791 // | 765 // |
792 // The following variables record the size in bytes of the given areas: | 766 // The following variables record the size in bytes of the given areas: |
793 // * X86_RET_IP_SIZE_BYTES: area 1 | 767 // * X86_RET_IP_SIZE_BYTES: area 1 |
794 // * PreservedRegsSizeBytes: area 2 | 768 // * PreservedRegsSizeBytes: area 2 |
795 // * SpillAreaPaddingBytes: area 3 | 769 // * SpillAreaPaddingBytes: area 3 |
796 // * GlobalsSize: area 4 | 770 // * GlobalsSize: area 4 |
797 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 | 771 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
798 // * LocalsSpillAreaSize: area 6 | 772 // * LocalsSpillAreaSize: area 6 |
799 // * SpillAreaSizeBytes: areas 3 - 7 | 773 // * SpillAreaSizeBytes: areas 3 - 7 |
800 | 774 |
801 // Make a final pass over the Cfg to determine which variables need | |
802 // stack slots. | |
803 llvm::BitVector IsVarReferenced(Func->getNumVariables()); | |
804 for (CfgNode *Node : Func->getNodes()) { | |
805 for (Inst &Inst : Node->getInsts()) { | |
806 if (Inst.isDeleted()) | |
807 continue; | |
808 if (const Variable *Var = Inst.getDest()) | |
809 IsVarReferenced[Var->getIndex()] = true; | |
810 for (SizeT I = 0; I < Inst.getSrcSize(); ++I) { | |
811 Operand *Src = Inst.getSrc(I); | |
812 SizeT NumVars = Src->getNumVars(); | |
813 for (SizeT J = 0; J < NumVars; ++J) { | |
814 const Variable *Var = Src->getVar(J); | |
815 IsVarReferenced[Var->getIndex()] = true; | |
816 } | |
817 } | |
818 } | |
819 } | |
820 | |
821 // If SimpleCoalescing is false, each variable without a register | |
822 // gets its own unique stack slot, which leads to large stack | |
823 // frames. If SimpleCoalescing is true, then each "global" variable | |
824 // without a register gets its own slot, but "local" variable slots | |
825 // are reused across basic blocks. E.g., if A and B are local to | |
826 // block 1 and C is local to block 2, then C may share a slot with A or B. | |
827 // | |
828 // We cannot coalesce stack slots if this function calls a "returns twice" | |
829 // function. In that case, basic blocks may be revisited, and variables | |
830 // local to those basic blocks are actually live until after the | |
831 // called function returns a second time. | |
832 const bool SimpleCoalescing = !callsReturnsTwice(); | |
833 size_t InArgsSizeBytes = 0; | |
834 size_t PreservedRegsSizeBytes = 0; | |
835 SpillAreaSizeBytes = 0; | |
836 const VariablesMetadata *VMetadata = Func->getVMetadata(); | |
837 Context.init(Node); | |
838 Context.setInsertPoint(Context.getCur()); | |
839 | |
840 // Determine stack frame offsets for each Variable without a | 775 // Determine stack frame offsets for each Variable without a |
841 // register assignment. This can be done as one variable per stack | 776 // register assignment. This can be done as one variable per stack |
842 // slot. Or, do coalescing by running the register allocator again | 777 // slot. Or, do coalescing by running the register allocator again |
843 // with an infinite set of registers (as a side effect, this gives | 778 // with an infinite set of registers (as a side effect, this gives |
844 // variables a second chance at physical register assignment). | 779 // variables a second chance at physical register assignment). |
845 // | 780 // |
846 // A middle ground approach is to leverage sparsity and allocate one | 781 // A middle ground approach is to leverage sparsity and allocate one |
847 // block of space on the frame for globals (variables with | 782 // block of space on the frame for globals (variables with |
848 // multi-block lifetime), and one block to share for locals | 783 // multi-block lifetime), and one block to share for locals |
849 // (single-block lifetime). | 784 // (single-block lifetime). |
850 | 785 |
| 786 Context.init(Node); |
| 787 Context.setInsertPoint(Context.getCur()); |
| 788 |
851 llvm::SmallBitVector CalleeSaves = | 789 llvm::SmallBitVector CalleeSaves = |
852 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 790 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
853 | 791 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| 792 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; |
854 size_t GlobalsSize = 0; | 793 size_t GlobalsSize = 0; |
855 std::vector<size_t> LocalsSize(Func->getNumNodes()); | 794 // If there is a separate locals area, this represents that area. |
856 | 795 // Otherwise it counts any variable not counted by GlobalsSize. |
857 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and | 796 SpillAreaSizeBytes = 0; |
858 // SpillAreaSizeBytes. | |
859 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | |
860 const VarList &Variables = Func->getVariables(); | |
861 const VarList &Args = Func->getArgs(); | |
862 VarList SpilledVariables, SortedSpilledVariables, VariablesLinkedToSpillSlots; | |
863 | |
864 // If there is a separate locals area, this specifies the alignment | 797 // If there is a separate locals area, this specifies the alignment |
865 // for it. | 798 // for it. |
866 uint32_t LocalsSlotsAlignmentBytes = 0; | 799 uint32_t LocalsSlotsAlignmentBytes = 0; |
867 // The entire spill locations area gets aligned to largest natural | 800 // The entire spill locations area gets aligned to largest natural |
868 // alignment of the variables that have a spill slot. | 801 // alignment of the variables that have a spill slot. |
869 uint32_t SpillAreaAlignmentBytes = 0; | 802 uint32_t SpillAreaAlignmentBytes = 0; |
870 for (Variable *Var : Variables) { | 803 // A spill slot linked to a variable with a stack slot should reuse |
871 if (Var->hasReg()) { | 804 // that stack slot. |
872 RegsUsed[Var->getRegNum()] = true; | 805 std::function<bool(Variable *)> TargetVarHook = |
873 continue; | 806 [&VariablesLinkedToSpillSlots](Variable *Var) { |
874 } | |
875 // An argument either does not need a stack slot (if passed in a | |
876 // register) or already has one (if passed on the stack). | |
877 if (Var->getIsArg()) | |
878 continue; | |
879 // An unreferenced variable doesn't need a stack slot. | |
880 if (!IsVarReferenced[Var->getIndex()]) | |
881 continue; | |
882 // A spill slot linked to a variable with a stack slot should reuse | |
883 // that stack slot. | |
884 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { | 807 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { |
885 assert(Var->getWeight().isZero()); | 808 assert(Var->getWeight().isZero()); |
886 if (!SpillVar->getLinkedTo()->hasReg()) { | 809 if (!SpillVar->getLinkedTo()->hasReg()) { |
887 VariablesLinkedToSpillSlots.push_back(Var); | 810 VariablesLinkedToSpillSlots.push_back(Var); |
888 continue; | 811 return true; |
889 } | 812 } |
890 } | 813 } |
891 SpilledVariables.push_back(Var); | 814 return false; |
892 } | 815 }; |
893 | 816 |
894 SortedSpilledVariables.reserve(SpilledVariables.size()); | 817 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
895 sortByAlignment(SortedSpilledVariables, SpilledVariables); | 818 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
896 for (Variable *Var : SortedSpilledVariables) { | 819 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
897 size_t Increment = typeWidthInBytesOnStack(Var->getType()); | 820 &LocalsSlotsAlignmentBytes, TargetVarHook); |
898 if (!SpillAreaAlignmentBytes) | |
899 SpillAreaAlignmentBytes = Increment; | |
900 if (SimpleCoalescing && VMetadata->isTracked(Var)) { | |
901 if (VMetadata->isMultiBlock(Var)) { | |
902 GlobalsSize += Increment; | |
903 } else { | |
904 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex(); | |
905 LocalsSize[NodeIndex] += Increment; | |
906 if (LocalsSize[NodeIndex] > SpillAreaSizeBytes) | |
907 SpillAreaSizeBytes = LocalsSize[NodeIndex]; | |
908 if (!LocalsSlotsAlignmentBytes) | |
909 LocalsSlotsAlignmentBytes = Increment; | |
910 } | |
911 } else { | |
912 SpillAreaSizeBytes += Increment; | |
913 } | |
914 } | |
915 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 821 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
916 | |
917 SpillAreaSizeBytes += GlobalsSize; | 822 SpillAreaSizeBytes += GlobalsSize; |
918 | 823 |
919 // Add push instructions for preserved registers. | 824 // Add push instructions for preserved registers. |
920 uint32_t NumCallee = 0; | 825 uint32_t NumCallee = 0; |
| 826 size_t PreservedRegsSizeBytes = 0; |
921 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 827 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
922 if (CalleeSaves[i] && RegsUsed[i]) { | 828 if (CalleeSaves[i] && RegsUsed[i]) { |
923 ++NumCallee; | 829 ++NumCallee; |
924 PreservedRegsSizeBytes += 4; | 830 PreservedRegsSizeBytes += 4; |
925 _push(getPhysicalRegister(i)); | 831 _push(getPhysicalRegister(i)); |
926 } | 832 } |
927 } | 833 } |
928 Ctx->statsUpdateRegistersSaved(NumCallee); | 834 Ctx->statsUpdateRegistersSaved(NumCallee); |
929 | 835 |
930 // Generate "push ebp; mov ebp, esp" | 836 // Generate "push ebp; mov ebp, esp" |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
980 resetStackAdjustment(); | 886 resetStackAdjustment(); |
981 | 887 |
982 // Fill in stack offsets for stack args, and copy args into registers | 888 // Fill in stack offsets for stack args, and copy args into registers |
983 // for those that were register-allocated. Args are pushed right to | 889 // for those that were register-allocated. Args are pushed right to |
984 // left, so Arg[0] is closest to the stack/frame pointer. | 890 // left, so Arg[0] is closest to the stack/frame pointer. |
985 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 891 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
986 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | 892 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; |
987 if (!IsEbpBasedFrame) | 893 if (!IsEbpBasedFrame) |
988 BasicFrameOffset += SpillAreaSizeBytes; | 894 BasicFrameOffset += SpillAreaSizeBytes; |
989 | 895 |
| 896 const VarList &Args = Func->getArgs(); |
| 897 size_t InArgsSizeBytes = 0; |
990 unsigned NumXmmArgs = 0; | 898 unsigned NumXmmArgs = 0; |
991 for (SizeT i = 0; i < Args.size(); ++i) { | 899 for (Variable *Arg : Args) { |
992 Variable *Arg = Args[i]; | |
993 // Skip arguments passed in registers. | 900 // Skip arguments passed in registers. |
994 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 901 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { |
995 ++NumXmmArgs; | 902 ++NumXmmArgs; |
996 continue; | 903 continue; |
997 } | 904 } |
998 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 905 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
999 } | 906 } |
1000 | 907 |
1001 // Fill in stack offsets for locals. | 908 // Fill in stack offsets for locals. |
1002 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes; | 909 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
1003 LocalsSize.assign(LocalsSize.size(), 0); | 910 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
1004 size_t NextStackOffset = GlobalsSpaceUsed; | 911 IsEbpBasedFrame); |
1005 for (Variable *Var : SortedSpilledVariables) { | |
1006 size_t Increment = typeWidthInBytesOnStack(Var->getType()); | |
1007 if (SimpleCoalescing && VMetadata->isTracked(Var)) { | |
1008 if (VMetadata->isMultiBlock(Var)) { | |
1009 GlobalsSpaceUsed += Increment; | |
1010 NextStackOffset = GlobalsSpaceUsed; | |
1011 } else { | |
1012 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex(); | |
1013 LocalsSize[NodeIndex] += Increment; | |
1014 NextStackOffset = SpillAreaPaddingBytes + | |
1015 GlobalsAndSubsequentPaddingSize + | |
1016 LocalsSize[NodeIndex]; | |
1017 } | |
1018 } else { | |
1019 NextStackOffset += Increment; | |
1020 } | |
1021 if (IsEbpBasedFrame) | |
1022 Var->setStackOffset(-NextStackOffset); | |
1023 else | |
1024 Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset); | |
1025 } | |
1026 this->HasComputedFrame = true; | |
1027 | |
1028 // Assign stack offsets to variables that have been linked to spilled | 912 // Assign stack offsets to variables that have been linked to spilled |
1029 // variables. | 913 // variables. |
1030 for (Variable *Var : VariablesLinkedToSpillSlots) { | 914 for (Variable *Var : VariablesLinkedToSpillSlots) { |
1031 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); | 915 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); |
1032 Var->setStackOffset(Linked->getStackOffset()); | 916 Var->setStackOffset(Linked->getStackOffset()); |
1033 } | 917 } |
| 918 this->HasComputedFrame = true; |
1034 | 919 |
1035 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { | 920 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { |
1036 OstreamLocker L(Func->getContext()); | 921 OstreamLocker L(Func->getContext()); |
1037 Ostream &Str = Func->getContext()->getStrDump(); | 922 Ostream &Str = Func->getContext()->getStrDump(); |
1038 | 923 |
1039 Str << "Stack layout:\n"; | 924 Str << "Stack layout:\n"; |
1040 uint32_t EspAdjustmentPaddingSize = | 925 uint32_t EspAdjustmentPaddingSize = |
1041 SpillAreaSizeBytes - LocalsSpillAreaSize - | 926 SpillAreaSizeBytes - LocalsSpillAreaSize - |
1042 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; | 927 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; |
1043 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | 928 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
(...skipping 3988 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5032 case FT_Asm: | 4917 case FT_Asm: |
5033 case FT_Iasm: { | 4918 case FT_Iasm: { |
5034 OstreamLocker L(Ctx); | 4919 OstreamLocker L(Ctx); |
5035 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 4920 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
5036 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 4921 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
5037 } break; | 4922 } break; |
5038 } | 4923 } |
5039 } | 4924 } |
5040 | 4925 |
5041 } // end of namespace Ice | 4926 } // end of namespace Ice |
OLD | NEW |