Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1159013002: Subzero ARM: addProlog/addEpilogue -- share some code with x86. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: clang-format and add comment Created 5 years, 6 months ago
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 113 matching lines...)
124 } 124 }
125 125
126 // The maximum number of arguments to pass in XMM registers 126 // The maximum number of arguments to pass in XMM registers
127 const uint32_t X86_MAX_XMM_ARGS = 4; 127 const uint32_t X86_MAX_XMM_ARGS = 4;
128 // The number of bits in a byte 128 // The number of bits in a byte
129 const uint32_t X86_CHAR_BIT = 8; 129 const uint32_t X86_CHAR_BIT = 8;
130 // Stack alignment 130 // Stack alignment
131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; 131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
132 // Size of the return address on the stack 132 // Size of the return address on the stack
133 const uint32_t X86_RET_IP_SIZE_BYTES = 4; 133 const uint32_t X86_RET_IP_SIZE_BYTES = 4;
134 // The base 2 logarithm of the width in bytes of the smallest stack slot
135 const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
136 // The base 2 logarithm of the width in bytes of the largest stack slot
137 const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
138 // The number of different NOP instructions 134 // The number of different NOP instructions
139 const uint32_t X86_NUM_NOP_VARIANTS = 5; 135 const uint32_t X86_NUM_NOP_VARIANTS = 5;
140 136
141 // Value is in bytes. Returns Value adjusted to the next highest multiple 137 // Value is in bytes. Returns Value adjusted to the next highest multiple
142 // of the stack alignment. 138 // of the stack alignment.
143 uint32_t applyStackAlignment(uint32_t Value) { 139 uint32_t applyStackAlignment(uint32_t Value) {
144 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); 140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
145 } 141 }
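For reference, the rounding that Utils::applyAlignment performs here is the usual power-of-two round-up. A minimal standalone sketch, assuming the alignment is a nonzero power of two (X86_STACK_ALIGNMENT_BYTES == 16 qualifies):

```cpp
#include <cassert>
#include <cstdint>

// Minimal sketch of round-up-to-alignment; assumes Alignment is a nonzero
// power of two, which holds for X86_STACK_ALIGNMENT_BYTES == 16.
uint32_t applyAlignmentSketch(uint32_t Value, uint32_t Alignment) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0);
  return (Value + Alignment - 1) & ~(Alignment - 1);
}

// applyAlignmentSketch(0, 16) == 0, applyAlignmentSketch(1, 16) == 16,
// applyAlignmentSketch(17, 16) == 32.
```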
146 142
147 // In some cases, there are x-macros tables for both high-level and 143 // In some cases, there are x-macros tables for both high-level and
(...skipping 545 matching lines...)
693 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); 689 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
694 RegisterArg->setRegNum(RegNum); 690 RegisterArg->setRegNum(RegNum);
695 RegisterArg->setIsArg(); 691 RegisterArg->setIsArg();
696 Arg->setIsArg(false); 692 Arg->setIsArg(false);
697 693
698 Args[I] = RegisterArg; 694 Args[I] = RegisterArg;
699 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 695 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
700 } 696 }
701 } 697 }
702 698
703 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
704 // Sort the variables into buckets according to the log of their width
705 // in bytes.
706 const SizeT NumBuckets =
707 X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1;
708 VarList Buckets[NumBuckets];
709
710 for (Variable *Var : Source) {
711 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
712 SizeT LogNaturalAlignment = llvm::findFirstSet(NaturalAlignment);
713 assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE);
714 assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE);
715 SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE;
716 Buckets[BucketIndex].push_back(Var);
717 }
718
719 for (SizeT I = 0, E = NumBuckets; I < E; ++I) {
720 VarList &List = Buckets[NumBuckets - I - 1];
721 Dest.insert(Dest.end(), List.begin(), List.end());
722 }
723 }
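The deleted sortByAlignment above is a two-pass bucket sort: each variable lands in a bucket keyed by the log2 of its on-stack width (llvm::findFirstSet returns the lowest set bit index, which is log2 for power-of-two widths), and buckets are emitted largest-first so the highest-alignment slots come first and padding is minimized. A standalone sketch of the same idea over plain widths:

```cpp
#include <cstdint>
#include <vector>

// Bucket sort by log2(width), emitting largest widths first; mirrors the
// deleted sortByAlignment with min/max log2 slot sizes of 2 and 4.
std::vector<uint32_t> sortWidthsForSpillArea(const std::vector<uint32_t> &Widths) {
  const unsigned MinLog = 2, MaxLog = 4; // 4-byte through 16-byte slots
  std::vector<std::vector<uint32_t>> Buckets(MaxLog - MinLog + 1);
  for (uint32_t W : Widths) {
    unsigned Log = 0;
    while ((1u << Log) < W)
      ++Log; // log2 of W for power-of-two widths
    Buckets[Log - MinLog].push_back(W);
  }
  std::vector<uint32_t> Out;
  for (unsigned I = Buckets.size(); I-- > 0;) // largest alignment first
    Out.insert(Out.end(), Buckets[I].begin(), Buckets[I].end());
  return Out;
}
// sortWidthsForSpillArea({4, 16, 8, 4}) yields {16, 8, 4, 4}.
```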
724
725 // Helper function for addProlog(). 699 // Helper function for addProlog().
726 // 700 //
727 // This assumes Arg is an argument passed on the stack. This sets the 701 // This assumes Arg is an argument passed on the stack. This sets the
728 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 702 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
729 // width. For an I64 arg that has been split into Lo and Hi components, 703 // width. For an I64 arg that has been split into Lo and Hi components,
730 // it calls itself recursively on the components, taking care to handle 704 // it calls itself recursively on the components, taking care to handle
731 // Lo first because of the little-endian architecture. Lastly, this 705 // Lo first because of the little-endian architecture. Lastly, this
732 // function generates an instruction to copy Arg into its assigned 706 // function generates an instruction to copy Arg into its assigned
733 // register if applicable. 707 // register if applicable.
734 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 708 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
(...skipping 56 matching lines...)
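The elided body is summarized by the comment above. As a rough sketch of the recursion it describes (the split-variable accessors getLo/getHi match Subzero's naming convention, but treat the body as an assumption since it is skipped in this diff):

```cpp
// Schematic sketch only; the real body is elided above. For a split i64
// argument, Lo occupies the lower address (little-endian), so it is
// processed first and Hi ends up 4 bytes further from the frame pointer.
void finishArgumentLoweringSketch(Variable *Arg, Variable *FramePtr,
                                  size_t BasicFrameOffset,
                                  size_t &InArgsSizeBytes) {
  if (Variable *Lo = Arg->getLo()) { // hypothetical split-arg accessors
    finishArgumentLoweringSketch(Lo, FramePtr, BasicFrameOffset,
                                 InArgsSizeBytes);
    finishArgumentLoweringSketch(Arg->getHi(), FramePtr, BasicFrameOffset,
                                 InArgsSizeBytes);
    return;
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Arg->getType());
  // ...followed by a copy into Arg's assigned register, if it has one.
}
```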
791 // 765 //
792 // The following variables record the size in bytes of the given areas: 766 // The following variables record the size in bytes of the given areas:
793 // * X86_RET_IP_SIZE_BYTES: area 1 767 // * X86_RET_IP_SIZE_BYTES: area 1
794 // * PreservedRegsSizeBytes: area 2 768 // * PreservedRegsSizeBytes: area 2
795 // * SpillAreaPaddingBytes: area 3 769 // * SpillAreaPaddingBytes: area 3
796 // * GlobalsSize: area 4 770 // * GlobalsSize: area 4
797 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 771 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
798 // * LocalsSpillAreaSize: area 6 772 // * LocalsSpillAreaSize: area 6
799 // * SpillAreaSizeBytes: areas 3 - 7 773 // * SpillAreaSizeBytes: areas 3 - 7
800 774
801 // Make a final pass over the Cfg to determine which variables need
802 // stack slots.
803 llvm::BitVector IsVarReferenced(Func->getNumVariables());
804 for (CfgNode *Node : Func->getNodes()) {
805 for (Inst &Inst : Node->getInsts()) {
806 if (Inst.isDeleted())
807 continue;
808 if (const Variable *Var = Inst.getDest())
809 IsVarReferenced[Var->getIndex()] = true;
810 for (SizeT I = 0; I < Inst.getSrcSize(); ++I) {
811 Operand *Src = Inst.getSrc(I);
812 SizeT NumVars = Src->getNumVars();
813 for (SizeT J = 0; J < NumVars; ++J) {
814 const Variable *Var = Src->getVar(J);
815 IsVarReferenced[Var->getIndex()] = true;
816 }
817 }
818 }
819 }
820
821 // If SimpleCoalescing is false, each variable without a register
822 // gets its own unique stack slot, which leads to large stack
823 // frames. If SimpleCoalescing is true, then each "global" variable
824 // without a register gets its own slot, but "local" variable slots
825 // are reused across basic blocks. E.g., if A and B are local to
826 // block 1 and C is local to block 2, then C may share a slot with A or B.
827 //
828 // We cannot coalesce stack slots if this function calls a "returns twice"
829 // function. In that case, basic blocks may be revisited, and variables
830 // local to those basic blocks are actually live until after the
831 // called function returns a second time.
832 const bool SimpleCoalescing = !callsReturnsTwice();
833 size_t InArgsSizeBytes = 0;
834 size_t PreservedRegsSizeBytes = 0;
835 SpillAreaSizeBytes = 0;
836 const VariablesMetadata *VMetadata = Func->getVMetadata();
837 Context.init(Node);
838 Context.setInsertPoint(Context.getCur());
839
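To see why a returns-twice callee (e.g. setjmp) forbids the slot sharing described in the comment above, consider this standalone illustration (plain C++, not Subzero code): a value written in the first block must survive a second entry into it via longjmp, even though it looked dead once the block ended.

```cpp
#include <csetjmp>
#include <cstdio>

static std::jmp_buf Buf;

int main() {
  volatile int A = 0; // imagine A's stack slot were shared with another
                      // block's local because A "dies" at the end of the if
  if (setjmp(Buf) == 0) { // returns twice: once now, once via longjmp
    A = 42;
    std::longjmp(Buf, 1); // re-enters via the setjmp above
  }
  std::printf("%d\n", A); // must still print 42 on the second return
  return 0;
}
```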
840 // Determine stack frame offsets for each Variable without a 775 // Determine stack frame offsets for each Variable without a
841 // register assignment. This can be done as one variable per stack 776 // register assignment. This can be done as one variable per stack
842 // slot. Or, do coalescing by running the register allocator again 777 // slot. Or, do coalescing by running the register allocator again
843 // with an infinite set of registers (as a side effect, this gives 778 // with an infinite set of registers (as a side effect, this gives
844 // variables a second chance at physical register assignment). 779 // variables a second chance at physical register assignment).
845 // 780 //
846 // A middle ground approach is to leverage sparsity and allocate one 781 // A middle ground approach is to leverage sparsity and allocate one
847 // block of space on the frame for globals (variables with 782 // block of space on the frame for globals (variables with
848 // multi-block lifetime), and one block to share for locals 783 // multi-block lifetime), and one block to share for locals
849 // (single-block lifetime). 784 // (single-block lifetime).
850 785
786 Context.init(Node);
787 Context.setInsertPoint(Context.getCur());
788
851 llvm::SmallBitVector CalleeSaves = 789 llvm::SmallBitVector CalleeSaves =
852 getRegisterSet(RegSet_CalleeSave, RegSet_None); 790 getRegisterSet(RegSet_CalleeSave, RegSet_None);
853 791 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
792 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
854 size_t GlobalsSize = 0; 793 size_t GlobalsSize = 0;
855 std::vector<size_t> LocalsSize(Func->getNumNodes()); 794 // If there is a separate locals area, this represents that area.
856 795 // Otherwise it counts any variable not counted by GlobalsSize.
857 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and 796 SpillAreaSizeBytes = 0;
858 // SpillAreaSizeBytes.
859 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
860 const VarList &Variables = Func->getVariables();
861 const VarList &Args = Func->getArgs();
862 VarList SpilledVariables, SortedSpilledVariables, VariablesLinkedToSpillSlots;
863
864 // If there is a separate locals area, this specifies the alignment 797 // If there is a separate locals area, this specifies the alignment
865 // for it. 798 // for it.
866 uint32_t LocalsSlotsAlignmentBytes = 0; 799 uint32_t LocalsSlotsAlignmentBytes = 0;
867 // The entire spill locations area gets aligned to the largest natural 800 // The entire spill locations area gets aligned to the largest natural
868 // alignment of the variables that have a spill slot. 801 // alignment of the variables that have a spill slot.
869 uint32_t SpillAreaAlignmentBytes = 0; 802 uint32_t SpillAreaAlignmentBytes = 0;
870 for (Variable *Var : Variables) { 803 // A spill slot linked to a variable with a stack slot should reuse
871 if (Var->hasReg()) { 804 // that stack slot.
872 RegsUsed[Var->getRegNum()] = true; 805 std::function<bool(Variable *)> TargetVarHook =
873 continue; 806 [&VariablesLinkedToSpillSlots](Variable *Var) {
874 }
875 // An argument either does not need a stack slot (if passed in a
876 // register) or already has one (if passed on the stack).
877 if (Var->getIsArg())
878 continue;
879 // An unreferenced variable doesn't need a stack slot.
880 if (!IsVarReferenced[Var->getIndex()])
881 continue;
882 // A spill slot linked to a variable with a stack slot should reuse
883 // that stack slot.
884 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { 807 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
885 assert(Var->getWeight().isZero()); 808 assert(Var->getWeight().isZero());
886 if (!SpillVar->getLinkedTo()->hasReg()) { 809 if (!SpillVar->getLinkedTo()->hasReg()) {
887 VariablesLinkedToSpillSlots.push_back(Var); 810 VariablesLinkedToSpillSlots.push_back(Var);
888 continue; 811 return true;
889 } 812 }
890 } 813 }
891 SpilledVariables.push_back(Var); 814 return false;
892 } 815 };
893 816
894 SortedSpilledVariables.reserve(SpilledVariables.size()); 817 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
895 sortByAlignment(SortedSpilledVariables, SpilledVariables); 818 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
896 for (Variable *Var : SortedSpilledVariables) { 819 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
897 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 820 &LocalsSlotsAlignmentBytes, TargetVarHook);
898 if (!SpillAreaAlignmentBytes)
899 SpillAreaAlignmentBytes = Increment;
900 if (SimpleCoalescing && VMetadata->isTracked(Var)) {
901 if (VMetadata->isMultiBlock(Var)) {
902 GlobalsSize += Increment;
903 } else {
904 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
905 LocalsSize[NodeIndex] += Increment;
906 if (LocalsSize[NodeIndex] > SpillAreaSizeBytes)
907 SpillAreaSizeBytes = LocalsSize[NodeIndex];
908 if (!LocalsSlotsAlignmentBytes)
909 LocalsSlotsAlignmentBytes = Increment;
910 }
911 } else {
912 SpillAreaSizeBytes += Increment;
913 }
914 }
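This CL replaces the loop above with a call into target-independent code. Judging from the call site, the shared helper presumably has a declaration along these lines (inferred, not copied from the patch):

```cpp
// Inferred declaration (an assumption; the real prototype lives in the
// shared TargetLowering code this CL factors out). TargetVarHook lets each
// target claim variables that need no fresh slot, e.g. x86 SpillVariables
// that alias an existing stack slot.
void getVarStackSlotParams(VarList &SortedSpilledVariables,
                           llvm::SmallBitVector &RegsUsed,
                           size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
                           uint32_t *SpillAreaAlignmentBytes,
                           uint32_t *LocalsSlotsAlignmentBytes,
                           std::function<bool(Variable *)> TargetVarHook);
```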
915 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; 821 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
916
917 SpillAreaSizeBytes += GlobalsSize; 822 SpillAreaSizeBytes += GlobalsSize;
918 823
919 // Add push instructions for preserved registers. 824 // Add push instructions for preserved registers.
920 uint32_t NumCallee = 0; 825 uint32_t NumCallee = 0;
826 size_t PreservedRegsSizeBytes = 0;
921 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 827 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
922 if (CalleeSaves[i] && RegsUsed[i]) { 828 if (CalleeSaves[i] && RegsUsed[i]) {
923 ++NumCallee; 829 ++NumCallee;
924 PreservedRegsSizeBytes += 4; 830 PreservedRegsSizeBytes += 4;
925 _push(getPhysicalRegister(i)); 831 _push(getPhysicalRegister(i));
926 } 832 }
927 } 833 }
928 Ctx->statsUpdateRegistersSaved(NumCallee); 834 Ctx->statsUpdateRegistersSaved(NumCallee);
929 835
930 // Generate "push ebp; mov ebp, esp" 836 // Generate "push ebp; mov ebp, esp"
(...skipping 49 matching lines...)
980 resetStackAdjustment(); 886 resetStackAdjustment();
981 887
982 // Fill in stack offsets for stack args, and copy args into registers 888 // Fill in stack offsets for stack args, and copy args into registers
983 // for those that were register-allocated. Args are pushed right to 889 // for those that were register-allocated. Args are pushed right to
984 // left, so Arg[0] is closest to the stack/frame pointer. 890 // left, so Arg[0] is closest to the stack/frame pointer.
985 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 891 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
986 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 892 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
987 if (!IsEbpBasedFrame) 893 if (!IsEbpBasedFrame)
988 BasicFrameOffset += SpillAreaSizeBytes; 894 BasicFrameOffset += SpillAreaSizeBytes;
989 895
896 const VarList &Args = Func->getArgs();
897 size_t InArgsSizeBytes = 0;
990 unsigned NumXmmArgs = 0; 898 unsigned NumXmmArgs = 0;
991 for (SizeT i = 0; i < Args.size(); ++i) { 899 for (Variable *Arg : Args) {
992 Variable *Arg = Args[i];
993 // Skip arguments passed in registers. 900 // Skip arguments passed in registers.
994 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { 901 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
995 ++NumXmmArgs; 902 ++NumXmmArgs;
996 continue; 903 continue;
997 } 904 }
998 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 905 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
999 } 906 }
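The loop above only classifies arguments: the first X86_MAX_XMM_ARGS vector-typed arguments arrived in XMM registers and are skipped, while everything else gets a frame offset via finishArgumentLowering. A small standalone sketch of the same classification:

```cpp
#include <cstddef>
#include <vector>

// Returns how many arguments need stack slots, given per-argument
// "is vector-typed" flags and the XMM register budget.
size_t countStackArgs(const std::vector<bool> &IsVector, size_t MaxXmmArgs) {
  size_t NumXmm = 0, NumStack = 0;
  for (bool Vec : IsVector) {
    if (Vec && NumXmm < MaxXmmArgs)
      ++NumXmm; // passed in an XMM register, no stack slot
    else
      ++NumStack; // would go through finishArgumentLowering
  }
  return NumStack;
}
// countStackArgs({true, false, true, false}, 4) == 2: both vector args fit
// in XMM registers; the two scalars are stack-passed.
```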
1000 907
1001 // Fill in stack offsets for locals. 908 // Fill in stack offsets for locals.
1002 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes; 909 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1003 LocalsSize.assign(LocalsSize.size(), 0); 910 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1004 size_t NextStackOffset = GlobalsSpaceUsed; 911 IsEbpBasedFrame);
1005 for (Variable *Var : SortedSpilledVariables) {
1006 size_t Increment = typeWidthInBytesOnStack(Var->getType());
1007 if (SimpleCoalescing && VMetadata->isTracked(Var)) {
1008 if (VMetadata->isMultiBlock(Var)) {
1009 GlobalsSpaceUsed += Increment;
1010 NextStackOffset = GlobalsSpaceUsed;
1011 } else {
1012 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
1013 LocalsSize[NodeIndex] += Increment;
1014 NextStackOffset = SpillAreaPaddingBytes +
1015 GlobalsAndSubsequentPaddingSize +
1016 LocalsSize[NodeIndex];
1017 }
1018 } else {
1019 NextStackOffset += Increment;
1020 }
1021 if (IsEbpBasedFrame)
1022 Var->setStackOffset(-NextStackOffset);
1023 else
1024 Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
1025 }
1026 this->HasComputedFrame = true;
1027
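A worked example of the offset arithmetic in the replaced loop, with invented byte counts: a 4-byte block-local variable whose node's LocalsSize has just grown to 4, in a frame with no spill-area padding and 8 bytes of globals-plus-padding out of a 24-byte spill area.

```cpp
#include <cstddef>

// Invented numbers, mirroring the replaced loop's arithmetic.
constexpr size_t SpillAreaPaddingBytes = 0;
constexpr size_t GlobalsAndSubsequentPaddingSize = 8;
constexpr size_t SpillAreaSizeBytes = 24;
constexpr size_t LocalsSizeInNode = 4; // after adding this 4-byte local
constexpr size_t NextStackOffset = SpillAreaPaddingBytes +
                                   GlobalsAndSubsequentPaddingSize +
                                   LocalsSizeInNode; // = 12
static_assert(NextStackOffset == 12, "running offset into the spill area");
// ebp-based frame: setStackOffset(-12), i.e. below the frame pointer.
// esp-based frame: setStackOffset(24 - 12) = +12 above the stack pointer.
```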
1028 // Assign stack offsets to variables that have been linked to spilled 912 // Assign stack offsets to variables that have been linked to spilled
1029 // variables. 913 // variables.
1030 for (Variable *Var : VariablesLinkedToSpillSlots) { 914 for (Variable *Var : VariablesLinkedToSpillSlots) {
1031 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); 915 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
1032 Var->setStackOffset(Linked->getStackOffset()); 916 Var->setStackOffset(Linked->getStackOffset());
1033 } 917 }
918 this->HasComputedFrame = true;
1034 919
1035 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { 920 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
1036 OstreamLocker L(Func->getContext()); 921 OstreamLocker L(Func->getContext());
1037 Ostream &Str = Func->getContext()->getStrDump(); 922 Ostream &Str = Func->getContext()->getStrDump();
1038 923
1039 Str << "Stack layout:\n"; 924 Str << "Stack layout:\n";
1040 uint32_t EspAdjustmentPaddingSize = 925 uint32_t EspAdjustmentPaddingSize =
1041 SpillAreaSizeBytes - LocalsSpillAreaSize - 926 SpillAreaSizeBytes - LocalsSpillAreaSize -
1042 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; 927 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
1043 Str << " in-args = " << InArgsSizeBytes << " bytes\n" 928 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
(...skipping 3988 matching lines...)
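For the stack-layout dump above, the esp-adjustment padding is derived from quantities already computed; with invented byte counts it works out as follows (area numbers refer to the layout comment earlier in addProlog):

```cpp
// SpillAreaSizeBytes              = 48  (areas 3-7)
// LocalsSpillAreaSize             = 24  (area 6)
// GlobalsAndSubsequentPaddingSize = 16  (areas 4-5)
// SpillAreaPaddingBytes           =  4  (area 3)
// EspAdjustmentPaddingSize = 48 - 24 - 16 - 4 = 4  (area 7)
```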
5032 case FT_Asm: 4917 case FT_Asm:
5033 case FT_Iasm: { 4918 case FT_Iasm: {
5034 OstreamLocker L(Ctx); 4919 OstreamLocker L(Ctx);
5035 emitConstantPool<PoolTypeConverter<float>>(Ctx); 4920 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5036 emitConstantPool<PoolTypeConverter<double>>(Ctx); 4921 emitConstantPool<PoolTypeConverter<double>>(Ctx);
5037 } break; 4922 } break;
5038 } 4923 }
5039 } 4924 }
5040 4925
5041 } // end of namespace Ice 4926 } // end of namespace Ice
