Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(572)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1616483003: Merged addProlog and addEpilog on x86. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 856 matching lines...) Expand 10 before | Expand all | Expand 10 after
867 "Infinite-weight Variable has no register assigned"); 867 "Infinite-weight Variable has no register assigned");
868 } 868 }
869 int32_t Offset = Var->getStackOffset(); 869 int32_t Offset = Var->getStackOffset();
870 int32_t BaseRegNum = Var->getBaseRegNum(); 870 int32_t BaseRegNum = Var->getBaseRegNum();
871 if (Var->getBaseRegNum() == Variable::NoRegister) 871 if (Var->getBaseRegNum() == Variable::NoRegister)
872 BaseRegNum = getFrameOrStackReg(); 872 BaseRegNum = getFrameOrStackReg();
873 return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset, 873 return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset,
874 AssemblerFixup::NoFixup); 874 AssemblerFixup::NoFixup);
875 } 875 }
876 876
/// Emits the function prolog at the start of \p Node and computes the stack
/// frame for the function.
///
/// In order, this: collects the spilled variables and their size/alignment
/// requirements, pushes the callee-saved registers that are in use, links the
/// frame pointer when IsEbpBasedFrame is set, sizes and aligns the spill
/// areas, subtracts the frame size from the stack pointer, assigns stack
/// offsets to stack-passed arguments and to spilled variables, sets
/// HasComputedFrame, and (in verbose frame-dump builds) prints the layout.
877 template <typename TraitsType>
878 void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
879 // Stack frame layout:
880 //
881 // +------------------------+
882 // | 1. return address |
883 // +------------------------+
884 // | 2. preserved registers |
885 // +------------------------+
886 // | 3. padding |
887 // +------------------------+
888 // | 4. global spill area |
889 // +------------------------+
890 // | 5. padding |
891 // +------------------------+
892 // | 6. local spill area |
893 // +------------------------+
894 // | 7. padding |
895 // +------------------------+
896 // | 8. allocas |
897 // +------------------------+
898 // | 9. padding |
899 // +------------------------+
900 // | 10. out args |
901 // +------------------------+ <--- StackPointer
902 //
903 // The following variables record the size in bytes of the given areas:
904 // * X86_RET_IP_SIZE_BYTES: area 1
905 // * PreservedRegsSizeBytes: area 2
906 // * SpillAreaPaddingBytes: area 3
907 // * GlobalsSize: area 4
908 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
909 // * LocalsSpillAreaSize: area 6
910 // * SpillAreaSizeBytes: areas 3 - 10
911 // * maxOutArgsSizeBytes(): area 10
912
913 // Determine stack frame offsets for each Variable without a register
914 // assignment. This can be done as one variable per stack slot. Or, do
915 // coalescing by running the register allocator again with an infinite set of
916 // registers (as a side effect, this gives variables a second chance at
917 // physical register assignment).
918 //
919 // A middle ground approach is to leverage sparsity and allocate one block of
920 // space on the frame for globals (variables with multi-block lifetime), and
921 // one block to share for locals (single-block lifetime).
922
923 Context.init(Node);
924 Context.setInsertPoint(Context.getCur());
925
926 llvm::SmallBitVector CalleeSaves =
927 getRegisterSet(RegSet_CalleeSave, RegSet_None);
928 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
929 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
930 size_t GlobalsSize = 0;
931 // If there is a separate locals area, this represents that area. Otherwise
932 // it counts any variable not counted by GlobalsSize.
933 SpillAreaSizeBytes = 0;
934 // If there is a separate locals area, this specifies the alignment for it.
935 uint32_t LocalsSlotsAlignmentBytes = 0;
936 // The entire spill locations area gets aligned to largest natural alignment
937 // of the variables that have a spill slot.
938 uint32_t SpillAreaAlignmentBytes = 0;
939 // A spill slot linked to a variable with a stack slot should reuse that
940 // stack slot.
// The hook returns true for such linked spill variables and records them in
// VariablesLinkedToSpillSlots; their offsets are copied from the linked
// variable near the end of this function.
941 std::function<bool(Variable *)> TargetVarHook =
942 [&VariablesLinkedToSpillSlots](Variable *Var) {
943 if (auto *SpillVar =
944 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
945 assert(Var->mustNotHaveReg());
946 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
947 VariablesLinkedToSpillSlots.push_back(Var);
948 return true;
949 }
950 }
951 return false;
952 };
953
954 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
955 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
956 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
957 &LocalsSlotsAlignmentBytes, TargetVarHook);
// Remember the locals-only size before the globals size is folded into
// SpillAreaSizeBytes; it is only needed for the layout dump below.
958 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
959 SpillAreaSizeBytes += GlobalsSize;
960
961 // Add push instructions for preserved registers.
// First map every used callee-saved register onto its base register
// (Traits::getBaseReg), presumably so that aliases of the same physical
// register are pushed only once; then push each marked base register.
962 uint32_t NumCallee = 0;
963 size_t PreservedRegsSizeBytes = 0;
964 llvm::SmallBitVector Pushed(CalleeSaves.size());
965 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
966 const int32_t Canonical = Traits::getBaseReg(i);
967 assert(Canonical == Traits::getBaseReg(Canonical));
968 if (CalleeSaves[i] && RegsUsed[i]) {
969 Pushed[Canonical] = true;
970 }
971 }
972 for (SizeT i = 0; i < Pushed.size(); ++i) {
973 if (!Pushed[i])
974 continue;
975 assert(static_cast<int32_t>(i) == Traits::getBaseReg(i));
976 ++NumCallee;
977 PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
978 _push_reg(getPhysicalRegister(i, Traits::WordType));
979 }
980 Ctx->statsUpdateRegistersSaved(NumCallee);
981
982 // Generate "push frameptr; mov frameptr, stackptr"
// The saved frame pointer is counted as one more word of preserved registers.
983 if (IsEbpBasedFrame) {
984 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
985 .count() == 0);
986 PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
987 _link_bp();
988 }
989
990 // Align the variables area. SpillAreaPaddingBytes is the size of the region
991 // after the preserved registers and before the spill areas.
992 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
993 // locals area if they are separate.
994 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
995 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
996 uint32_t SpillAreaPaddingBytes = 0;
997 uint32_t LocalsSlotsPaddingBytes = 0;
998 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
999 SpillAreaAlignmentBytes, GlobalsSize,
1000 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
1001 &LocalsSlotsPaddingBytes);
1002 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1003 uint32_t GlobalsAndSubsequentPaddingSize =
1004 GlobalsSize + LocalsSlotsPaddingBytes;
1005
1006 // Functions returning scalar floating point types may need to convert values
1007 // from an in-register xmm value to the top of the x87 floating point stack.
1008 // This is done by a movp[sd] and an fld[sd]. Ensure there is enough scratch
1009 // space on the stack for this.
1010 const Type ReturnType = Func->getReturnType();
1011 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM && isScalarFloatingType(ReturnType)) {
John 2016/01/21 15:04:52 optional: I personally prefer if (!Traits:BLAH) {
sehr 2016/01/21 16:02:10 Kernighan's "Elements of Programming Style" teache
1012 // Avoid misaligned double-precision load/store.
1013 NeedsStackAlignment = true;
1014 SpillAreaSizeBytes =
1015 std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
1016 }
1017
1018 // Align esp if necessary.
// When alignment is required, the spill areas and then the out-args area are
// each rounded up with applyStackAlignment; otherwise the out-args size is
// simply appended.
1019 if (NeedsStackAlignment) {
1020 uint32_t StackOffset =
1021 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
1022 uint32_t StackSize =
1023 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1024 StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
1025 SpillAreaSizeBytes = StackSize - StackOffset;
1026 } else {
1027 SpillAreaSizeBytes += maxOutArgsSizeBytes();
1028 }
1029
1030 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
1031 // fixed allocations in the prolog.
1032 if (PrologEmitsFixedAllocas)
1033 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1034 if (SpillAreaSizeBytes) {
1035 // Generate "sub stackptr, SpillAreaSizeBytes"
1036 _sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
1037 // If the fixed allocas are aligned more than the stack frame, align the
1038 // stack pointer accordingly.
1039 if (PrologEmitsFixedAllocas &&
1040 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
1041 assert(IsEbpBasedFrame);
1042 _and(getPhysicalRegister(getStackReg(), Traits::WordType),
1043 Ctx->getConstantInt32(-FixedAllocaAlignBytes));
1044 }
1045 }
1046
1047 // Account for known-frame-offset alloca instructions that were not already
1048 // combined into the prolog.
1049 if (!PrologEmitsFixedAllocas)
1050 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1051
1052 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1053
1054 // Fill in stack offsets for stack args, and copy args into registers for
1055 // those that were register-allocated. Args are pushed right to left, so
1056 // Arg[0] is closest to the stack/frame pointer.
1057 Variable *FramePtr =
1058 getPhysicalRegister(getFrameOrStackReg(), Traits::WordType);
1059 size_t BasicFrameOffset =
1060 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
1061 if (!IsEbpBasedFrame)
1062 BasicFrameOffset += SpillAreaSizeBytes;
1063
1064 emitGetIP(Node);
1065
1066 const VarList &Args = Func->getArgs();
1067 size_t InArgsSizeBytes = 0;
1068 unsigned NumXmmArgs = 0;
1069 unsigned NumGPRArgs = 0;
1070 for (Variable *Arg : Args) {
1071 // Skip arguments passed in registers.
1072 if (isVectorType(Arg->getType())) {
1073 if (Traits::getRegisterForXmmArgNum(NumXmmArgs) != Variable::NoRegister) {
1074 ++NumXmmArgs;
1075 continue;
1076 }
1077 } else if (isScalarFloatingType(Arg->getType())) {
1078 if (Traits::X86_PASS_SCALAR_FP_IN_XMM &&
1079 Traits::getRegisterForXmmArgNum(NumXmmArgs) != Variable::NoRegister) {
1080 ++NumXmmArgs;
1081 continue;
1082 }
1083 } else {
1084 assert(isScalarIntegerType(Arg->getType()));
1085 if (Traits::getRegisterForGprArgNum(Traits::WordType, NumGPRArgs) !=
1086 Variable::NoRegister) {
1087 ++NumGPRArgs;
1088 continue;
1089 }
1090 }
1091 // For esp-based frames where the allocas are done outside the prolog, the
1092 // esp value may not stabilize to its home value until after all the
1093 // fixed-size alloca instructions have executed. In this case, a stack
1094 // adjustment is needed when accessing in-args in order to copy them into
1095 // registers.
// NOTE(review): StackAdjBytes is an unsigned size_t, so the subtraction below
// wraps around; presumably finishArgumentLowering treats the value as a
// signed adjustment -- confirm against its definition.
1096 size_t StackAdjBytes = 0;
1097 if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
1098 StackAdjBytes -= FixedAllocaSizeBytes;
1099 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
1100 InArgsSizeBytes);
1101 }
1102
1103 // Fill in stack offsets for locals.
1104 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1105 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1106 IsEbpBasedFrame);
1107 // Assign stack offsets to variables that have been linked to spilled
1108 // variables.
1109 for (Variable *Var : VariablesLinkedToSpillSlots) {
1110 Variable *Linked =
1111 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
1112 Var->setStackOffset(Linked->getStackOffset());
1113 }
1114 this->HasComputedFrame = true;
1115
// Optional diagnostic dump of the computed layout; emits no instructions.
1116 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1117 OstreamLocker L(Func->getContext());
1118 Ostream &Str = Func->getContext()->getStrDump();
1119
1120 Str << "Stack layout:\n";
// Whatever remains of SpillAreaSizeBytes after subtracting the named areas
// is reported as esp alignment padding.
1121 uint32_t EspAdjustmentPaddingSize =
1122 SpillAreaSizeBytes - LocalsSpillAreaSize -
1123 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1124 maxOutArgsSizeBytes();
1125 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1126 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
1127 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1128 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1129 << " globals spill area = " << GlobalsSize << " bytes\n"
1130 << " globals-locals spill areas intermediate padding = "
1131 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1132 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1133 << " esp alignment padding = " << EspAdjustmentPaddingSize
1134 << " bytes\n";
1135
1136 Str << "Stack details:\n"
1137 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
1138 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1139 << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
1140 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1141 << " bytes\n"
1142 << " is ebp based = " << IsEbpBasedFrame << "\n";
1143 }
1144 }
1145
877 /// Helper function for addProlog(). 1146 /// Helper function for addProlog().
878 /// 1147 ///
879 /// This assumes Arg is an argument passed on the stack. This sets the frame 1148 /// This assumes Arg is an argument passed on the stack. This sets the frame
880 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an 1149 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
881 /// I64 arg that has been split into Lo and Hi components, it calls itself 1150 /// I64 arg that has been split into Lo and Hi components, it calls itself
882 /// recursively on the components, taking care to handle Lo first because of the 1151 /// recursively on the components, taking care to handle Lo first because of the
883 /// little-endian architecture. Lastly, this function generates an instruction 1152 /// little-endian architecture. Lastly, this function generates an instruction
884 /// to copy Arg into its assigned register if applicable. 1153 /// to copy Arg into its assigned register if applicable.
885 template <typename TraitsType> 1154 template <typename TraitsType>
886 void TargetX86Base<TraitsType>::finishArgumentLowering( 1155 void TargetX86Base<TraitsType>::finishArgumentLowering(
(...skipping 26 matching lines...) Expand all
913 } else { 1182 } else {
914 _mov(Arg, Mem); 1183 _mov(Arg, Mem);
915 } 1184 }
916 // This argument-copying instruction uses an explicit X86OperandMem 1185 // This argument-copying instruction uses an explicit X86OperandMem
917 // operand instead of a Variable, so its fill-from-stack operation has to 1186 // operand instead of a Variable, so its fill-from-stack operation has to
918 // be tracked separately for statistics. 1187 // be tracked separately for statistics.
919 Ctx->statsUpdateFills(); 1188 Ctx->statsUpdateFills();
920 } 1189 }
921 } 1190 }
922 1191
/// Emits the function epilog into \p Node at the position of its return
/// instruction.
///
/// Does nothing if \p Node contains no Ret instruction. Otherwise it restores
/// the stack pointer (via _unlink_bp() for ebp-based frames, or by adding
/// SpillAreaSizeBytes back), then pops the used callee-saved registers in
/// descending register-index order (addProlog pushes them in ascending
/// order). When NeedSandboxing is set, it additionally emits a sandboxed
/// return, inserts a fake use of the return value if there is one, and marks
/// the original Ret as deleted.
1192 template <typename TraitsType>
1193 void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) {
1194 InstList &Insts = Node->getInsts();
1195 InstList::reverse_iterator RI, E;
// Search backwards for the last Ret instruction in the node.
1196 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1197 if (llvm::isa<typename Traits::Insts::Ret>(*RI))
1198 break;
1199 }
1200 if (RI == E)
1201 return;
1202
1203 // Convert the reverse_iterator position into its corresponding (forward)
1204 // iterator position.
// RI.base() refers to the element after *RI, so step back once to make the
// insert point the Ret instruction itself.
1205 InstList::iterator InsertPoint = RI.base();
1206 --InsertPoint;
1207 Context.init(Node);
1208 Context.setInsertPoint(InsertPoint);
1209
1210 if (IsEbpBasedFrame) {
1211 _unlink_bp();
1212 } else {
1213 // add stackptr, SpillAreaSizeBytes
1214 if (SpillAreaSizeBytes != 0) {
1215 _add_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
1216 }
1217 }
1218
1219 // Add pop instructions for preserved registers.
1220 llvm::SmallBitVector CalleeSaves =
1221 getRegisterSet(RegSet_CalleeSave, RegSet_None);
1222 llvm::SmallBitVector Popped(CalleeSaves.size());
1223 for (int32_t i = CalleeSaves.size() - 1; i >= 0; --i) {
// For ebp-based frames the frame register is not popped here; presumably
// _unlink_bp() above has already restored it.
1224 if (static_cast<SizeT>(i) == getFrameReg() && IsEbpBasedFrame)
1225 continue;
1226 const SizeT Canonical = Traits::getBaseReg(i);
1227 if (CalleeSaves[i] && RegsUsed[i]) {
1228 Popped[Canonical] = true;
1229 }
1230 }
1231 for (int32_t i = Popped.size() - 1; i >= 0; --i) {
1232 if (!Popped[i])
1233 continue;
1234 assert(i == Traits::getBaseReg(i));
1235 _pop(getPhysicalRegister(i, Traits::WordType));
1236 }
1237
// Without sandboxing the original Ret is left in place and nothing more is
// emitted.
1238 if (!NeedSandboxing) {
1239 return;
1240 }
1241 emitSandboxedReturn();
1242 if (RI->getSrcSize()) {
1243 auto *RetValue = llvm::cast<Variable>(RI->getSrc(0));
1244 Context.insert<InstFakeUse>(RetValue);
1245 }
1246 RI->setDeleted();
1247 }
1248
/// Returns the type used for stack slots, which is the target's word type
/// (Traits::WordType).
923 template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() { 1249 template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
924 return Traits::WordType; 1250 return Traits::WordType;
925 } 1251 }
926 1252
927 template <typename TraitsType> 1253 template <typename TraitsType>
928 template <typename T> 1254 template <typename T>
929 typename std::enable_if<!T::Is64Bit, Operand>::type * 1255 typename std::enable_if<!T::Is64Bit, Operand>::type *
930 TargetX86Base<TraitsType>::loOperand(Operand *Operand) { 1256 TargetX86Base<TraitsType>::loOperand(Operand *Operand) {
931 assert(Operand->getType() == IceType_i64 || 1257 assert(Operand->getType() == IceType_i64 ||
932 Operand->getType() == IceType_f64); 1258 Operand->getType() == IceType_f64);
(...skipping 5965 matching lines...) Expand 10 before | Expand all | Expand 10 after
6898 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(), 7224 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(),
6899 MemOperand->getShift(), MemOperand->getSegmentRegister()); 7225 MemOperand->getShift(), MemOperand->getSegmentRegister());
6900 return NewMemOperand; 7226 return NewMemOperand;
6901 } 7227 }
6902 } 7228 }
6903 } 7229 }
6904 } // end of namespace X86NAMESPACE 7230 } // end of namespace X86NAMESPACE
6905 } // end of namespace Ice 7231 } // end of namespace Ice
6906 7232
6907 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7233 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« src/IceTargetLoweringX86Base.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698