Chromium Code Reviews

Unified Diff: src/IceTargetLoweringMIPS32.cpp

Issue 2067183002: [Subzero][MIPS32] Implements lowering of alloca instruction (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 4 years, 6 months ago
 //
 // The Subzero Code Generator
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost
 (...skipping 145 matching lines...)
   TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
 
   for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
     TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
 
   filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
                           llvm::array_lengthof(TypeToRegisterSet),
                           RegMIPS32::getRegName, getRegClassName);
 }
 
+void TargetMIPS32::unsetIfNonLeafFunc() {
Jim Stichnoth 2016/06/15 13:41:19 I think that the ARM target folds this computation …
Jim Stichnoth 2016/06/15 13:43:19 Actually, it just occurred to me that statistically …
sagar.thakur 2016/06/16 08:05:54 This pass is needed because findMaxStackOutArgsSize …
+  for (CfgNode *Node : Func->getNodes()) {
+    for (Inst &Instr : Node->getInsts()) {
+      if (llvm::isa<InstCall>(&Instr)) {
+        // Unset MaybeLeafFunc if call instruction exists.
+        MaybeLeafFunc = false;
+        break;
Jim Stichnoth 2016/06/15 13:41:19 Can you just early-return here, and avoid the break?
sagar.thakur 2016/06/16 08:05:54 Done.
+      }
+    }
+    if (!MaybeLeafFunc)
+      break;
+  }
+}
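Note: the review exchange above asks for an early return in place of the break, and the reply marks it Done in a later patch set. A minimal sketch of what that early-return form might look like (an assumption, not the exact code that landed):

void TargetMIPS32::unsetIfNonLeafFunc() {
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (llvm::isa<InstCall>(&Instr)) {
        // Any call makes this a non-leaf function; no need to keep scanning.
        MaybeLeafFunc = false;
        return;
      }
    }
  }
}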
+
+uint32_t TargetMIPS32::getStackAlignment() const {
+  return MIPS32_STACK_ALIGNMENT_BYTES;
+}
+
 void TargetMIPS32::findMaxStackOutArgsSize() {
   // MinNeededOutArgsBytes should be updated if the Target ever creates a
   // high-level InstCall that requires more stack bytes.
-  constexpr size_t MinNeededOutArgsBytes = 16;
+  size_t MinNeededOutArgsBytes = 0;
+  if (!MaybeLeafFunc)
+    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
   MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
   for (CfgNode *Node : Func->getNodes()) {
     Context.init(Node);
     while (!Context.atEnd()) {
       PostIncrLoweringContext PostIncrement(Context);
       Inst *CurInstr = iteratorToInst(Context.getCur());
       if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
         SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
         MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
       }
     }
   }
 }
 
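Note: assuming MIPS32_MAX_GPR_ARG is 4 (the o32 integer argument registers $a0..$a3, 4 bytes each), the non-leaf minimum computed here matches the 16 bytes that the old constexpr hard-coded:

  MIPS32_MAX_GPR_ARG * 4 = 4 registers * 4 bytes = 16 bytes

A leaf function makes no calls and therefore needs no outgoing-argument area, which is why MinNeededOutArgsBytes now starts at 0 and is only raised for non-leaf functions.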
 void TargetMIPS32::translateO2() {
   TimerMarker T(TimerStack::TT_O2, Func);
 
   // TODO(stichnot): share passes with X86?
   // https://code.google.com/p/nativeclient/issues/detail?id=4094
   genTargetHelperCalls();
 
+  unsetIfNonLeafFunc();
+
   findMaxStackOutArgsSize();
 
   // Merge Alloca instructions, and lay out the stack.
-  static constexpr bool SortAndCombineAllocas = false;
+  static constexpr bool SortAndCombineAllocas = true;
   Func->processAllocas(SortAndCombineAllocas);
   Func->dump("After Alloca processing");
 
   if (!getFlags().getEnablePhiEdgeSplit()) {
     // Lower Phi instructions.
     Func->placePhiLoads();
     if (Func->hasError())
       return;
     Func->placePhiStores();
     if (Func->hasError())
 (...skipping 79 matching lines...)
     Func->doNopInsertion();
   }
 }
 
 void TargetMIPS32::translateOm1() {
   TimerMarker T(TimerStack::TT_Om1, Func);
 
   // TODO: share passes with X86?
   genTargetHelperCalls();
 
+  unsetIfNonLeafFunc();
+
   findMaxStackOutArgsSize();
 
   // Do not merge Alloca instructions, and lay out the stack.
   static constexpr bool SortAndCombineAllocas = false;
   Func->processAllocas(SortAndCombineAllocas);
   Func->dump("After Alloca processing");
 
   Func->placePhiLoads();
   if (Func->hasError())
     return;
 (...skipping 130 matching lines...)
   // transformation.
   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
     return Mem;
   }
 
   // If we didn't do address mode optimization, then we only have a base/offset
   // to work with. MIPS always requires a base register, so just use that to
   // hold the operand.
   auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg));
   return OperandMIPS32Mem::create(
-      Func, Ty, Base,
-      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
+      Func, Ty, Base, llvm::cast<ConstantInteger32>(
+                          Ctx->getConstantInt32(Base->getStackOffset())));
 }
 
 void TargetMIPS32::emitVariable(const Variable *Var) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Ctx->getStrEmit();
   const Type FrameSPTy = IceType_i32;
   if (Var->hasReg()) {
     Str << '$' << getRegName(Var->getRegNum(), Var->getType());
     return;
 (...skipping 345 matching lines...)
   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
   uint32_t SpillAreaPaddingBytes = 0;
   uint32_t LocalsSlotsPaddingBytes = 0;
   alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                        GlobalsSize, LocalsSlotsAlignmentBytes,
                        &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
   SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
   uint32_t GlobalsAndSubsequentPaddingSize =
       GlobalsSize + LocalsSlotsPaddingBytes;
 
-  if (MaybeLeafFunc)
-    MaxOutArgsSizeBytes = 0;
-
   // Adds the out args space to the stack, and align SP if necessary.
-  uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes;
-
-  // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with
-  // TotalStackSizeBytes once lowerAlloca is implemented and leaf function
-  // information is generated by lowerCall.
+  uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes +
+                                 FixedAllocaSizeBytes + MaxOutArgsSizeBytes;
 
   // Generate "addiu sp, sp, -TotalStackSizeBytes"
   if (TotalStackSizeBytes) {
     // Use the scratch register if needed to legalize the immediate.
     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
     _addiu(SP, SP, -(TotalStackSizeBytes));
   }
 
   Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
 
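Note: the single stack adjustment now covers the preserved registers, the spill area, the fixed alloca area, and the outgoing-argument area in one addiu. As a rough worked example with hypothetical sizes (not taken from this patch), 8 bytes of preserved registers, 24 bytes of spill area, a 16-byte fixed alloca area, and a 16-byte out-args area would give:

  TotalStackSizeBytes = 8 + 24 + 16 + 16 = 64
  addiu sp, sp, -64

The out-args area sits closest to SP, which is why lowerAlloca below offsets the address it returns by MaxOutArgsSizeBytes.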
 (...skipping 17 matching lines...)
     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
     _mov(FP, SP);
     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
     Context.insert<InstFakeUse>(FP);
   }
 
   // Fill in stack offsets for stack args, and copy args into registers for
   // those that were register-allocated. Args are pushed right to left, so
   // Arg[0] is closest to the stack/frame pointer.
   const VarList &Args = Func->getArgs();
-  size_t InArgsSizeBytes = 0;
+  size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
   TargetMIPS32::CallingConv CC;
   uint32_t ArgNo = 0;
 
   for (Variable *Arg : Args) {
     RegNumT DummyReg;
     const Type Ty = Arg->getType();
     // Skip arguments passed in registers.
     if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
       ArgNo++;
       continue;
 (...skipping 127 matching lines...)
   Registers[RegMIPS32::val] = false;
 
   REGMIPS32_TABLE
 
 #undef X
 
   return Registers;
 }
 
 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
-  UsesFramePointer = true;
   // Conservatively require the stack to be aligned. Some stack adjustment
   // operations implemented below assume that the stack is aligned before the
   // alloca. All the alloca code ensures that the stack alignment is preserved
   // after the alloca. The stack alignment restriction can be relaxed in some
   // cases.
   NeedsStackAlignment = true;
-  UnimplementedLoweringError(this, Instr);
+
+  // For default align=0, set it to the real value 1, to avoid any
+  // bit-manipulation problems below.
+  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
+
+  // LLVM enforces power of 2 alignment.
+  assert(llvm::isPowerOf2_32(AlignmentParam));
+  assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
+
+  const uint32_t Alignment =
+      std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
+  const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
+  const bool OptM1 = getFlags().getOptLevel() == Opt_m1;
+  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
+  const bool UseFramePointer =
+      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
+
+  if (UseFramePointer)
+    setHasFramePointer();
+
+  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
+
+  Variable *Dest = Instr->getDest();
+  Operand *TotalSize = Instr->getSizeInBytes();
+
+  if (const auto *ConstantTotalSize =
+          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
+    const uint32_t Value =
+        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
+    FixedAllocaSizeBytes += Value;
+    // Constant size alloca.
+    if (!UseFramePointer) {
+      // If we don't need a Frame Pointer, this alloca has a known offset to the
+      // stack pointer. We don't need adjust the stack pointer, nor assign any
+      // value to Dest, as Dest is rematerializable.
+      assert(Dest->isRematerializable());
+      Context.insert<InstFakeDef>(Dest);
+      return;
+    }
+  } else {
+    UnimplementedLoweringError(this, Instr);
+    return;
+  }
+
+  // Add enough to the returned address to account for the out args area.
+  if (MaxOutArgsSizeBytes > 0) {
+    Variable *T = makeReg(getPointerType());
+    _addiu(T, SP, MaxOutArgsSizeBytes);
+    _mov(Dest, T);
+  } else {
+    _mov(Dest, SP);
+  }
 }
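Note: a short walk-through of the constant-size path above, using hypothetical values (and assuming MIPS32_STACK_ALIGNMENT_BYTES is 8 purely for the example). For an alloca of 20 bytes with a requested alignment of 8:

  AlignmentParam = max(1, 8) = 8
  Alignment      = max(8, MIPS32_STACK_ALIGNMENT_BYTES) = 8
  Value          = Utils::applyAlignment(20, 8) = 24
  FixedAllocaSizeBytes += 24

If no frame pointer is required, Dest is rematerializable at a known offset from SP, so only a fake def is inserted and no instructions are emitted. Otherwise the lowering returns SP plus MaxOutArgsSizeBytes, keeping the new allocation above the outgoing-argument area.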
 
 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
                                         Variable *Dest, Operand *Src0,
                                         Operand *Src1) {
   InstArithmetic::OpKind Op = Instr->getOp();
   switch (Op) {
   case InstArithmetic::Add:
   case InstArithmetic::And:
   case InstArithmetic::Or:
 (...skipping 1112 matching lines...)
   Str << "\t.set\t"
       << "nomips16\n";
 }
 
 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
 
 } // end of namespace MIPS32
 } // end of namespace Ice