Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(464)

Side by Side Diff: src/IceTargetLoweringMIPS32.cpp

Issue 2067183002: [Subzero][MIPS32] Implements lowering of alloca instruction (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addressed review comments Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringMIPS32.h ('k') | tests_lit/llvm2ice_tests/alloc.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // 1 //
2 // The Subzero Code Generator 2 // The Subzero Code Generator
3 // 3 //
4 // This file is distributed under the University of Illinois Open Source 4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details. 5 // License. See LICENSE.TXT for details.
6 // 6 //
7 //===----------------------------------------------------------------------===// 7 //===----------------------------------------------------------------------===//
8 /// 8 ///
9 /// \file 9 /// \file
10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost 10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
156 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; 156 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
157 157
158 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) 158 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
159 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; 159 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
160 160
161 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, 161 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
162 llvm::array_lengthof(TypeToRegisterSet), 162 llvm::array_lengthof(TypeToRegisterSet),
163 RegMIPS32::getRegName, getRegClassName); 163 RegMIPS32::getRegName, getRegClassName);
164 } 164 }
165 165
166 void TargetMIPS32::unsetIfNonLeafFunc() {
167 for (CfgNode *Node : Func->getNodes()) {
168 for (Inst &Instr : Node->getInsts()) {
169 if (llvm::isa<InstCall>(&Instr)) {
170 // Unset MaybeLeafFunc if call instruction exists.
171 MaybeLeafFunc = false;
172 return;
173 }
174 }
175 }
176 }
177
178 uint32_t TargetMIPS32::getStackAlignment() const {
179 return MIPS32_STACK_ALIGNMENT_BYTES;
180 }
181
166 void TargetMIPS32::findMaxStackOutArgsSize() { 182 void TargetMIPS32::findMaxStackOutArgsSize() {
167 // MinNeededOutArgsBytes should be updated if the Target ever creates a 183 // MinNeededOutArgsBytes should be updated if the Target ever creates a
168 // high-level InstCall that requires more stack bytes. 184 // high-level InstCall that requires more stack bytes.
169 constexpr size_t MinNeededOutArgsBytes = 16; 185 size_t MinNeededOutArgsBytes = 0;
186 if (!MaybeLeafFunc)
187 MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
170 MaxOutArgsSizeBytes = MinNeededOutArgsBytes; 188 MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
171 for (CfgNode *Node : Func->getNodes()) { 189 for (CfgNode *Node : Func->getNodes()) {
172 Context.init(Node); 190 Context.init(Node);
173 while (!Context.atEnd()) { 191 while (!Context.atEnd()) {
174 PostIncrLoweringContext PostIncrement(Context); 192 PostIncrLoweringContext PostIncrement(Context);
175 Inst *CurInstr = iteratorToInst(Context.getCur()); 193 Inst *CurInstr = iteratorToInst(Context.getCur());
176 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { 194 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
177 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); 195 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
178 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); 196 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
179 } 197 }
180 } 198 }
181 } 199 }
182 } 200 }
183 201
184 void TargetMIPS32::translateO2() { 202 void TargetMIPS32::translateO2() {
185 TimerMarker T(TimerStack::TT_O2, Func); 203 TimerMarker T(TimerStack::TT_O2, Func);
186 204
187 // TODO(stichnot): share passes with X86? 205 // TODO(stichnot): share passes with X86?
188 // https://code.google.com/p/nativeclient/issues/detail?id=4094 206 // https://code.google.com/p/nativeclient/issues/detail?id=4094
189 genTargetHelperCalls(); 207 genTargetHelperCalls();
190 208
209 unsetIfNonLeafFunc();
210
191 findMaxStackOutArgsSize(); 211 findMaxStackOutArgsSize();
192 212
193 // Merge Alloca instructions, and lay out the stack. 213 // Merge Alloca instructions, and lay out the stack.
194 static constexpr bool SortAndCombineAllocas = false; 214 static constexpr bool SortAndCombineAllocas = true;
195 Func->processAllocas(SortAndCombineAllocas); 215 Func->processAllocas(SortAndCombineAllocas);
196 Func->dump("After Alloca processing"); 216 Func->dump("After Alloca processing");
197 217
198 if (!getFlags().getEnablePhiEdgeSplit()) { 218 if (!getFlags().getEnablePhiEdgeSplit()) {
199 // Lower Phi instructions. 219 // Lower Phi instructions.
200 Func->placePhiLoads(); 220 Func->placePhiLoads();
201 if (Func->hasError()) 221 if (Func->hasError())
202 return; 222 return;
203 Func->placePhiStores(); 223 Func->placePhiStores();
204 if (Func->hasError()) 224 if (Func->hasError())
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
284 Func->doNopInsertion(); 304 Func->doNopInsertion();
285 } 305 }
286 } 306 }
287 307
288 void TargetMIPS32::translateOm1() { 308 void TargetMIPS32::translateOm1() {
289 TimerMarker T(TimerStack::TT_Om1, Func); 309 TimerMarker T(TimerStack::TT_Om1, Func);
290 310
291 // TODO: share passes with X86? 311 // TODO: share passes with X86?
292 genTargetHelperCalls(); 312 genTargetHelperCalls();
293 313
314 unsetIfNonLeafFunc();
315
294 findMaxStackOutArgsSize(); 316 findMaxStackOutArgsSize();
295 317
296 // Do not merge Alloca instructions, and lay out the stack. 318 // Do not merge Alloca instructions, and lay out the stack.
297 static constexpr bool SortAndCombineAllocas = false; 319 static constexpr bool SortAndCombineAllocas = false;
298 Func->processAllocas(SortAndCombineAllocas); 320 Func->processAllocas(SortAndCombineAllocas);
299 Func->dump("After Alloca processing"); 321 Func->dump("After Alloca processing");
300 322
301 Func->placePhiLoads(); 323 Func->placePhiLoads();
302 if (Func->hasError()) 324 if (Func->hasError())
303 return; 325 return;
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
434 // transformation. 456 // transformation.
435 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { 457 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
436 return Mem; 458 return Mem;
437 } 459 }
438 460
439 // If we didn't do address mode optimization, then we only have a base/offset 461 // If we didn't do address mode optimization, then we only have a base/offset
440 // to work with. MIPS always requires a base register, so just use that to 462 // to work with. MIPS always requires a base register, so just use that to
441 // hold the operand. 463 // hold the operand.
442 auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); 464 auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg));
443 return OperandMIPS32Mem::create( 465 return OperandMIPS32Mem::create(
444 Func, Ty, Base, 466 Func, Ty, Base, llvm::cast<ConstantInteger32>(
445 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); 467 Ctx->getConstantInt32(Base->getStackOffset())));
446 } 468 }
447 469
448 void TargetMIPS32::emitVariable(const Variable *Var) const { 470 void TargetMIPS32::emitVariable(const Variable *Var) const {
449 if (!BuildDefs::dump()) 471 if (!BuildDefs::dump())
450 return; 472 return;
451 Ostream &Str = Ctx->getStrEmit(); 473 Ostream &Str = Ctx->getStrEmit();
452 const Type FrameSPTy = IceType_i32; 474 const Type FrameSPTy = IceType_i32;
453 if (Var->hasReg()) { 475 if (Var->hasReg()) {
454 Str << '$' << getRegName(Var->getRegNum(), Var->getType()); 476 Str << '$' << getRegName(Var->getRegNum(), Var->getType());
455 return; 477 return;
(...skipping 345 matching lines...) Expand 10 before | Expand all | Expand 10 after
801 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); 823 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
802 uint32_t SpillAreaPaddingBytes = 0; 824 uint32_t SpillAreaPaddingBytes = 0;
803 uint32_t LocalsSlotsPaddingBytes = 0; 825 uint32_t LocalsSlotsPaddingBytes = 0;
804 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, 826 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
805 GlobalsSize, LocalsSlotsAlignmentBytes, 827 GlobalsSize, LocalsSlotsAlignmentBytes,
806 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); 828 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
807 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 829 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
808 uint32_t GlobalsAndSubsequentPaddingSize = 830 uint32_t GlobalsAndSubsequentPaddingSize =
809 GlobalsSize + LocalsSlotsPaddingBytes; 831 GlobalsSize + LocalsSlotsPaddingBytes;
810 832
811 if (MaybeLeafFunc)
812 MaxOutArgsSizeBytes = 0;
813
814 // Adds the out args space to the stack, and aligns SP if necessary. 833 // Adds the out args space to the stack, and aligns SP if necessary.
815 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes; 834 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes +
816 835 FixedAllocaSizeBytes + MaxOutArgsSizeBytes;
817 // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with
818 // TotalStackSizeBytes once lowerAlloca is implemented and leaf function
819 // information is generated by lowerCall.
820 836
821 // Generate "addiu sp, sp, -TotalStackSizeBytes" 837 // Generate "addiu sp, sp, -TotalStackSizeBytes"
822 if (TotalStackSizeBytes) { 838 if (TotalStackSizeBytes) {
823 // Use the scratch register if needed to legalize the immediate. 839 // Use the scratch register if needed to legalize the immediate.
824 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); 840 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
825 _addiu(SP, SP, -(TotalStackSizeBytes)); 841 _addiu(SP, SP, -(TotalStackSizeBytes));
826 } 842 }
827 843
828 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); 844 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
829 845
(...skipping 17 matching lines...) Expand all
847 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); 863 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
848 _mov(FP, SP); 864 _mov(FP, SP);
849 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). 865 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
850 Context.insert<InstFakeUse>(FP); 866 Context.insert<InstFakeUse>(FP);
851 } 867 }
852 868
853 // Fill in stack offsets for stack args, and copy args into registers for 869 // Fill in stack offsets for stack args, and copy args into registers for
854 // those that were register-allocated. Args are pushed right to left, so 870 // those that were register-allocated. Args are pushed right to left, so
855 // Arg[0] is closest to the stack/frame pointer. 871 // Arg[0] is closest to the stack/frame pointer.
856 const VarList &Args = Func->getArgs(); 872 const VarList &Args = Func->getArgs();
857 size_t InArgsSizeBytes = 0; 873 size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
858 TargetMIPS32::CallingConv CC; 874 TargetMIPS32::CallingConv CC;
859 uint32_t ArgNo = 0; 875 uint32_t ArgNo = 0;
860 876
861 for (Variable *Arg : Args) { 877 for (Variable *Arg : Args) {
862 RegNumT DummyReg; 878 RegNumT DummyReg;
863 const Type Ty = Arg->getType(); 879 const Type Ty = Arg->getType();
864 // Skip arguments passed in registers. 880 // Skip arguments passed in registers.
865 if (CC.argInReg(Ty, ArgNo, &DummyReg)) { 881 if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
866 ArgNo++; 882 ArgNo++;
867 continue; 883 continue;
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
995 Registers[RegMIPS32::val] = false; 1011 Registers[RegMIPS32::val] = false;
996 1012
997 REGMIPS32_TABLE 1013 REGMIPS32_TABLE
998 1014
999 #undef X 1015 #undef X
1000 1016
1001 return Registers; 1017 return Registers;
1002 } 1018 }
1003 1019
1004 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { 1020 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
1005 UsesFramePointer = true;
1006 // Conservatively require the stack to be aligned. Some stack adjustment 1021 // Conservatively require the stack to be aligned. Some stack adjustment
1007 // operations implemented below assume that the stack is aligned before the 1022 // operations implemented below assume that the stack is aligned before the
1008 // alloca. All the alloca code ensures that the stack alignment is preserved 1023 // alloca. All the alloca code ensures that the stack alignment is preserved
1009 // after the alloca. The stack alignment restriction can be relaxed in some 1024 // after the alloca. The stack alignment restriction can be relaxed in some
1010 // cases. 1025 // cases.
1011 NeedsStackAlignment = true; 1026 NeedsStackAlignment = true;
1012 UnimplementedLoweringError(this, Instr); 1027
1028 // For default align=0, set it to the real value 1, to avoid any
1029 // bit-manipulation problems below.
1030 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
1031
1032 // LLVM enforces power of 2 alignment.
1033 assert(llvm::isPowerOf2_32(AlignmentParam));
1034 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
1035
1036 const uint32_t Alignment =
1037 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
1038 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
1039 const bool OptM1 = getFlags().getOptLevel() == Opt_m1;
1040 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
1041 const bool UseFramePointer =
1042 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
1043
1044 if (UseFramePointer)
1045 setHasFramePointer();
1046
1047 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1048
1049 Variable *Dest = Instr->getDest();
1050 Operand *TotalSize = Instr->getSizeInBytes();
1051
1052 if (const auto *ConstantTotalSize =
1053 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1054 const uint32_t Value =
1055 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
1056 FixedAllocaSizeBytes += Value;
1057 // Constant size alloca.
1058 if (!UseFramePointer) {
1059 // If we don't need a Frame Pointer, this alloca has a known offset to the
1060 // stack pointer. We don't need to adjust the stack pointer, nor assign
1061 // any value to Dest, as Dest is rematerializable.
1062 assert(Dest->isRematerializable());
1063 Context.insert<InstFakeDef>(Dest);
1064 return;
1065 }
1066 } else {
1067 UnimplementedLoweringError(this, Instr);
1068 return;
1069 }
1070
1071 // Add enough to the returned address to account for the out args area.
1072 if (MaxOutArgsSizeBytes > 0) {
1073 Variable *T = makeReg(getPointerType());
1074 _addiu(T, SP, MaxOutArgsSizeBytes);
1075 _mov(Dest, T);
1076 } else {
1077 _mov(Dest, SP);
1078 }
1013 } 1079 }
1014 1080
1015 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, 1081 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
1016 Variable *Dest, Operand *Src0, 1082 Variable *Dest, Operand *Src0,
1017 Operand *Src1) { 1083 Operand *Src1) {
1018 InstArithmetic::OpKind Op = Instr->getOp(); 1084 InstArithmetic::OpKind Op = Instr->getOp();
1019 switch (Op) { 1085 switch (Op) {
1020 case InstArithmetic::Add: 1086 case InstArithmetic::Add:
1021 case InstArithmetic::And: 1087 case InstArithmetic::And:
1022 case InstArithmetic::Or: 1088 case InstArithmetic::Or:
(...skipping 1112 matching lines...) Expand 10 before | Expand all | Expand 10 after
2135 Str << "\t.set\t" 2201 Str << "\t.set\t"
2136 << "nomips16\n"; 2202 << "nomips16\n";
2137 } 2203 }
2138 2204
2139 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; 2205 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
2140 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM]; 2206 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
2141 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; 2207 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
2142 2208
2143 } // end of namespace MIPS32 2209 } // end of namespace MIPS32
2144 } // end of namespace Ice 2210 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringMIPS32.h ('k') | tests_lit/llvm2ice_tests/alloc.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698