OLD | NEW |
---|---|
1 // | 1 // |
2 // The Subzero Code Generator | 2 // The Subzero Code Generator |
3 // | 3 // |
4 // This file is distributed under the University of Illinois Open Source | 4 // This file is distributed under the University of Illinois Open Source |
5 // License. See LICENSE.TXT for details. | 5 // License. See LICENSE.TXT for details. |
6 // | 6 // |
7 //===----------------------------------------------------------------------===// | 7 //===----------------------------------------------------------------------===// |
8 /// | 8 /// |
9 /// \file | 9 /// \file |
10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost | 10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
156 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | 156 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
157 | 157 |
158 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) | 158 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) |
159 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; | 159 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; |
160 | 160 |
161 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, | 161 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, |
162 llvm::array_lengthof(TypeToRegisterSet), | 162 llvm::array_lengthof(TypeToRegisterSet), |
163 RegMIPS32::getRegName, getRegClassName); | 163 RegMIPS32::getRegName, getRegClassName); |
164 } | 164 } |
165 | 165 |
166 void TargetMIPS32::unsetIfNonLeafFunc() { | |
Jim Stichnoth
2016/06/15 13:41:19
I think that the ARM target folds this computation…
Jim Stichnoth
2016/06/15 13:43:19
Actually, it just occurred to me that statistically…
sagar.thakur
2016/06/16 08:05:54
This pass is needed because findMaxStackOutArgsSize…
| |
167 for (CfgNode *Node : Func->getNodes()) { | |
168 for (Inst &Instr : Node->getInsts()) { | |
169 if (llvm::isa<InstCall>(&Instr)) { | |
170 // Unset MaybeLeafFunc if call instruction exists. | |
171 MaybeLeafFunc = false; | |
172 break; | |
Jim Stichnoth
2016/06/15 13:41:19
Can you just early-return here, and avoid the brea…
sagar.thakur
2016/06/16 08:05:54
Done.
| |
173 } | |
174 } | |
175 if (!MaybeLeafFunc) | |
176 break; | |
177 } | |
178 } | |
179 | |
180 uint32_t TargetMIPS32::getStackAlignment() const { | |
181 return MIPS32_STACK_ALIGNMENT_BYTES; | |
182 } | |
183 | |
166 void TargetMIPS32::findMaxStackOutArgsSize() { | 184 void TargetMIPS32::findMaxStackOutArgsSize() { |
167 // MinNeededOutArgsBytes should be updated if the Target ever creates a | 185 // MinNeededOutArgsBytes should be updated if the Target ever creates a |
168 // high-level InstCall that requires more stack bytes. | 186 // high-level InstCall that requires more stack bytes. |
169 constexpr size_t MinNeededOutArgsBytes = 16; | 187 size_t MinNeededOutArgsBytes = 0; |
188 if (!MaybeLeafFunc) | |
189 MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4; | |
170 MaxOutArgsSizeBytes = MinNeededOutArgsBytes; | 190 MaxOutArgsSizeBytes = MinNeededOutArgsBytes; |
171 for (CfgNode *Node : Func->getNodes()) { | 191 for (CfgNode *Node : Func->getNodes()) { |
172 Context.init(Node); | 192 Context.init(Node); |
173 while (!Context.atEnd()) { | 193 while (!Context.atEnd()) { |
174 PostIncrLoweringContext PostIncrement(Context); | 194 PostIncrLoweringContext PostIncrement(Context); |
175 Inst *CurInstr = iteratorToInst(Context.getCur()); | 195 Inst *CurInstr = iteratorToInst(Context.getCur()); |
176 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { | 196 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
177 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); | 197 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
178 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); | 198 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
179 } | 199 } |
180 } | 200 } |
181 } | 201 } |
182 } | 202 } |
183 | 203 |
184 void TargetMIPS32::translateO2() { | 204 void TargetMIPS32::translateO2() { |
185 TimerMarker T(TimerStack::TT_O2, Func); | 205 TimerMarker T(TimerStack::TT_O2, Func); |
186 | 206 |
187 // TODO(stichnot): share passes with X86? | 207 // TODO(stichnot): share passes with X86? |
188 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 208 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
189 genTargetHelperCalls(); | 209 genTargetHelperCalls(); |
190 | 210 |
211 unsetIfNonLeafFunc(); | |
212 | |
191 findMaxStackOutArgsSize(); | 213 findMaxStackOutArgsSize(); |
192 | 214 |
193 // Merge Alloca instructions, and lay out the stack. | 215 // Merge Alloca instructions, and lay out the stack. |
194 static constexpr bool SortAndCombineAllocas = false; | 216 static constexpr bool SortAndCombineAllocas = true; |
195 Func->processAllocas(SortAndCombineAllocas); | 217 Func->processAllocas(SortAndCombineAllocas); |
196 Func->dump("After Alloca processing"); | 218 Func->dump("After Alloca processing"); |
197 | 219 |
198 if (!getFlags().getEnablePhiEdgeSplit()) { | 220 if (!getFlags().getEnablePhiEdgeSplit()) { |
199 // Lower Phi instructions. | 221 // Lower Phi instructions. |
200 Func->placePhiLoads(); | 222 Func->placePhiLoads(); |
201 if (Func->hasError()) | 223 if (Func->hasError()) |
202 return; | 224 return; |
203 Func->placePhiStores(); | 225 Func->placePhiStores(); |
204 if (Func->hasError()) | 226 if (Func->hasError()) |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
284 Func->doNopInsertion(); | 306 Func->doNopInsertion(); |
285 } | 307 } |
286 } | 308 } |
287 | 309 |
288 void TargetMIPS32::translateOm1() { | 310 void TargetMIPS32::translateOm1() { |
289 TimerMarker T(TimerStack::TT_Om1, Func); | 311 TimerMarker T(TimerStack::TT_Om1, Func); |
290 | 312 |
291 // TODO: share passes with X86? | 313 // TODO: share passes with X86? |
292 genTargetHelperCalls(); | 314 genTargetHelperCalls(); |
293 | 315 |
316 unsetIfNonLeafFunc(); | |
317 | |
294 findMaxStackOutArgsSize(); | 318 findMaxStackOutArgsSize(); |
295 | 319 |
296 // Do not merge Alloca instructions, and lay out the stack. | 320 // Do not merge Alloca instructions, and lay out the stack. |
297 static constexpr bool SortAndCombineAllocas = false; | 321 static constexpr bool SortAndCombineAllocas = false; |
298 Func->processAllocas(SortAndCombineAllocas); | 322 Func->processAllocas(SortAndCombineAllocas); |
299 Func->dump("After Alloca processing"); | 323 Func->dump("After Alloca processing"); |
300 | 324 |
301 Func->placePhiLoads(); | 325 Func->placePhiLoads(); |
302 if (Func->hasError()) | 326 if (Func->hasError()) |
303 return; | 327 return; |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
434 // transformation. | 458 // transformation. |
435 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { | 459 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { |
436 return Mem; | 460 return Mem; |
437 } | 461 } |
438 | 462 |
439 // If we didn't do address mode optimization, then we only have a base/offset | 463 // If we didn't do address mode optimization, then we only have a base/offset |
440 // to work with. MIPS always requires a base register, so just use that to | 464 // to work with. MIPS always requires a base register, so just use that to |
441 // hold the operand. | 465 // hold the operand. |
442 auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); | 466 auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); |
443 return OperandMIPS32Mem::create( | 467 return OperandMIPS32Mem::create( |
444 Func, Ty, Base, | 468 Func, Ty, Base, llvm::cast<ConstantInteger32>( |
445 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); | 469 Ctx->getConstantInt32(Base->getStackOffset()))); |
446 } | 470 } |
447 | 471 |
448 void TargetMIPS32::emitVariable(const Variable *Var) const { | 472 void TargetMIPS32::emitVariable(const Variable *Var) const { |
449 if (!BuildDefs::dump()) | 473 if (!BuildDefs::dump()) |
450 return; | 474 return; |
451 Ostream &Str = Ctx->getStrEmit(); | 475 Ostream &Str = Ctx->getStrEmit(); |
452 const Type FrameSPTy = IceType_i32; | 476 const Type FrameSPTy = IceType_i32; |
453 if (Var->hasReg()) { | 477 if (Var->hasReg()) { |
454 Str << '$' << getRegName(Var->getRegNum(), Var->getType()); | 478 Str << '$' << getRegName(Var->getRegNum(), Var->getType()); |
455 return; | 479 return; |
(...skipping 345 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
801 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 825 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
802 uint32_t SpillAreaPaddingBytes = 0; | 826 uint32_t SpillAreaPaddingBytes = 0; |
803 uint32_t LocalsSlotsPaddingBytes = 0; | 827 uint32_t LocalsSlotsPaddingBytes = 0; |
804 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, | 828 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
805 GlobalsSize, LocalsSlotsAlignmentBytes, | 829 GlobalsSize, LocalsSlotsAlignmentBytes, |
806 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); | 830 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
807 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 831 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
808 uint32_t GlobalsAndSubsequentPaddingSize = | 832 uint32_t GlobalsAndSubsequentPaddingSize = |
809 GlobalsSize + LocalsSlotsPaddingBytes; | 833 GlobalsSize + LocalsSlotsPaddingBytes; |
810 | 834 |
811 if (MaybeLeafFunc) | |
812 MaxOutArgsSizeBytes = 0; | |
813 | |
814 // Adds the out args space to the stack, and align SP if necessary. | 835 // Adds the out args space to the stack, and align SP if necessary. |
815 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes; | 836 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes + |
816 | 837 FixedAllocaSizeBytes + MaxOutArgsSizeBytes; |
817 // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with | |
818 // TotalStackSizeBytes once lowerAlloca is implemented and leaf function | |
819 // information is generated by lowerCall. | |
820 | 838 |
821 // Generate "addiu sp, sp, -TotalStackSizeBytes" | 839 // Generate "addiu sp, sp, -TotalStackSizeBytes" |
822 if (TotalStackSizeBytes) { | 840 if (TotalStackSizeBytes) { |
823 // Use the scratch register if needed to legalize the immediate. | 841 // Use the scratch register if needed to legalize the immediate. |
824 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); | 842 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
825 _addiu(SP, SP, -(TotalStackSizeBytes)); | 843 _addiu(SP, SP, -(TotalStackSizeBytes)); |
826 } | 844 } |
827 | 845 |
828 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); | 846 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); |
829 | 847 |
(...skipping 17 matching lines...) Expand all Loading... | |
847 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); | 865 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
848 _mov(FP, SP); | 866 _mov(FP, SP); |
849 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). | 867 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
850 Context.insert<InstFakeUse>(FP); | 868 Context.insert<InstFakeUse>(FP); |
851 } | 869 } |
852 | 870 |
853 // Fill in stack offsets for stack args, and copy args into registers for | 871 // Fill in stack offsets for stack args, and copy args into registers for |
854 // those that were register-allocated. Args are pushed right to left, so | 872 // those that were register-allocated. Args are pushed right to left, so |
855 // Arg[0] is closest to the stack/frame pointer. | 873 // Arg[0] is closest to the stack/frame pointer. |
856 const VarList &Args = Func->getArgs(); | 874 const VarList &Args = Func->getArgs(); |
857 size_t InArgsSizeBytes = 0; | 875 size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4; |
858 TargetMIPS32::CallingConv CC; | 876 TargetMIPS32::CallingConv CC; |
859 uint32_t ArgNo = 0; | 877 uint32_t ArgNo = 0; |
860 | 878 |
861 for (Variable *Arg : Args) { | 879 for (Variable *Arg : Args) { |
862 RegNumT DummyReg; | 880 RegNumT DummyReg; |
863 const Type Ty = Arg->getType(); | 881 const Type Ty = Arg->getType(); |
864 // Skip arguments passed in registers. | 882 // Skip arguments passed in registers. |
865 if (CC.argInReg(Ty, ArgNo, &DummyReg)) { | 883 if (CC.argInReg(Ty, ArgNo, &DummyReg)) { |
866 ArgNo++; | 884 ArgNo++; |
867 continue; | 885 continue; |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
995 Registers[RegMIPS32::val] = false; | 1013 Registers[RegMIPS32::val] = false; |
996 | 1014 |
997 REGMIPS32_TABLE | 1015 REGMIPS32_TABLE |
998 | 1016 |
999 #undef X | 1017 #undef X |
1000 | 1018 |
1001 return Registers; | 1019 return Registers; |
1002 } | 1020 } |
1003 | 1021 |
1004 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { | 1022 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { |
1005 UsesFramePointer = true; | |
1006 // Conservatively require the stack to be aligned. Some stack adjustment | 1023 // Conservatively require the stack to be aligned. Some stack adjustment |
1007 // operations implemented below assume that the stack is aligned before the | 1024 // operations implemented below assume that the stack is aligned before the |
1008 // alloca. All the alloca code ensures that the stack alignment is preserved | 1025 // alloca. All the alloca code ensures that the stack alignment is preserved |
1009 // after the alloca. The stack alignment restriction can be relaxed in some | 1026 // after the alloca. The stack alignment restriction can be relaxed in some |
1010 // cases. | 1027 // cases. |
1011 NeedsStackAlignment = true; | 1028 NeedsStackAlignment = true; |
1012 UnimplementedLoweringError(this, Instr); | 1029 |
1030 // For default align=0, set it to the real value 1, to avoid any | |
1031 // bit-manipulation problems below. | |
1032 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); | |
1033 | |
1034 // LLVM enforces power of 2 alignment. | |
1035 assert(llvm::isPowerOf2_32(AlignmentParam)); | |
1036 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES)); | |
1037 | |
1038 const uint32_t Alignment = | |
1039 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES); | |
1040 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES; | |
1041 const bool OptM1 = getFlags().getOptLevel() == Opt_m1; | |
1042 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); | |
1043 const bool UseFramePointer = | |
1044 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; | |
1045 | |
1046 if (UseFramePointer) | |
1047 setHasFramePointer(); | |
1048 | |
1049 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); | |
1050 | |
1051 Variable *Dest = Instr->getDest(); | |
1052 Operand *TotalSize = Instr->getSizeInBytes(); | |
1053 | |
1054 if (const auto *ConstantTotalSize = | |
1055 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | |
1056 const uint32_t Value = | |
1057 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); | |
1058 FixedAllocaSizeBytes += Value; | |
1059 // Constant size alloca. | |
1060 if (!UseFramePointer) { | |
1061 // If we don't need a Frame Pointer, this alloca has a known offset to the | |
1062 // stack pointer. We don't need to adjust the stack pointer, nor assign any | |
1063 // value to Dest, as Dest is rematerializable. | |
1064 assert(Dest->isRematerializable()); | |
1065 Context.insert<InstFakeDef>(Dest); | |
1066 return; | |
1067 } | |
1068 } else { | |
1069 UnimplementedLoweringError(this, Instr); | |
1070 return; | |
1071 } | |
1072 | |
1073 // Add enough to the returned address to account for the out args area. | |
1074 if (MaxOutArgsSizeBytes > 0) { | |
1075 Variable *T = makeReg(getPointerType()); | |
1076 _addiu(T, SP, MaxOutArgsSizeBytes); | |
1077 _mov(Dest, T); | |
1078 } else { | |
1079 _mov(Dest, SP); | |
1080 } | |
1013 } | 1081 } |
1014 | 1082 |
1015 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, | 1083 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, |
1016 Variable *Dest, Operand *Src0, | 1084 Variable *Dest, Operand *Src0, |
1017 Operand *Src1) { | 1085 Operand *Src1) { |
1018 InstArithmetic::OpKind Op = Instr->getOp(); | 1086 InstArithmetic::OpKind Op = Instr->getOp(); |
1019 switch (Op) { | 1087 switch (Op) { |
1020 case InstArithmetic::Add: | 1088 case InstArithmetic::Add: |
1021 case InstArithmetic::And: | 1089 case InstArithmetic::And: |
1022 case InstArithmetic::Or: | 1090 case InstArithmetic::Or: |
(...skipping 1112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2135 Str << "\t.set\t" | 2203 Str << "\t.set\t" |
2136 << "nomips16\n"; | 2204 << "nomips16\n"; |
2137 } | 2205 } |
2138 | 2206 |
2139 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; | 2207 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; |
2140 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM]; | 2208 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM]; |
2141 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; | 2209 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; |
2142 | 2210 |
2143 } // end of namespace MIPS32 | 2211 } // end of namespace MIPS32 |
2144 } // end of namespace Ice | 2212 } // end of namespace Ice |
OLD | NEW |