OLD | NEW |
1 // | 1 // |
2 // The Subzero Code Generator | 2 // The Subzero Code Generator |
3 // | 3 // |
4 // This file is distributed under the University of Illinois Open Source | 4 // This file is distributed under the University of Illinois Open Source |
5 // License. See LICENSE.TXT for details. | 5 // License. See LICENSE.TXT for details. |
6 // | 6 // |
7 //===----------------------------------------------------------------------===// | 7 //===----------------------------------------------------------------------===// |
8 /// | 8 /// |
9 /// \file | 9 /// \file |
10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost | 10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
156 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | 156 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
157 | 157 |
158 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) | 158 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) |
159 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; | 159 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; |
160 | 160 |
161 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, | 161 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, |
162 llvm::array_lengthof(TypeToRegisterSet), | 162 llvm::array_lengthof(TypeToRegisterSet), |
163 RegMIPS32::getRegName, getRegClassName); | 163 RegMIPS32::getRegName, getRegClassName); |
164 } | 164 } |
165 | 165 |
| 166 void TargetMIPS32::unsetIfNonLeafFunc() { |
| 167 for (CfgNode *Node : Func->getNodes()) { |
| 168 for (Inst &Instr : Node->getInsts()) { |
| 169 if (llvm::isa<InstCall>(&Instr)) { |
| 170 // Unset MaybeLeafFunc if a call instruction exists. |
| 171 MaybeLeafFunc = false; |
| 172 return; |
| 173 } |
| 174 } |
| 175 } |
| 176 } |
| 177 |
| 178 uint32_t TargetMIPS32::getStackAlignment() const { |
| 179 return MIPS32_STACK_ALIGNMENT_BYTES; |
| 180 } |
| 181 |
166 void TargetMIPS32::findMaxStackOutArgsSize() { | 182 void TargetMIPS32::findMaxStackOutArgsSize() { |
167 // MinNeededOutArgsBytes should be updated if the Target ever creates a | 183 // MinNeededOutArgsBytes should be updated if the Target ever creates a |
168 // high-level InstCall that requires more stack bytes. | 184 // high-level InstCall that requires more stack bytes. |
169 constexpr size_t MinNeededOutArgsBytes = 16; | 185 size_t MinNeededOutArgsBytes = 0; |
| 186 if (!MaybeLeafFunc) |
| 187 MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4; |
170 MaxOutArgsSizeBytes = MinNeededOutArgsBytes; | 188 MaxOutArgsSizeBytes = MinNeededOutArgsBytes; |
171 for (CfgNode *Node : Func->getNodes()) { | 189 for (CfgNode *Node : Func->getNodes()) { |
172 Context.init(Node); | 190 Context.init(Node); |
173 while (!Context.atEnd()) { | 191 while (!Context.atEnd()) { |
174 PostIncrLoweringContext PostIncrement(Context); | 192 PostIncrLoweringContext PostIncrement(Context); |
175 Inst *CurInstr = iteratorToInst(Context.getCur()); | 193 Inst *CurInstr = iteratorToInst(Context.getCur()); |
176 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { | 194 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
177 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); | 195 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
178 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); | 196 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
179 } | 197 } |
180 } | 198 } |
181 } | 199 } |
182 } | 200 } |
183 | 201 |
184 void TargetMIPS32::translateO2() { | 202 void TargetMIPS32::translateO2() { |
185 TimerMarker T(TimerStack::TT_O2, Func); | 203 TimerMarker T(TimerStack::TT_O2, Func); |
186 | 204 |
187 // TODO(stichnot): share passes with X86? | 205 // TODO(stichnot): share passes with X86? |
188 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 206 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
189 genTargetHelperCalls(); | 207 genTargetHelperCalls(); |
190 | 208 |
| 209 unsetIfNonLeafFunc(); |
| 210 |
191 findMaxStackOutArgsSize(); | 211 findMaxStackOutArgsSize(); |
192 | 212 |
193 // Merge Alloca instructions, and lay out the stack. | 213 // Merge Alloca instructions, and lay out the stack. |
194 static constexpr bool SortAndCombineAllocas = false; | 214 static constexpr bool SortAndCombineAllocas = true; |
195 Func->processAllocas(SortAndCombineAllocas); | 215 Func->processAllocas(SortAndCombineAllocas); |
196 Func->dump("After Alloca processing"); | 216 Func->dump("After Alloca processing"); |
197 | 217 |
198 if (!getFlags().getEnablePhiEdgeSplit()) { | 218 if (!getFlags().getEnablePhiEdgeSplit()) { |
199 // Lower Phi instructions. | 219 // Lower Phi instructions. |
200 Func->placePhiLoads(); | 220 Func->placePhiLoads(); |
201 if (Func->hasError()) | 221 if (Func->hasError()) |
202 return; | 222 return; |
203 Func->placePhiStores(); | 223 Func->placePhiStores(); |
204 if (Func->hasError()) | 224 if (Func->hasError()) |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
284 Func->doNopInsertion(); | 304 Func->doNopInsertion(); |
285 } | 305 } |
286 } | 306 } |
287 | 307 |
288 void TargetMIPS32::translateOm1() { | 308 void TargetMIPS32::translateOm1() { |
289 TimerMarker T(TimerStack::TT_Om1, Func); | 309 TimerMarker T(TimerStack::TT_Om1, Func); |
290 | 310 |
291 // TODO: share passes with X86? | 311 // TODO: share passes with X86? |
292 genTargetHelperCalls(); | 312 genTargetHelperCalls(); |
293 | 313 |
| 314 unsetIfNonLeafFunc(); |
| 315 |
294 findMaxStackOutArgsSize(); | 316 findMaxStackOutArgsSize(); |
295 | 317 |
296 // Do not merge Alloca instructions, and lay out the stack. | 318 // Do not merge Alloca instructions, and lay out the stack. |
297 static constexpr bool SortAndCombineAllocas = false; | 319 static constexpr bool SortAndCombineAllocas = false; |
298 Func->processAllocas(SortAndCombineAllocas); | 320 Func->processAllocas(SortAndCombineAllocas); |
299 Func->dump("After Alloca processing"); | 321 Func->dump("After Alloca processing"); |
300 | 322 |
301 Func->placePhiLoads(); | 323 Func->placePhiLoads(); |
302 if (Func->hasError()) | 324 if (Func->hasError()) |
303 return; | 325 return; |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
434 // transformation. | 456 // transformation. |
435 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { | 457 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { |
436 return Mem; | 458 return Mem; |
437 } | 459 } |
438 | 460 |
439 // If we didn't do address mode optimization, then we only have a base/offset | 461 // If we didn't do address mode optimization, then we only have a base/offset |
440 // to work with. MIPS always requires a base register, so just use that to | 462 // to work with. MIPS always requires a base register, so just use that to |
441 // hold the operand. | 463 // hold the operand. |
442 auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); | 464 auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); |
443 return OperandMIPS32Mem::create( | 465 return OperandMIPS32Mem::create( |
444 Func, Ty, Base, | 466 Func, Ty, Base, llvm::cast<ConstantInteger32>( |
445 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); | 467 Ctx->getConstantInt32(Base->getStackOffset()))); |
446 } | 468 } |
447 | 469 |
448 void TargetMIPS32::emitVariable(const Variable *Var) const { | 470 void TargetMIPS32::emitVariable(const Variable *Var) const { |
449 if (!BuildDefs::dump()) | 471 if (!BuildDefs::dump()) |
450 return; | 472 return; |
451 Ostream &Str = Ctx->getStrEmit(); | 473 Ostream &Str = Ctx->getStrEmit(); |
452 const Type FrameSPTy = IceType_i32; | 474 const Type FrameSPTy = IceType_i32; |
453 if (Var->hasReg()) { | 475 if (Var->hasReg()) { |
454 Str << '$' << getRegName(Var->getRegNum(), Var->getType()); | 476 Str << '$' << getRegName(Var->getRegNum(), Var->getType()); |
455 return; | 477 return; |
(...skipping 345 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
801 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 823 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
802 uint32_t SpillAreaPaddingBytes = 0; | 824 uint32_t SpillAreaPaddingBytes = 0; |
803 uint32_t LocalsSlotsPaddingBytes = 0; | 825 uint32_t LocalsSlotsPaddingBytes = 0; |
804 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, | 826 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
805 GlobalsSize, LocalsSlotsAlignmentBytes, | 827 GlobalsSize, LocalsSlotsAlignmentBytes, |
806 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); | 828 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
807 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 829 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
808 uint32_t GlobalsAndSubsequentPaddingSize = | 830 uint32_t GlobalsAndSubsequentPaddingSize = |
809 GlobalsSize + LocalsSlotsPaddingBytes; | 831 GlobalsSize + LocalsSlotsPaddingBytes; |
810 | 832 |
811 if (MaybeLeafFunc) | |
812 MaxOutArgsSizeBytes = 0; | |
813 | |
814 // Adds the out args space to the stack, and align SP if necessary. | 833 // Adds the out args space to the stack, and align SP if necessary. |
815 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes; | 834 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes + |
816 | 835 FixedAllocaSizeBytes + MaxOutArgsSizeBytes; |
817 // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with | |
818 // TotalStackSizeBytes once lowerAlloca is implemented and leaf function | |
819 // information is generated by lowerCall. | |
820 | 836 |
821 // Generate "addiu sp, sp, -TotalStackSizeBytes" | 837 // Generate "addiu sp, sp, -TotalStackSizeBytes" |
822 if (TotalStackSizeBytes) { | 838 if (TotalStackSizeBytes) { |
823 // Use the scratch register if needed to legalize the immediate. | 839 // Use the scratch register if needed to legalize the immediate. |
824 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); | 840 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
825 _addiu(SP, SP, -(TotalStackSizeBytes)); | 841 _addiu(SP, SP, -(TotalStackSizeBytes)); |
826 } | 842 } |
827 | 843 |
828 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); | 844 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); |
829 | 845 |
(...skipping 17 matching lines...) Expand all Loading... |
847 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); | 863 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
848 _mov(FP, SP); | 864 _mov(FP, SP); |
849 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). | 865 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
850 Context.insert<InstFakeUse>(FP); | 866 Context.insert<InstFakeUse>(FP); |
851 } | 867 } |
852 | 868 |
853 // Fill in stack offsets for stack args, and copy args into registers for | 869 // Fill in stack offsets for stack args, and copy args into registers for |
854 // those that were register-allocated. Args are pushed right to left, so | 870 // those that were register-allocated. Args are pushed right to left, so |
855 // Arg[0] is closest to the stack/frame pointer. | 871 // Arg[0] is closest to the stack/frame pointer. |
856 const VarList &Args = Func->getArgs(); | 872 const VarList &Args = Func->getArgs(); |
857 size_t InArgsSizeBytes = 0; | 873 size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4; |
858 TargetMIPS32::CallingConv CC; | 874 TargetMIPS32::CallingConv CC; |
859 uint32_t ArgNo = 0; | 875 uint32_t ArgNo = 0; |
860 | 876 |
861 for (Variable *Arg : Args) { | 877 for (Variable *Arg : Args) { |
862 RegNumT DummyReg; | 878 RegNumT DummyReg; |
863 const Type Ty = Arg->getType(); | 879 const Type Ty = Arg->getType(); |
864 // Skip arguments passed in registers. | 880 // Skip arguments passed in registers. |
865 if (CC.argInReg(Ty, ArgNo, &DummyReg)) { | 881 if (CC.argInReg(Ty, ArgNo, &DummyReg)) { |
866 ArgNo++; | 882 ArgNo++; |
867 continue; | 883 continue; |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
995 Registers[RegMIPS32::val] = false; | 1011 Registers[RegMIPS32::val] = false; |
996 | 1012 |
997 REGMIPS32_TABLE | 1013 REGMIPS32_TABLE |
998 | 1014 |
999 #undef X | 1015 #undef X |
1000 | 1016 |
1001 return Registers; | 1017 return Registers; |
1002 } | 1018 } |
1003 | 1019 |
1004 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { | 1020 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { |
1005 UsesFramePointer = true; | |
1006 // Conservatively require the stack to be aligned. Some stack adjustment | 1021 // Conservatively require the stack to be aligned. Some stack adjustment |
1007 // operations implemented below assume that the stack is aligned before the | 1022 // operations implemented below assume that the stack is aligned before the |
1008 // alloca. All the alloca code ensures that the stack alignment is preserved | 1023 // alloca. All the alloca code ensures that the stack alignment is preserved |
1009 // after the alloca. The stack alignment restriction can be relaxed in some | 1024 // after the alloca. The stack alignment restriction can be relaxed in some |
1010 // cases. | 1025 // cases. |
1011 NeedsStackAlignment = true; | 1026 NeedsStackAlignment = true; |
1012 UnimplementedLoweringError(this, Instr); | 1027 |
| 1028 // For default align=0, set it to the real value 1, to avoid any |
| 1029 // bit-manipulation problems below. |
| 1030 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); |
| 1031 |
| 1032 // LLVM enforces power of 2 alignment. |
| 1033 assert(llvm::isPowerOf2_32(AlignmentParam)); |
| 1034 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES)); |
| 1035 |
| 1036 const uint32_t Alignment = |
| 1037 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES); |
| 1038 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES; |
| 1039 const bool OptM1 = getFlags().getOptLevel() == Opt_m1; |
| 1040 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); |
| 1041 const bool UseFramePointer = |
| 1042 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; |
| 1043 |
| 1044 if (UseFramePointer) |
| 1045 setHasFramePointer(); |
| 1046 |
| 1047 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
| 1048 |
| 1049 Variable *Dest = Instr->getDest(); |
| 1050 Operand *TotalSize = Instr->getSizeInBytes(); |
| 1051 |
| 1052 if (const auto *ConstantTotalSize = |
| 1053 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| 1054 const uint32_t Value = |
| 1055 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); |
| 1056 FixedAllocaSizeBytes += Value; |
| 1057 // Constant size alloca. |
| 1058 if (!UseFramePointer) { |
| 1059 // If we don't need a Frame Pointer, this alloca has a known offset to the |
| 1060 // stack pointer. We don't need to adjust the stack pointer, nor assign any |
| 1061 // value to Dest, as Dest is rematerializable. |
| 1062 assert(Dest->isRematerializable()); |
| 1063 Context.insert<InstFakeDef>(Dest); |
| 1064 return; |
| 1065 } |
| 1066 } else { |
| 1067 UnimplementedLoweringError(this, Instr); |
| 1068 return; |
| 1069 } |
| 1070 |
| 1071 // Add enough to the returned address to account for the out args area. |
| 1072 if (MaxOutArgsSizeBytes > 0) { |
| 1073 Variable *T = makeReg(getPointerType()); |
| 1074 _addiu(T, SP, MaxOutArgsSizeBytes); |
| 1075 _mov(Dest, T); |
| 1076 } else { |
| 1077 _mov(Dest, SP); |
| 1078 } |
1013 } | 1079 } |
1014 | 1080 |
1015 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, | 1081 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, |
1016 Variable *Dest, Operand *Src0, | 1082 Variable *Dest, Operand *Src0, |
1017 Operand *Src1) { | 1083 Operand *Src1) { |
1018 InstArithmetic::OpKind Op = Instr->getOp(); | 1084 InstArithmetic::OpKind Op = Instr->getOp(); |
1019 switch (Op) { | 1085 switch (Op) { |
1020 case InstArithmetic::Add: | 1086 case InstArithmetic::Add: |
1021 case InstArithmetic::And: | 1087 case InstArithmetic::And: |
1022 case InstArithmetic::Or: | 1088 case InstArithmetic::Or: |
(...skipping 1112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2135 Str << "\t.set\t" | 2201 Str << "\t.set\t" |
2136 << "nomips16\n"; | 2202 << "nomips16\n"; |
2137 } | 2203 } |
2138 | 2204 |
2139 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; | 2205 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; |
2140 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM]; | 2206 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM]; |
2141 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; | 2207 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; |
2142 | 2208 |
2143 } // end of namespace MIPS32 | 2209 } // end of namespace MIPS32 |
2144 } // end of namespace Ice | 2210 } // end of namespace Ice |
OLD | NEW |