| OLD | NEW |
| 1 // | 1 // |
| 2 // The Subzero Code Generator | 2 // The Subzero Code Generator |
| 3 // | 3 // |
| 4 // This file is distributed under the University of Illinois Open Source | 4 // This file is distributed under the University of Illinois Open Source |
| 5 // License. See LICENSE.TXT for details. | 5 // License. See LICENSE.TXT for details. |
| 6 // | 6 // |
| 7 //===----------------------------------------------------------------------===// | 7 //===----------------------------------------------------------------------===// |
| 8 /// | 8 /// |
| 9 /// \file | 9 /// \file |
| 10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost | 10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost |
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 156 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | 156 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
| 157 | 157 |
| 158 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) | 158 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) |
| 159 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; | 159 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; |
| 160 | 160 |
| 161 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, | 161 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, |
| 162 llvm::array_lengthof(TypeToRegisterSet), | 162 llvm::array_lengthof(TypeToRegisterSet), |
| 163 RegMIPS32::getRegName, getRegClassName); | 163 RegMIPS32::getRegName, getRegClassName); |
| 164 } | 164 } |
| 165 | 165 |
| 166 void TargetMIPS32::unsetIfNonLeafFunc() { |
| 167 for (CfgNode *Node : Func->getNodes()) { |
| 168 for (Inst &Instr : Node->getInsts()) { |
| 169 if (llvm::isa<InstCall>(&Instr)) { |
| 170 // Unset MaybeLeafFunc if call instruction exists. |
| 171 MaybeLeafFunc = false; |
| 172 return; |
| 173 } |
| 174 } |
| 175 } |
| 176 } |
| 177 |
| 178 uint32_t TargetMIPS32::getStackAlignment() const { |
| 179 return MIPS32_STACK_ALIGNMENT_BYTES; |
| 180 } |
| 181 |
| 166 void TargetMIPS32::findMaxStackOutArgsSize() { | 182 void TargetMIPS32::findMaxStackOutArgsSize() { |
| 167 // MinNeededOutArgsBytes should be updated if the Target ever creates a | 183 // MinNeededOutArgsBytes should be updated if the Target ever creates a |
| 168 // high-level InstCall that requires more stack bytes. | 184 // high-level InstCall that requires more stack bytes. |
| 169 constexpr size_t MinNeededOutArgsBytes = 16; | 185 size_t MinNeededOutArgsBytes = 0; |
| 186 if (!MaybeLeafFunc) |
| 187 MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4; |
| 170 MaxOutArgsSizeBytes = MinNeededOutArgsBytes; | 188 MaxOutArgsSizeBytes = MinNeededOutArgsBytes; |
| 171 for (CfgNode *Node : Func->getNodes()) { | 189 for (CfgNode *Node : Func->getNodes()) { |
| 172 Context.init(Node); | 190 Context.init(Node); |
| 173 while (!Context.atEnd()) { | 191 while (!Context.atEnd()) { |
| 174 PostIncrLoweringContext PostIncrement(Context); | 192 PostIncrLoweringContext PostIncrement(Context); |
| 175 Inst *CurInstr = iteratorToInst(Context.getCur()); | 193 Inst *CurInstr = iteratorToInst(Context.getCur()); |
| 176 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { | 194 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
| 177 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); | 195 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
| 178 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); | 196 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
| 179 } | 197 } |
| 180 } | 198 } |
| 181 } | 199 } |
| 182 } | 200 } |
| 183 | 201 |
| 184 void TargetMIPS32::translateO2() { | 202 void TargetMIPS32::translateO2() { |
| 185 TimerMarker T(TimerStack::TT_O2, Func); | 203 TimerMarker T(TimerStack::TT_O2, Func); |
| 186 | 204 |
| 187 // TODO(stichnot): share passes with X86? | 205 // TODO(stichnot): share passes with X86? |
| 188 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 206 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
| 189 genTargetHelperCalls(); | 207 genTargetHelperCalls(); |
| 190 | 208 |
| 209 unsetIfNonLeafFunc(); |
| 210 |
| 191 findMaxStackOutArgsSize(); | 211 findMaxStackOutArgsSize(); |
| 192 | 212 |
| 193 // Merge Alloca instructions, and lay out the stack. | 213 // Merge Alloca instructions, and lay out the stack. |
| 194 static constexpr bool SortAndCombineAllocas = false; | 214 static constexpr bool SortAndCombineAllocas = true; |
| 195 Func->processAllocas(SortAndCombineAllocas); | 215 Func->processAllocas(SortAndCombineAllocas); |
| 196 Func->dump("After Alloca processing"); | 216 Func->dump("After Alloca processing"); |
| 197 | 217 |
| 198 if (!getFlags().getEnablePhiEdgeSplit()) { | 218 if (!getFlags().getEnablePhiEdgeSplit()) { |
| 199 // Lower Phi instructions. | 219 // Lower Phi instructions. |
| 200 Func->placePhiLoads(); | 220 Func->placePhiLoads(); |
| 201 if (Func->hasError()) | 221 if (Func->hasError()) |
| 202 return; | 222 return; |
| 203 Func->placePhiStores(); | 223 Func->placePhiStores(); |
| 204 if (Func->hasError()) | 224 if (Func->hasError()) |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 284 Func->doNopInsertion(); | 304 Func->doNopInsertion(); |
| 285 } | 305 } |
| 286 } | 306 } |
| 287 | 307 |
| 288 void TargetMIPS32::translateOm1() { | 308 void TargetMIPS32::translateOm1() { |
| 289 TimerMarker T(TimerStack::TT_Om1, Func); | 309 TimerMarker T(TimerStack::TT_Om1, Func); |
| 290 | 310 |
| 291 // TODO: share passes with X86? | 311 // TODO: share passes with X86? |
| 292 genTargetHelperCalls(); | 312 genTargetHelperCalls(); |
| 293 | 313 |
| 314 unsetIfNonLeafFunc(); |
| 315 |
| 294 findMaxStackOutArgsSize(); | 316 findMaxStackOutArgsSize(); |
| 295 | 317 |
| 296 // Do not merge Alloca instructions, and lay out the stack. | 318 // Do not merge Alloca instructions, and lay out the stack. |
| 297 static constexpr bool SortAndCombineAllocas = false; | 319 static constexpr bool SortAndCombineAllocas = false; |
| 298 Func->processAllocas(SortAndCombineAllocas); | 320 Func->processAllocas(SortAndCombineAllocas); |
| 299 Func->dump("After Alloca processing"); | 321 Func->dump("After Alloca processing"); |
| 300 | 322 |
| 301 Func->placePhiLoads(); | 323 Func->placePhiLoads(); |
| 302 if (Func->hasError()) | 324 if (Func->hasError()) |
| 303 return; | 325 return; |
| (...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 434 // transformation. | 456 // transformation. |
| 435 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { | 457 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { |
| 436 return Mem; | 458 return Mem; |
| 437 } | 459 } |
| 438 | 460 |
| 439 // If we didn't do address mode optimization, then we only have a base/offset | 461 // If we didn't do address mode optimization, then we only have a base/offset |
| 440 // to work with. MIPS always requires a base register, so just use that to | 462 // to work with. MIPS always requires a base register, so just use that to |
| 441 // hold the operand. | 463 // hold the operand. |
| 442 auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); | 464 auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); |
| 443 return OperandMIPS32Mem::create( | 465 return OperandMIPS32Mem::create( |
| 444 Func, Ty, Base, | 466 Func, Ty, Base, llvm::cast<ConstantInteger32>( |
| 445 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); | 467 Ctx->getConstantInt32(Base->getStackOffset()))); |
| 446 } | 468 } |
| 447 | 469 |
| 448 void TargetMIPS32::emitVariable(const Variable *Var) const { | 470 void TargetMIPS32::emitVariable(const Variable *Var) const { |
| 449 if (!BuildDefs::dump()) | 471 if (!BuildDefs::dump()) |
| 450 return; | 472 return; |
| 451 Ostream &Str = Ctx->getStrEmit(); | 473 Ostream &Str = Ctx->getStrEmit(); |
| 452 const Type FrameSPTy = IceType_i32; | 474 const Type FrameSPTy = IceType_i32; |
| 453 if (Var->hasReg()) { | 475 if (Var->hasReg()) { |
| 454 Str << '$' << getRegName(Var->getRegNum(), Var->getType()); | 476 Str << '$' << getRegName(Var->getRegNum(), Var->getType()); |
| 455 return; | 477 return; |
| (...skipping 345 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 801 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 823 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 802 uint32_t SpillAreaPaddingBytes = 0; | 824 uint32_t SpillAreaPaddingBytes = 0; |
| 803 uint32_t LocalsSlotsPaddingBytes = 0; | 825 uint32_t LocalsSlotsPaddingBytes = 0; |
| 804 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, | 826 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
| 805 GlobalsSize, LocalsSlotsAlignmentBytes, | 827 GlobalsSize, LocalsSlotsAlignmentBytes, |
| 806 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); | 828 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
| 807 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 829 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| 808 uint32_t GlobalsAndSubsequentPaddingSize = | 830 uint32_t GlobalsAndSubsequentPaddingSize = |
| 809 GlobalsSize + LocalsSlotsPaddingBytes; | 831 GlobalsSize + LocalsSlotsPaddingBytes; |
| 810 | 832 |
| 811 if (MaybeLeafFunc) | |
| 812 MaxOutArgsSizeBytes = 0; | |
| 813 | |
| 814 // Adds the out args space to the stack, and align SP if necessary. | 833 // Adds the out args space to the stack, and align SP if necessary. |
| 815 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes; | 834 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes + |
| 816 | 835 FixedAllocaSizeBytes + MaxOutArgsSizeBytes; |
| 817 // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with | |
| 818 // TotalStackSizeBytes once lowerAlloca is implemented and leaf function | |
| 819 // information is generated by lowerCall. | |
| 820 | 836 |
| 821 // Generate "addiu sp, sp, -TotalStackSizeBytes" | 837 // Generate "addiu sp, sp, -TotalStackSizeBytes" |
| 822 if (TotalStackSizeBytes) { | 838 if (TotalStackSizeBytes) { |
| 823 // Use the scratch register if needed to legalize the immediate. | 839 // Use the scratch register if needed to legalize the immediate. |
| 824 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); | 840 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
| 825 _addiu(SP, SP, -(TotalStackSizeBytes)); | 841 _addiu(SP, SP, -(TotalStackSizeBytes)); |
| 826 } | 842 } |
| 827 | 843 |
| 828 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); | 844 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); |
| 829 | 845 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 847 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); | 863 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
| 848 _mov(FP, SP); | 864 _mov(FP, SP); |
| 849 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). | 865 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
| 850 Context.insert<InstFakeUse>(FP); | 866 Context.insert<InstFakeUse>(FP); |
| 851 } | 867 } |
| 852 | 868 |
| 853 // Fill in stack offsets for stack args, and copy args into registers for | 869 // Fill in stack offsets for stack args, and copy args into registers for |
| 854 // those that were register-allocated. Args are pushed right to left, so | 870 // those that were register-allocated. Args are pushed right to left, so |
| 855 // Arg[0] is closest to the stack/frame pointer. | 871 // Arg[0] is closest to the stack/frame pointer. |
| 856 const VarList &Args = Func->getArgs(); | 872 const VarList &Args = Func->getArgs(); |
| 857 size_t InArgsSizeBytes = 0; | 873 size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4; |
| 858 TargetMIPS32::CallingConv CC; | 874 TargetMIPS32::CallingConv CC; |
| 859 uint32_t ArgNo = 0; | 875 uint32_t ArgNo = 0; |
| 860 | 876 |
| 861 for (Variable *Arg : Args) { | 877 for (Variable *Arg : Args) { |
| 862 RegNumT DummyReg; | 878 RegNumT DummyReg; |
| 863 const Type Ty = Arg->getType(); | 879 const Type Ty = Arg->getType(); |
| 864 // Skip arguments passed in registers. | 880 // Skip arguments passed in registers. |
| 865 if (CC.argInReg(Ty, ArgNo, &DummyReg)) { | 881 if (CC.argInReg(Ty, ArgNo, &DummyReg)) { |
| 866 ArgNo++; | 882 ArgNo++; |
| 867 continue; | 883 continue; |
| (...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 995 Registers[RegMIPS32::val] = false; | 1011 Registers[RegMIPS32::val] = false; |
| 996 | 1012 |
| 997 REGMIPS32_TABLE | 1013 REGMIPS32_TABLE |
| 998 | 1014 |
| 999 #undef X | 1015 #undef X |
| 1000 | 1016 |
| 1001 return Registers; | 1017 return Registers; |
| 1002 } | 1018 } |
| 1003 | 1019 |
| 1004 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { | 1020 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { |
| 1005 UsesFramePointer = true; | |
| 1006 // Conservatively require the stack to be aligned. Some stack adjustment | 1021 // Conservatively require the stack to be aligned. Some stack adjustment |
| 1007 // operations implemented below assume that the stack is aligned before the | 1022 // operations implemented below assume that the stack is aligned before the |
| 1008 // alloca. All the alloca code ensures that the stack alignment is preserved | 1023 // alloca. All the alloca code ensures that the stack alignment is preserved |
| 1009 // after the alloca. The stack alignment restriction can be relaxed in some | 1024 // after the alloca. The stack alignment restriction can be relaxed in some |
| 1010 // cases. | 1025 // cases. |
| 1011 NeedsStackAlignment = true; | 1026 NeedsStackAlignment = true; |
| 1012 UnimplementedLoweringError(this, Instr); | 1027 |
| 1028 // For default align=0, set it to the real value 1, to avoid any |
| 1029 // bit-manipulation problems below. |
| 1030 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); |
| 1031 |
| 1032 // LLVM enforces power of 2 alignment. |
| 1033 assert(llvm::isPowerOf2_32(AlignmentParam)); |
| 1034 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES)); |
| 1035 |
| 1036 const uint32_t Alignment = |
| 1037 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES); |
| 1038 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES; |
| 1039 const bool OptM1 = getFlags().getOptLevel() == Opt_m1; |
| 1040 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); |
| 1041 const bool UseFramePointer = |
| 1042 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; |
| 1043 |
| 1044 if (UseFramePointer) |
| 1045 setHasFramePointer(); |
| 1046 |
| 1047 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
| 1048 |
| 1049 Variable *Dest = Instr->getDest(); |
| 1050 Operand *TotalSize = Instr->getSizeInBytes(); |
| 1051 |
| 1052 if (const auto *ConstantTotalSize = |
| 1053 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| 1054 const uint32_t Value = |
| 1055 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); |
| 1056 FixedAllocaSizeBytes += Value; |
| 1057 // Constant size alloca. |
| 1058 if (!UseFramePointer) { |
| 1059 // If we don't need a Frame Pointer, this alloca has a known offset to the |
| 1060 // stack pointer. We don't need adjust the stack pointer, nor assign any |
| 1061 // value to Dest, as Dest is rematerializable. |
| 1062 assert(Dest->isRematerializable()); |
| 1063 Context.insert<InstFakeDef>(Dest); |
| 1064 return; |
| 1065 } |
| 1066 } else { |
| 1067 UnimplementedLoweringError(this, Instr); |
| 1068 return; |
| 1069 } |
| 1070 |
| 1071 // Add enough to the returned address to account for the out args area. |
| 1072 if (MaxOutArgsSizeBytes > 0) { |
| 1073 Variable *T = makeReg(getPointerType()); |
| 1074 _addiu(T, SP, MaxOutArgsSizeBytes); |
| 1075 _mov(Dest, T); |
| 1076 } else { |
| 1077 _mov(Dest, SP); |
| 1078 } |
| 1013 } | 1079 } |
| 1014 | 1080 |
| 1015 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, | 1081 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, |
| 1016 Variable *Dest, Operand *Src0, | 1082 Variable *Dest, Operand *Src0, |
| 1017 Operand *Src1) { | 1083 Operand *Src1) { |
| 1018 InstArithmetic::OpKind Op = Instr->getOp(); | 1084 InstArithmetic::OpKind Op = Instr->getOp(); |
| 1019 switch (Op) { | 1085 switch (Op) { |
| 1020 case InstArithmetic::Add: | 1086 case InstArithmetic::Add: |
| 1021 case InstArithmetic::And: | 1087 case InstArithmetic::And: |
| 1022 case InstArithmetic::Or: | 1088 case InstArithmetic::Or: |
| (...skipping 1112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2135 Str << "\t.set\t" | 2201 Str << "\t.set\t" |
| 2136 << "nomips16\n"; | 2202 << "nomips16\n"; |
| 2137 } | 2203 } |
| 2138 | 2204 |
| 2139 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; | 2205 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; |
| 2140 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM]; | 2206 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM]; |
| 2141 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; | 2207 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; |
| 2142 | 2208 |
| 2143 } // end of namespace MIPS32 | 2209 } // end of namespace MIPS32 |
| 2144 } // end of namespace Ice | 2210 } // end of namespace Ice |
| OLD | NEW |