Chromium Code Reviews| Index: src/IceTargetLoweringMIPS32.cpp |
| diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp |
| index 70ddef531f88350f82bf5fc12e8d6b01cdca4ba9..269ecdb7a4f02d9ca4301f813a067093287ca5e1 100644 |
| --- a/src/IceTargetLoweringMIPS32.cpp |
| +++ b/src/IceTargetLoweringMIPS32.cpp |
| @@ -64,6 +64,14 @@ namespace { |
| // The maximum number of arguments to pass in GPR registers. |
| constexpr uint32_t MIPS32_MAX_GPR_ARG = 4; |
| +std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer; |
| +std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer; |
| + |
| +constexpr uint32_t MIPS32_MAX_FP_ARG = 2; |
| + |
| +std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer; |
| +std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer; |
| + |
| const char *getRegClassName(RegClass C) { |
| auto ClassNum = static_cast<RegClassMIPS32>(C); |
| assert(ClassNum < RCMIPS32_NUM); |
| @@ -75,6 +83,18 @@ const char *getRegClassName(RegClass C) { |
| } |
| } |
| +// Stack alignment, in bytes. |
| +constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 8; |
| + |
| +// Value is in bytes. Returns Value adjusted to the next highest multiple of the alignment |
| +// used for Ty, taken here to be typeWidthInBytes(Ty); vector types hit UnimplementedError(). |
| +uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { |
| + size_t typeAlignInBytes = typeWidthInBytes(Ty); |
| + if (isVectorType(Ty)) |
| + UnimplementedError(getFlags()); |
| + return Utils::applyAlignment(Value, typeAlignInBytes); |
| +} |
| + |
| } // end of anonymous namespace |
| TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) {} |
| @@ -105,6 +125,20 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) { |
| assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]); |
| REGMIPS32_TABLE; |
| #undef X |
| + |
| + // TODO(mohit.bhakkad): Change these inits once we provide argument-related |
| + // fields in the register tables. |
| + for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++) |
| + GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i); |
| + |
| + for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++) |
| + I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i); |
| + |
| + for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) { |
| + FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2); |
| + FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i); |
| + } |
| + |
| TypeToRegisterSet[IceType_void] = InvalidRegisters; |
| TypeToRegisterSet[IceType_i1] = IntegerRegisters; |
| TypeToRegisterSet[IceType_i8] = IntegerRegisters; |
| @@ -129,6 +163,24 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) { |
| RegMIPS32::getRegName, getRegClassName); |
| } |
| +void TargetMIPS32::findMaxStackOutArgsSize() { |
| + // Record the largest stack-args size over all InstCalls, with MinNeededOutArgsBytes as a floor. |
| + // Update MinNeededOutArgsBytes if the Target ever creates a high-level InstCall needing more bytes. |
| + constexpr size_t MinNeededOutArgsBytes = 16; |
| + MaxOutArgsSizeBytes = MinNeededOutArgsBytes; |
| + for (CfgNode *Node : Func->getNodes()) { |
| + Context.init(Node); |
| + while (!Context.atEnd()) { |
| + PostIncrLoweringContext PostIncrement(Context); |
| + Inst *CurInstr = iteratorToInst(Context.getCur()); |
| + if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
| + SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
| + MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
| + } |
| + } |
| + } |
| +} |
| + |
| void TargetMIPS32::translateO2() { |
| TimerMarker T(TimerStack::TT_O2, Func); |
| @@ -136,6 +188,8 @@ void TargetMIPS32::translateO2() { |
| // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
| genTargetHelperCalls(); |
| + findMaxStackOutArgsSize(); |
| + |
| // Merge Alloca instructions, and lay out the stack. |
| static constexpr bool SortAndCombineAllocas = false; |
| Func->processAllocas(SortAndCombineAllocas); |
| @@ -237,6 +291,8 @@ void TargetMIPS32::translateOm1() { |
| // TODO: share passes with X86? |
| genTargetHelperCalls(); |
| + findMaxStackOutArgsSize(); |
| + |
| // Do not merge Alloca instructions, and lay out the stack. |
| static constexpr bool SortAndCombineAllocas = false; |
| Func->processAllocas(SortAndCombineAllocas); |
| @@ -403,88 +459,450 @@ void TargetMIPS32::emitVariable(const Variable *Var) const { |
| Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy); |
| Str << ")"; |
| } |
| + // UnimplementedError(getFlags()); |
|
Jim Stichnoth
2016/06/13 20:14:19
remove this
|
| +} |
| + |
| +TargetMIPS32::CallingConv::CallingConv() |
| + : GPRegsUsed(RegMIPS32::Reg_NUM), |
| + GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()), |
| + I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()), |
| + VFPRegsUsed(RegMIPS32::Reg_NUM), |
| + FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()), |
| + FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {} |
| + |
| +// In the MIPS O32 ABI, FP argument registers can be used only if the first |
| +// argument is of type float/double; the UseFPRegs flag takes care of that. Also, |
| +// FP argument registers can be used only for the first 2 arguments, so we require |
| +// the argument number (ArgNo) to make register allocation decisions. |
| +bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo, |
| + RegNumT *Reg) { |
| + if (isScalarIntegerType(Ty)) |
| + return argInGPR(Ty, Reg); |
| + if (isScalarFloatingType(Ty)) { |
| + if (ArgNo == 0) { |
| + UseFPRegs = true; |
| + return argInVFP(Ty, Reg); |
| + } |
| + if (UseFPRegs && ArgNo == 1) { |
| + UseFPRegs = false; |
| + return argInVFP(Ty, Reg); |
| + } |
| + return argInGPR(Ty, Reg); |
| + } |
| UnimplementedError(getFlags()); |
| + return false; |
| +} |
| + |
| +bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { |
| + CfgVector<RegNumT> *Source; |
| + |
| + switch (Ty) { |
| + default: { |
| + UnimplementedError(getFlags()); |
| + return false; |
| + } break; |
| + case IceType_i32: |
| + case IceType_f32: { |
| + Source = &GPRArgs; |
| + } break; |
| + case IceType_i64: |
| + case IceType_f64: { |
| + Source = &I64Args; |
| + } break; |
| + } |
| + |
| + discardUnavailableGPRsAndTheirAliases(Source); |
| + |
| + if (Source->empty()) { |
| + GPRegsUsed.set(); |
| + return false; |
| + } |
| + |
| + *Reg = Source->back(); |
| + // Note that we intentionally don't Source->pop_back() here. All of Reg's aliases |
| + // (RegisterAliases[*Reg]) are marked as used below, so for the next argument |
| + // Source->back() is already unavailable and gets discarded by |
| + // discardUnavailableGPRsAndTheirAliases(), i.e. it is implicitly popped. |
| + GPRegsUsed |= RegisterAliases[*Reg]; |
| + return true; |
| +} |
| + |
| +inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases( |
| + CfgVector<RegNumT> *Regs) { |
| + GPRegsUsed |= RegisterAliases[Regs->back()]; |
| + Regs->pop_back(); |
| +} |
| + |
| +// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64, |
| +// i32) will have the first argument in a0, the second in a2-a3, and the third |
| +// on the stack. To model this behavior, whenever we pop a register from Regs, |
| +// we also mark all of its aliases as used (GPRegsUsed), removing them from the pool of |
| +// available GPRs. This has the effect of computing the "closure" on the GPR registers. |
| +void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases( |
| + CfgVector<RegNumT> *Regs) { |
| + while (!Regs->empty() && GPRegsUsed[Regs->back()]) { |
| + discardNextGPRAndItsAliases(Regs); |
| + } |
| +} |
| + |
| +bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) { |
| + CfgVector<RegNumT> *Source; |
| + |
| + switch (Ty) { |
| + default: { |
| + UnimplementedError(getFlags()); |
| + return false; |
| + } break; |
| + case IceType_f32: { |
| + Source = &FP32Args; |
| + } break; |
| + case IceType_f64: { |
| + Source = &FP64Args; |
| + } break; |
| + } |
| + |
| + discardUnavailableVFPRegsAndTheirAliases(Source); |
| + |
| + if (Source->empty()) { |
| + VFPRegsUsed.set(); |
| + return false; |
| + } |
| + |
| + *Reg = Source->back(); |
| + VFPRegsUsed |= RegisterAliases[*Reg]; |
| + |
| + // In the MIPS O32 ABI, if the function arguments are (f32, i32) then reg_a0 |
| + // cannot be used for the second argument even though it is free: the f32 arg goes |
| + // in reg_f12 and the i32 arg goes in reg_a1. Similarly, for (f64, i32) a0 and a1 |
| + // are both discarded below, so the second argument goes in the next GPR (reg_a2). |
| + Source = &GPRArgs; |
| + // Discard one GPR reg for f32 (4 bytes), two for f64 (4 + 4 bytes). |
| + discardNextGPRAndItsAliases(Source); |
| + if (Ty == IceType_f64) |
| + discardNextGPRAndItsAliases(Source); |
| + |
| + return true; |
| +} |
| + |
| +void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases( |
| + CfgVector<RegNumT> *Regs) { |
| + while (!Regs->empty() && VFPRegsUsed[Regs->back()]) { |
| + Regs->pop_back(); |
| + } |
| } |
| void TargetMIPS32::lowerArguments() { |
| VarList &Args = Func->getArgs(); |
| - // We are only handling integer registers for now. The Mips o32 ABI is |
| - // somewhat complex but will be implemented in its totality through follow |
| - // on patches. |
| - // |
| - unsigned NumGPRRegsUsed = 0; |
| - // For each register argument, replace Arg in the argument list with the |
| - // home register. Then generate an instruction in the prolog to copy the |
| - // home register to the assigned location of Arg. |
| + TargetMIPS32::CallingConv CC; |
| + |
| + // For each register argument, replace Arg in the argument list with the home |
| + // register. Then generate an instruction in the prolog to copy the home |
| + // register to the assigned location of Arg. |
| Context.init(Func->getEntryNode()); |
| Context.setInsertPoint(Context.getCur()); |
| + |
| for (SizeT I = 0, E = Args.size(); I < E; ++I) { |
| Variable *Arg = Args[I]; |
| Type Ty = Arg->getType(); |
| - // TODO(rkotler): handle float/vector types. |
| - if (isVectorType(Ty)) { |
| - UnimplementedError(getFlags()); |
| + RegNumT RegNum; |
| + if (!CC.argInReg(Ty, I, &RegNum)) { |
| continue; |
| } |
| - if (isFloatingType(Ty)) { |
| - UnimplementedError(getFlags()); |
| - continue; |
| + Variable *RegisterArg = Func->makeVariable(Ty); |
| + if (BuildDefs::dump()) { |
| + RegisterArg->setName(Func, "home_reg:" + Arg->getName()); |
| } |
| - if (Ty == IceType_i64) { |
| - if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG) |
| - continue; |
| - auto RegLo = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed); |
| - auto RegHi = RegNumT::fixme(RegLo + 1); |
| - ++NumGPRRegsUsed; |
| - // Always start i64 registers at an even register, so this may end |
| - // up padding away a register. |
| - if (RegLo % 2 != 0) { |
| - RegLo = RegNumT::fixme(RegLo + 1); |
| - ++NumGPRRegsUsed; |
| - } |
| - // If this leaves us without room to consume another register, |
| - // leave any previously speculatively consumed registers as consumed. |
| - if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG) |
| - continue; |
| - // RegHi = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed); |
| - ++NumGPRRegsUsed; |
| - Variable *RegisterArg = Func->makeVariable(Ty); |
| - auto *RegisterArg64On32 = llvm::cast<Variable64On32>(RegisterArg); |
| - if (BuildDefs::dump()) |
| - RegisterArg64On32->setName(Func, "home_reg:" + Arg->getName()); |
| - RegisterArg64On32->initHiLo(Func); |
| - RegisterArg64On32->setIsArg(); |
| - RegisterArg64On32->getLo()->setRegNum(RegLo); |
| - RegisterArg64On32->getHi()->setRegNum(RegHi); |
| - Arg->setIsArg(false); |
| - Args[I] = RegisterArg64On32; |
| - Context.insert<InstAssign>(Arg, RegisterArg); |
| - continue; |
| - } else { |
| - assert(Ty == IceType_i32); |
| - if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG) |
| - continue; |
| - const auto RegNum = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed); |
| - ++NumGPRRegsUsed; |
| - Variable *RegisterArg = Func->makeVariable(Ty); |
| - if (BuildDefs::dump()) { |
| - RegisterArg->setName(Func, "home_reg:" + Arg->getName()); |
| - } |
| - RegisterArg->setRegNum(RegNum); |
| - RegisterArg->setIsArg(); |
| - Arg->setIsArg(false); |
| - Args[I] = RegisterArg; |
| - Context.insert<InstAssign>(Arg, RegisterArg); |
| + RegisterArg->setIsArg(); |
| + Arg->setIsArg(false); |
| + Args[I] = RegisterArg; |
| + switch (Ty) { |
| + default: { RegisterArg->setRegNum(RegNum); } break; |
| + case IceType_i64: { |
| + auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg); |
| + RegisterArg64->initHiLo(Func); |
| + RegisterArg64->getLo()->setRegNum( |
| + RegNumT::fixme(RegMIPS32::getI64PairFirstGPRNum(RegNum))); |
| + RegisterArg64->getHi()->setRegNum( |
| + RegNumT::fixme(RegMIPS32::getI64PairSecondGPRNum(RegNum))); |
| + } break; |
| } |
| + Context.insert<InstAssign>(Arg, RegisterArg); |
| } |
| } |
| Type TargetMIPS32::stackSlotType() { return IceType_i32; } |
| +// Helper function for addProlog(). |
| +// |
| +// This assumes Arg is an argument passed on the stack. It sets the frame offset |
| +// for Arg (BasicFrameOffset plus the running *InArgsSizeBytes) and advances |
| +// *InArgsSizeBytes by Arg's on-stack width. For an I64 arg that has been split into |
| +// Lo and Hi components, it recurses on the components, handling Lo first because |
| +// of the little-endian architecture. Lastly, it emits a _mov to copy Arg from its |
| +// stack slot into its assigned register if it has one. FramePtr is unused here. |
| +void TargetMIPS32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| + size_t BasicFrameOffset, |
| + size_t *InArgsSizeBytes) { |
| + const Type Ty = Arg->getType(); |
| + *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty); |
| + |
| + if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
| + Variable *const Lo = Arg64On32->getLo(); |
| + Variable *const Hi = Arg64On32->getHi(); |
| + finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| + finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| + return; |
| + } |
| + assert(Ty != IceType_i64); |
| + |
| + const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes; |
| + *InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| + |
| + if (!Arg->hasReg()) { |
| + Arg->setStackOffset(ArgStackOffset); |
| + return; |
| + } |
| + |
| + // If the argument variable has been assigned a register, we need to copy the |
| + // value from its stack slot into that register. |
| + Variable *Parameter = Func->makeVariable(Ty); |
| + Parameter->setMustNotHaveReg(); |
| + Parameter->setStackOffset(ArgStackOffset); |
| + _mov(Arg, Parameter); |
| +} |
| + |
| void TargetMIPS32::addProlog(CfgNode *Node) { |
| - (void)Node; |
| + // Stack frame layout: |
| + // |
| + // +------------------------+ |
| + // | 1. preserved registers | |
| + // +------------------------+ |
| + // | 2. padding | |
| + // +------------------------+ |
| + // | 3. global spill area | |
| + // +------------------------+ |
| + // | 4. padding | |
| + // +------------------------+ |
| + // | 5. local spill area | |
| + // +------------------------+ |
| + // | 6. padding | |
| + // +------------------------+ |
| + // | 7. allocas | |
| + // +------------------------+ |
| + // | 8. padding | |
| + // +------------------------+ |
| + // | 9. out args | |
| + // +------------------------+ <--- StackPointer |
| + // |
| + // The following variables record the size in bytes of the given areas: |
| + // * PreservedRegsSizeBytes: area 1 |
| + // * SpillAreaPaddingBytes: area 2 |
| + // * GlobalsSize: area 3 |
| + // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
| + // * LocalsSpillAreaSize: area 5 |
| + // * SpillAreaSizeBytes: areas 2 - 9 |
| + // * maxOutArgsSizeBytes(): area 9 |
| + |
| + Context.init(Node); |
| + Context.setInsertPoint(Context.getCur()); |
| + |
| + SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| + RegsUsed = SmallBitVector(CalleeSaves.size()); |
| + |
| + VarList SortedSpilledVariables; |
| + |
| + size_t GlobalsSize = 0; |
| + // If there is a separate locals area, this represents that area. Otherwise |
| + // it counts any variable not counted by GlobalsSize. |
| + SpillAreaSizeBytes = 0; |
| + // If there is a separate locals area, this specifies the alignment for it. |
| + uint32_t LocalsSlotsAlignmentBytes = 0; |
| + // The entire spill locations area gets aligned to largest natural alignment |
| + // of the variables that have a spill slot. |
| + uint32_t SpillAreaAlignmentBytes = 0; |
| + // For now, we don't have target-specific variables that need special |
| + // treatment (no stack-slot-linked SpillVariable type). |
| + std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) { |
| + static constexpr bool AssignStackSlot = false; |
| + static constexpr bool DontAssignStackSlot = !AssignStackSlot; |
| + if (llvm::isa<Variable64On32>(Var)) { |
| + return DontAssignStackSlot; |
| + } |
| + return AssignStackSlot; |
| + }; |
| + |
| + // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| + getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| + &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| + &LocalsSlotsAlignmentBytes, TargetVarHook); |
| + uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| + SpillAreaSizeBytes += GlobalsSize; |
| + |
| + PreservedGPRs.reserve(CalleeSaves.size()); |
| + |
| + // Consider FP and RA as callee-save / used as needed. |
| + if (UsesFramePointer) { |
| + if (RegsUsed[RegMIPS32::Reg_FP]) { |
| + llvm::report_fatal_error("Frame pointer has been used."); |
| + } |
| + CalleeSaves[RegMIPS32::Reg_FP] = true; |
| + RegsUsed[RegMIPS32::Reg_FP] = true; |
| + } |
| + if (!MaybeLeafFunc) { |
| + CalleeSaves[RegMIPS32::Reg_RA] = true; |
| + RegsUsed[RegMIPS32::Reg_RA] = true; |
| + } |
| + |
| + // Make two passes over the used registers. The first pass records all the |
| + // used registers -- and their aliases. Then, we figure out which GPR |
| + // registers should be saved. |
| + SmallBitVector ToPreserve(RegMIPS32::Reg_NUM); |
| + for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| + if (CalleeSaves[i] && RegsUsed[i]) { |
| + ToPreserve |= RegisterAliases[i]; |
| + } |
| + } |
| + |
| + uint32_t NumCallee = 0; |
| + size_t PreservedRegsSizeBytes = 0; |
| + |
| + // RegClass is a tuple of |
| + // |
| + // <First Register in Class, Last Register in Class, Vector of Save Registers> |
| + // |
| + // We use this tuple to figure out which registers we should save/restore |
| + // during prolog/epilog. |
| + using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>; |
| + const RegClassType RegClass = RegClassType( |
| + RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_GPR_Last, &PreservedGPRs); |
| + const uint32_t FirstRegInClass = std::get<0>(RegClass); |
| + const uint32_t LastRegInClass = std::get<1>(RegClass); |
| + VarList *const PreservedRegsInClass = std::get<2>(RegClass); |
| + for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) { |
| + if (!ToPreserve[Reg]) { |
| + continue; |
| + } |
| + ++NumCallee; |
| + Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg)); |
| + PreservedRegsSizeBytes += |
| + typeWidthInBytesOnStack(PhysicalRegister->getType()); |
| + PreservedRegsInClass->push_back(PhysicalRegister); |
| + } |
| + |
| + Ctx->statsUpdateRegistersSaved(NumCallee); |
| + |
| + // Align the variables area. SpillAreaPaddingBytes is the size of the region |
| + // after the preserved registers and before the spill areas. |
| + // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
| + // locals area if they are separate. |
| + assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES); |
| + (void)MIPS32_STACK_ALIGNMENT_BYTES; |
| + assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| + uint32_t SpillAreaPaddingBytes = 0; |
| + uint32_t LocalsSlotsPaddingBytes = 0; |
| + alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
| + GlobalsSize, LocalsSlotsAlignmentBytes, |
| + &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
| + SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| + uint32_t GlobalsAndSubsequentPaddingSize = |
| + GlobalsSize + LocalsSlotsPaddingBytes; |
| + |
| + if (MaybeLeafFunc) |
| + MaxOutArgsSizeBytes = 0; |
| + |
| + // Compute the total stack size. The out args space and SP alignment are not |
| + // included yet (see the TODO below). |
| + uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes; |
| + |
| + // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with |
| + // TotalStackSizeBytes once lowerAlloca is implemented and leaf function |
| + // information is generated by lowerCall. |
| + |
| + // Generate "addiu sp, sp, -TotalStackSizeBytes" |
| + if (TotalStackSizeBytes) { |
| + // Use the scratch register if needed to legalize the immediate. |
| + Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
| + _addiu(SP, SP, -(TotalStackSizeBytes)); |
| + } |
| + |
| + Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); |
| + |
| + if (!PreservedGPRs.empty()) { |
| + uint32_t StackOffset = TotalStackSizeBytes; |
| + for (Variable *Var : *PreservedRegsInClass) { |
| + Variable *PhysicalRegister = getPhysicalRegister(Var->getRegNum()); |
| + StackOffset -= typeWidthInBytesOnStack(PhysicalRegister->getType()); |
| + Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
| + OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create( |
| + Func, IceType_i32, SP, |
| + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset))); |
| + _sw(PhysicalRegister, MemoryLocation); |
| + } |
| + } |
| + |
| + Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP); |
| + |
| + // Generate "mov FP, SP" if needed. |
| + if (UsesFramePointer) { |
| + Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
| + _mov(FP, SP); |
| + // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
| + Context.insert<InstFakeUse>(FP); |
| + } |
| + |
| + // Fill in stack offsets for stack args, and copy args into registers for |
| + // those that were register-allocated. Args are pushed right to left, so |
| + // Arg[0] is closest to the stack/frame pointer. |
| + const VarList &Args = Func->getArgs(); |
| + size_t InArgsSizeBytes = 0; |
| + TargetMIPS32::CallingConv CC; |
| + uint32_t ArgNo = 0; |
| + |
| + for (Variable *Arg : Args) { |
| + RegNumT DummyReg; |
| + const Type Ty = Arg->getType(); |
| + // Skip arguments passed in registers. |
| + if (CC.argInReg(Ty, ArgNo, &DummyReg)) { |
| + ArgNo++; |
| + continue; |
| + } else { |
| + finishArgumentLowering(Arg, FP, TotalStackSizeBytes, &InArgsSizeBytes); |
| + } |
| + } |
| + |
| + // Fill in stack offsets for locals. |
| + assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
| + SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
| + UsesFramePointer); |
| + this->HasComputedFrame = true; |
| + |
| + if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { |
| + OstreamLocker _(Func->getContext()); |
| + Ostream &Str = Func->getContext()->getStrDump(); |
| + |
| + Str << "Stack layout:\n"; |
| + uint32_t SPAdjustmentPaddingSize = |
| + SpillAreaSizeBytes - LocalsSpillAreaSize - |
| + GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - |
| + MaxOutArgsSizeBytes; |
| + Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| + << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| + << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| + << " globals spill area = " << GlobalsSize << " bytes\n" |
| + << " globals-locals spill areas intermediate padding = " |
| + << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| + << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| + << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n"; |
| + |
| + Str << "Stack details:\n" |
| + << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| + << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| + << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n" |
| + << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| + << " bytes\n" |
| + << " is FP based = " << 1 << "\n"; |
| + } |
| return; |
| - UnimplementedError(getFlags()); |
| } |
| void TargetMIPS32::addEpilog(CfgNode *Node) { |
| @@ -707,17 +1125,6 @@ void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) { |
| UnimplementedLoweringError(this, Instr); |
| return; |
| } |
| - switch (Instr->getOp()) { |
| - default: |
| - break; |
| - case InstArithmetic::Fadd: |
| - case InstArithmetic::Fsub: |
| - case InstArithmetic::Fmul: |
| - case InstArithmetic::Fdiv: |
| - case InstArithmetic::Frem: |
| - UnimplementedLoweringError(this, Instr); |
| - return; |
| - } |
| // At this point Dest->getType() is non-i64 scalar |
| @@ -796,13 +1203,54 @@ void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) { |
| _mov(Dest, T); |
| return; |
| } |
| - case InstArithmetic::Fadd: |
| + case InstArithmetic::Fadd: { |
| + if (DestTy == IceType_f32) { |
| + _add_s(T, Src0R, Src1R); |
| + _mov_s(Dest, T); |
|
Jim Stichnoth
2016/06/13 20:14:19
From this, I am seeing invalid asm code being gene
|
| + return; |
| + } |
| + if (DestTy == IceType_f64) { |
| + _add_d(T, Src0R, Src1R); |
| + _mov_d(Dest, T); |
| + return; |
| + } |
| break; |
| + } |
| case InstArithmetic::Fsub: |
| + if (DestTy == IceType_f32) { |
| + _sub_s(T, Src0R, Src1R); |
| + _mov_s(Dest, T); |
| + return; |
| + } |
| + if (DestTy == IceType_f64) { |
| + _sub_d(T, Src0R, Src1R); |
| + _mov_d(Dest, T); |
| + return; |
| + } |
| break; |
| case InstArithmetic::Fmul: |
| + if (DestTy == IceType_f32) { |
| + _mul_s(T, Src0R, Src1R); |
| + _mov_s(Dest, T); |
| + return; |
| + } |
| + if (DestTy == IceType_f64) { |
| + _mul_d(T, Src0R, Src1R); |
| + _mov_d(Dest, T); |
| + return; |
| + } |
| break; |
| case InstArithmetic::Fdiv: |
| + if (DestTy == IceType_f32) { |
| + _div_s(T, Src0R, Src1R); |
| + _mov_s(Dest, T); |
| + return; |
| + } |
| + if (DestTy == IceType_f64) { |
| + _div_d(T, Src0R, Src1R); |
| + _mov_d(Dest, T); |
| + return; |
| + } |
| break; |
| case InstArithmetic::Frem: |
|
Jim Stichnoth
2016/06/13 20:14:19
For now, avoid a liveness validation error by addi
|
| break; |
| @@ -934,10 +1382,10 @@ void TargetMIPS32::lowerBr(const InstBr *Instr) { |
| void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| // TODO(rkotler): assign arguments to registers and stack. Also reserve stack. |
| - if (Instr->getNumArgs()) { |
| - UnimplementedLoweringError(this, Instr); |
| - return; |
| - } |
| + // if (Instr->getNumArgs()) { |
|
Jim Stichnoth
2016/06/13 20:14:19
remove this
|
| + // UnimplementedLoweringError(this, Instr); |
| + // return; |
| + //} |
| // Generate the call instruction. Assign its result to a temporary with high |
| // register allocation weight. |
| Variable *Dest = Instr->getDest(); |
| @@ -962,19 +1410,22 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| ReturnRegHi = I32Reg(RegMIPS32::Reg_V1); |
| break; |
| case IceType_f32: |
| + ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0); |
| + break; |
| case IceType_f64: |
| - UnimplementedLoweringError(this, Instr); |
| - return; |
| + ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0F1); |
| + break; |
| case IceType_v4i1: |
| case IceType_v8i1: |
| case IceType_v16i1: |
| case IceType_v16i8: |
| case IceType_v8i16: |
| case IceType_v4i32: |
| - case IceType_v4f32: |
| + case IceType_v4f32: { |
| UnimplementedLoweringError(this, Instr); |
| return; |
| } |
| + } |
| } |
| Operand *CallTarget = Instr->getCallTarget(); |
| // Allow ConstantRelocatable to be left alone as a direct call, |
| @@ -998,23 +1449,43 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| // Assign the result of the call to Dest. |
| if (ReturnReg) { |
| - if (ReturnRegHi) { |
| - assert(Dest->getType() == IceType_i64); |
| + auto *Zero = getZero(); |
| + switch (Dest->getType()) { |
| + default: |
| + UnimplementedLoweringError(this, Instr); |
| + case IceType_i1: |
| + case IceType_i8: |
| + case IceType_i16: |
| + case IceType_i32: { |
| + _addu(Dest, Zero, ReturnReg); |
| + break; |
| + } |
| + case IceType_i64: { |
| auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
| Variable *DestLo = Dest64On32->getLo(); |
| Variable *DestHi = Dest64On32->getHi(); |
| - _mov(DestLo, ReturnReg); |
| - _mov(DestHi, ReturnRegHi); |
| - } else { |
| - assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
| - Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| - isVectorType(Dest->getType())); |
| - if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { |
| - UnimplementedLoweringError(this, Instr); |
| - return; |
| - } else { |
| - _mov(Dest, ReturnReg); |
| - } |
| + _addu(DestLo, Zero, ReturnReg); |
| + _addu(DestHi, Zero, ReturnRegHi); |
| + break; |
| + } |
| + case IceType_f32: { |
| + _mov_s(Dest, ReturnReg); |
| + break; |
| + } |
| + case IceType_f64: { |
| + _mov_d(Dest, ReturnReg); |
| + break; |
| + } |
| + case IceType_v4i1: |
| + case IceType_v8i1: |
| + case IceType_v16i1: |
| + case IceType_v16i8: |
| + case IceType_v8i16: |
| + case IceType_v4i32: |
| + case IceType_v4f32: { |
| + UnimplementedLoweringError(this, Instr); |
| + return; |
| + } |
| } |
| } |
| } |
| @@ -1423,7 +1894,24 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) { |
| Variable *Reg = nullptr; |
| if (Instr->hasRetValue()) { |
| Operand *Src0 = Instr->getRetValue(); |
| + |
| switch (Src0->getType()) { |
| + default: |
| + UnimplementedLoweringError(this, Instr); |
|
Jim Stichnoth
2016/06/13 20:14:19
You probably want a "break;" after this, otherwise
|
| + case IceType_f32: { |
| + if (auto *Src0V = llvm::dyn_cast<Variable>(Src0)) { |
| + Reg = makeReg(Src0V->getType(), RegMIPS32::Reg_F0); |
| + _mov_s(Reg, Src0V); |
| + } |
| + break; |
| + } |
| + case IceType_f64: { |
| + if (auto *Src0V = llvm::dyn_cast<Variable>(Src0)) { |
| + Reg = makeReg(Src0V->getType(), RegMIPS32::Reg_F0F1); |
| + _mov_d(Reg, Src0V); |
| + } |
| + break; |
| + } |
| case IceType_i1: |
| case IceType_i8: |
| case IceType_i16: |
| @@ -1442,9 +1930,16 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) { |
| Context.insert<InstFakeUse>(R1); |
| break; |
| } |
| - |
| - default: |
| + case IceType_v4i1: |
| + case IceType_v8i1: |
| + case IceType_v16i1: |
| + case IceType_v16i8: |
| + case IceType_v8i16: |
| + case IceType_v4i32: |
| + case IceType_v4f32: { |
| UnimplementedLoweringError(this, Instr); |
| + break; |
| + } |
| } |
| } |
| _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg); |
| @@ -1551,13 +2046,13 @@ void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars, |
| void TargetDataMIPS32::lowerConstants() { |
| if (getFlags().getDisableTranslation()) |
| return; |
| - UnimplementedError(getFlags()); |
| + // UnimplementedError(getFlags()); |
|
Jim Stichnoth
2016/06/13 20:14:19
remove this
|
| } |
| void TargetDataMIPS32::lowerJumpTables() { |
| if (getFlags().getDisableTranslation()) |
| return; |
| - UnimplementedError(getFlags()); |
| + // UnimplementedError(getFlags()); |
|
Jim Stichnoth
2016/06/13 20:14:19
remove this
|
| } |
| // Helper for legalize() to emit the right code to lower an operand to a |
| @@ -1565,8 +2060,11 @@ void TargetDataMIPS32::lowerJumpTables() { |
| Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) { |
| Type Ty = Src->getType(); |
| Variable *Reg = makeReg(Ty, RegNum); |
| - if (isVectorType(Ty) || isFloatingType(Ty)) { |
| + if (isVectorType(Ty)) { |
| UnimplementedError(getFlags()); |
| + } else if (isFloatingType(Ty)) { |
| + (Ty == IceType_f32) ? _mov_s(Reg, llvm::dyn_cast<Variable>(Src)) |
| + : _mov_d(Reg, llvm::dyn_cast<Variable>(Src)); |
| } else { |
| // Mov's Src operand can really only be the flexible second operand type |
| // or a register. Users should guarantee that. |