Index: src/IceTargetLoweringMIPS32.cpp |
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp |
index 70ddef531f88350f82bf5fc12e8d6b01cdca4ba9..269ecdb7a4f02d9ca4301f813a067093287ca5e1 100644 |
--- a/src/IceTargetLoweringMIPS32.cpp |
+++ b/src/IceTargetLoweringMIPS32.cpp |
@@ -64,6 +64,14 @@ namespace { |
// The maximum number of arguments to pass in GPR registers. |
constexpr uint32_t MIPS32_MAX_GPR_ARG = 4; |
+std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer; |
+std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer; |
+ |
+constexpr uint32_t MIPS32_MAX_FP_ARG = 2; |
+ |
+std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer; |
+std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer; |
+ |
const char *getRegClassName(RegClass C) { |
auto ClassNum = static_cast<RegClassMIPS32>(C); |
assert(ClassNum < RCMIPS32_NUM); |
@@ -75,6 +83,18 @@ const char *getRegClassName(RegClass C) { |
} |
} |
+// Stack alignment |
+constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 8; |
+ |
+// Value is in bytes. Return Value adjusted to the next highest multiple of the |
+// stack alignment required for the given type. |
+uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { |
+ size_t typeAlignInBytes = typeWidthInBytes(Ty); |
+ if (isVectorType(Ty)) |
+ UnimplementedError(getFlags()); |
+ return Utils::applyAlignment(Value, typeAlignInBytes); |
+} |
+ |
} // end of anonymous namespace |
TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) {} |
@@ -105,6 +125,20 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) { |
assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]); |
REGMIPS32_TABLE; |
#undef X |
+ |
+ // TODO(mohit.bhakkad): Change these inits once we provide argument related |
+ // field in register tables |
+ for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++) |
+ GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i); |
+ |
+ for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++) |
+ I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i); |
+ |
+ for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) { |
+ FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2); |
+ FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i); |
+ } |
+ |
TypeToRegisterSet[IceType_void] = InvalidRegisters; |
TypeToRegisterSet[IceType_i1] = IntegerRegisters; |
TypeToRegisterSet[IceType_i8] = IntegerRegisters; |
@@ -129,6 +163,24 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) { |
RegMIPS32::getRegName, getRegClassName); |
} |
+void TargetMIPS32::findMaxStackOutArgsSize() { |
+ // MinNeededOutArgsBytes should be updated if the Target ever creates a |
+ // high-level InstCall that requires more stack bytes. |
+ constexpr size_t MinNeededOutArgsBytes = 16; |
+ MaxOutArgsSizeBytes = MinNeededOutArgsBytes; |
+ for (CfgNode *Node : Func->getNodes()) { |
+ Context.init(Node); |
+ while (!Context.atEnd()) { |
+ PostIncrLoweringContext PostIncrement(Context); |
+ Inst *CurInstr = iteratorToInst(Context.getCur()); |
+ if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
+ SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
+ MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
+ } |
+ } |
+ } |
+} |
+ |
void TargetMIPS32::translateO2() { |
TimerMarker T(TimerStack::TT_O2, Func); |
@@ -136,6 +188,8 @@ void TargetMIPS32::translateO2() { |
// https://code.google.com/p/nativeclient/issues/detail?id=4094 |
genTargetHelperCalls(); |
+ findMaxStackOutArgsSize(); |
+ |
// Merge Alloca instructions, and lay out the stack. |
static constexpr bool SortAndCombineAllocas = false; |
Func->processAllocas(SortAndCombineAllocas); |
@@ -237,6 +291,8 @@ void TargetMIPS32::translateOm1() { |
// TODO: share passes with X86? |
genTargetHelperCalls(); |
+ findMaxStackOutArgsSize(); |
+ |
// Do not merge Alloca instructions, and lay out the stack. |
static constexpr bool SortAndCombineAllocas = false; |
Func->processAllocas(SortAndCombineAllocas); |
@@ -403,88 +459,450 @@ void TargetMIPS32::emitVariable(const Variable *Var) const { |
Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy); |
Str << ")"; |
} |
+ // UnimplementedError(getFlags()); |
[Review comment — Jim Stichnoth, 2016/06/13 20:14:19]: remove this.
+} |
+ |
+TargetMIPS32::CallingConv::CallingConv() |
+ : GPRegsUsed(RegMIPS32::Reg_NUM), |
+ GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()), |
+ I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()), |
+ VFPRegsUsed(RegMIPS32::Reg_NUM), |
+ FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()), |
+ FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {} |
+ |
+// In MIPS O32 abi FP argument registers can be used only if first argument is |
+// of type float/double. UseFPRegs flag is used to care of that. Also FP arg |
+// registers can be used only for first 2 arguments, so we require argument |
+// number to make register allocation decisions. |
+bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo, |
+ RegNumT *Reg) { |
+ if (isScalarIntegerType(Ty)) |
+ return argInGPR(Ty, Reg); |
+ if (isScalarFloatingType(Ty)) { |
+ if (ArgNo == 0) { |
+ UseFPRegs = true; |
+ return argInVFP(Ty, Reg); |
+ } |
+ if (UseFPRegs && ArgNo == 1) { |
+ UseFPRegs = false; |
+ return argInVFP(Ty, Reg); |
+ } |
+ return argInGPR(Ty, Reg); |
+ } |
UnimplementedError(getFlags()); |
+ return false; |
+} |
+ |
+bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { |
+ CfgVector<RegNumT> *Source; |
+ |
+ switch (Ty) { |
+ default: { |
+ UnimplementedError(getFlags()); |
+ return false; |
+ } break; |
+ case IceType_i32: |
+ case IceType_f32: { |
+ Source = &GPRArgs; |
+ } break; |
+ case IceType_i64: |
+ case IceType_f64: { |
+ Source = &I64Args; |
+ } break; |
+ } |
+ |
+ discardUnavailableGPRsAndTheirAliases(Source); |
+ |
+ if (Source->empty()) { |
+ GPRegsUsed.set(); |
+ return false; |
+ } |
+ |
+ *Reg = Source->back(); |
+ // Note that we don't Source->pop_back() here. This is intentional. Notice how |
+ // we mark all of Reg's aliases as Used. So, for the next argument, |
+ // Source->back() is marked as unavailable, and it is thus implicitly popped |
+ // from the stack. |
+ GPRegsUsed |= RegisterAliases[*Reg]; |
+ return true; |
+} |
+ |
+inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases( |
+ CfgVector<RegNumT> *Regs) { |
+ GPRegsUsed |= RegisterAliases[Regs->back()]; |
+ Regs->pop_back(); |
+} |
+ |
+// GPR are not packed when passing parameters. Thus, a function foo(i32, i64, |
+// i32) will have the first argument in a0, the second in a2-a3, and the third |
+// on the stack. To model this behavior, whenever we pop a register from Regs, |
+// we remove all of its aliases from the pool of available GPRs. This has the |
+// effect of computing the "closure" on the GPR registers. |
+void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases( |
+ CfgVector<RegNumT> *Regs) { |
+ while (!Regs->empty() && GPRegsUsed[Regs->back()]) { |
+ discardNextGPRAndItsAliases(Regs); |
+ } |
+} |
+ |
+bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) { |
+ CfgVector<RegNumT> *Source; |
+ |
+ switch (Ty) { |
+ default: { |
+ UnimplementedError(getFlags()); |
+ return false; |
+ } break; |
+ case IceType_f32: { |
+ Source = &FP32Args; |
+ } break; |
+ case IceType_f64: { |
+ Source = &FP64Args; |
+ } break; |
+ } |
+ |
+ discardUnavailableVFPRegsAndTheirAliases(Source); |
+ |
+ if (Source->empty()) { |
+ VFPRegsUsed.set(); |
+ return false; |
+ } |
+ |
+ *Reg = Source->back(); |
+ VFPRegsUsed |= RegisterAliases[*Reg]; |
+ |
+ // In MIPS O32 abi if fun arguments are (f32, i32) then one can not use reg_a0 |
+ // for second argument even though it's free. f32 arg goes in reg_f12, i32 arg |
+ // goes in reg_a1. Similarly if arguments are (f64, i32) second argument goes |
+ // in reg_a3 and a0, a1 are not used. |
+ Source = &GPRArgs; |
+ // Discard one GPR reg for f32(4 bytes), two for f64(4 + 4 bytes) |
+ discardNextGPRAndItsAliases(Source); |
+ if (Ty == IceType_f64) |
+ discardNextGPRAndItsAliases(Source); |
+ |
+ return true; |
+} |
+ |
+void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases( |
+ CfgVector<RegNumT> *Regs) { |
+ while (!Regs->empty() && VFPRegsUsed[Regs->back()]) { |
+ Regs->pop_back(); |
+ } |
} |
void TargetMIPS32::lowerArguments() { |
VarList &Args = Func->getArgs(); |
- // We are only handling integer registers for now. The Mips o32 ABI is |
- // somewhat complex but will be implemented in its totality through follow |
- // on patches. |
- // |
- unsigned NumGPRRegsUsed = 0; |
- // For each register argument, replace Arg in the argument list with the |
- // home register. Then generate an instruction in the prolog to copy the |
- // home register to the assigned location of Arg. |
+ TargetMIPS32::CallingConv CC; |
+ |
+ // For each register argument, replace Arg in the argument list with the home |
+ // register. Then generate an instruction in the prolog to copy the home |
+ // register to the assigned location of Arg. |
Context.init(Func->getEntryNode()); |
Context.setInsertPoint(Context.getCur()); |
+ |
for (SizeT I = 0, E = Args.size(); I < E; ++I) { |
Variable *Arg = Args[I]; |
Type Ty = Arg->getType(); |
- // TODO(rkotler): handle float/vector types. |
- if (isVectorType(Ty)) { |
- UnimplementedError(getFlags()); |
+ RegNumT RegNum; |
+ if (!CC.argInReg(Ty, I, &RegNum)) { |
continue; |
} |
- if (isFloatingType(Ty)) { |
- UnimplementedError(getFlags()); |
- continue; |
+ Variable *RegisterArg = Func->makeVariable(Ty); |
+ if (BuildDefs::dump()) { |
+ RegisterArg->setName(Func, "home_reg:" + Arg->getName()); |
} |
- if (Ty == IceType_i64) { |
- if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG) |
- continue; |
- auto RegLo = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed); |
- auto RegHi = RegNumT::fixme(RegLo + 1); |
- ++NumGPRRegsUsed; |
- // Always start i64 registers at an even register, so this may end |
- // up padding away a register. |
- if (RegLo % 2 != 0) { |
- RegLo = RegNumT::fixme(RegLo + 1); |
- ++NumGPRRegsUsed; |
- } |
- // If this leaves us without room to consume another register, |
- // leave any previously speculatively consumed registers as consumed. |
- if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG) |
- continue; |
- // RegHi = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed); |
- ++NumGPRRegsUsed; |
- Variable *RegisterArg = Func->makeVariable(Ty); |
- auto *RegisterArg64On32 = llvm::cast<Variable64On32>(RegisterArg); |
- if (BuildDefs::dump()) |
- RegisterArg64On32->setName(Func, "home_reg:" + Arg->getName()); |
- RegisterArg64On32->initHiLo(Func); |
- RegisterArg64On32->setIsArg(); |
- RegisterArg64On32->getLo()->setRegNum(RegLo); |
- RegisterArg64On32->getHi()->setRegNum(RegHi); |
- Arg->setIsArg(false); |
- Args[I] = RegisterArg64On32; |
- Context.insert<InstAssign>(Arg, RegisterArg); |
- continue; |
- } else { |
- assert(Ty == IceType_i32); |
- if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG) |
- continue; |
- const auto RegNum = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed); |
- ++NumGPRRegsUsed; |
- Variable *RegisterArg = Func->makeVariable(Ty); |
- if (BuildDefs::dump()) { |
- RegisterArg->setName(Func, "home_reg:" + Arg->getName()); |
- } |
- RegisterArg->setRegNum(RegNum); |
- RegisterArg->setIsArg(); |
- Arg->setIsArg(false); |
- Args[I] = RegisterArg; |
- Context.insert<InstAssign>(Arg, RegisterArg); |
+ RegisterArg->setIsArg(); |
+ Arg->setIsArg(false); |
+ Args[I] = RegisterArg; |
+ switch (Ty) { |
+ default: { RegisterArg->setRegNum(RegNum); } break; |
+ case IceType_i64: { |
+ auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg); |
+ RegisterArg64->initHiLo(Func); |
+ RegisterArg64->getLo()->setRegNum( |
+ RegNumT::fixme(RegMIPS32::getI64PairFirstGPRNum(RegNum))); |
+ RegisterArg64->getHi()->setRegNum( |
+ RegNumT::fixme(RegMIPS32::getI64PairSecondGPRNum(RegNum))); |
+ } break; |
} |
+ Context.insert<InstAssign>(Arg, RegisterArg); |
} |
} |
Type TargetMIPS32::stackSlotType() { return IceType_i32; } |
+// Helper function for addProlog(). |
+// |
+// This assumes Arg is an argument passed on the stack. This sets the frame |
+// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
+// I64 arg that has been split into Lo and Hi components, it calls itself |
+// recursively on the components, taking care to handle Lo first because of the |
+// little-endian architecture. Lastly, this function generates an instruction |
+// to copy Arg into its assigned register if applicable. |
+void TargetMIPS32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
+ size_t BasicFrameOffset, |
+ size_t *InArgsSizeBytes) { |
+ const Type Ty = Arg->getType(); |
+ *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty); |
+ |
+ if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
+ Variable *const Lo = Arg64On32->getLo(); |
+ Variable *const Hi = Arg64On32->getHi(); |
+ finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
+ finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
+ return; |
+ } |
+ assert(Ty != IceType_i64); |
+ |
+ const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes; |
+ *InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
+ |
+ if (!Arg->hasReg()) { |
+ Arg->setStackOffset(ArgStackOffset); |
+ return; |
+ } |
+ |
+ // If the argument variable has been assigned a register, we need to copy the |
+ // value from the stack slot. |
+ Variable *Parameter = Func->makeVariable(Ty); |
+ Parameter->setMustNotHaveReg(); |
+ Parameter->setStackOffset(ArgStackOffset); |
+ _mov(Arg, Parameter); |
+} |
+ |
void TargetMIPS32::addProlog(CfgNode *Node) { |
- (void)Node; |
+ // Stack frame layout: |
+ // |
+ // +------------------------+ |
+ // | 1. preserved registers | |
+ // +------------------------+ |
+ // | 2. padding | |
+ // +------------------------+ |
+ // | 3. global spill area | |
+ // +------------------------+ |
+ // | 4. padding | |
+ // +------------------------+ |
+ // | 5. local spill area | |
+ // +------------------------+ |
+ // | 6. padding | |
+ // +------------------------+ |
+ // | 7. allocas | |
+ // +------------------------+ |
+ // | 8. padding | |
+ // +------------------------+ |
+ // | 9. out args | |
+ // +------------------------+ <--- StackPointer |
+ // |
+ // The following variables record the size in bytes of the given areas: |
+ // * PreservedRegsSizeBytes: area 1 |
+ // * SpillAreaPaddingBytes: area 2 |
+ // * GlobalsSize: area 3 |
+ // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
+ // * LocalsSpillAreaSize: area 5 |
+ // * SpillAreaSizeBytes: areas 2 - 9 |
+ // * maxOutArgsSizeBytes(): area 9 |
+ |
+ Context.init(Node); |
+ Context.setInsertPoint(Context.getCur()); |
+ |
+ SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None); |
+ RegsUsed = SmallBitVector(CalleeSaves.size()); |
+ |
+ VarList SortedSpilledVariables; |
+ |
+ size_t GlobalsSize = 0; |
+ // If there is a separate locals area, this represents that area. Otherwise |
+ // it counts any variable not counted by GlobalsSize. |
+ SpillAreaSizeBytes = 0; |
+ // If there is a separate locals area, this specifies the alignment for it. |
+ uint32_t LocalsSlotsAlignmentBytes = 0; |
+ // The entire spill locations area gets aligned to largest natural alignment |
+ // of the variables that have a spill slot. |
+ uint32_t SpillAreaAlignmentBytes = 0; |
+ // For now, we don't have target-specific variables that need special |
+ // treatment (no stack-slot-linked SpillVariable type). |
+ std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) { |
+ static constexpr bool AssignStackSlot = false; |
+ static constexpr bool DontAssignStackSlot = !AssignStackSlot; |
+ if (llvm::isa<Variable64On32>(Var)) { |
+ return DontAssignStackSlot; |
+ } |
+ return AssignStackSlot; |
+ }; |
+ |
+ // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
+ getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
+ &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
+ &LocalsSlotsAlignmentBytes, TargetVarHook); |
+ uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
+ SpillAreaSizeBytes += GlobalsSize; |
+ |
+ PreservedGPRs.reserve(CalleeSaves.size()); |
+ |
+ // Consider FP and RA as callee-save / used as needed. |
+ if (UsesFramePointer) { |
+ if (RegsUsed[RegMIPS32::Reg_FP]) { |
+ llvm::report_fatal_error("Frame pointer has been used."); |
+ } |
+ CalleeSaves[RegMIPS32::Reg_FP] = true; |
+ RegsUsed[RegMIPS32::Reg_FP] = true; |
+ } |
+ if (!MaybeLeafFunc) { |
+ CalleeSaves[RegMIPS32::Reg_RA] = true; |
+ RegsUsed[RegMIPS32::Reg_RA] = true; |
+ } |
+ |
+ // Make two passes over the used registers. The first pass records all the |
+ // used registers -- and their aliases. Then, we figure out which GPR |
+ // registers should be saved. |
+ SmallBitVector ToPreserve(RegMIPS32::Reg_NUM); |
+ for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
+ if (CalleeSaves[i] && RegsUsed[i]) { |
+ ToPreserve |= RegisterAliases[i]; |
+ } |
+ } |
+ |
+ uint32_t NumCallee = 0; |
+ size_t PreservedRegsSizeBytes = 0; |
+ |
+ // RegClasses is a tuple of |
+ // |
+ // <First Register in Class, Last Register in Class, Vector of Save Registers> |
+ // |
+ // We use this tuple to figure out which register we should save/restore |
+ // during |
+ // prolog/epilog. |
+ using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>; |
+ const RegClassType RegClass = RegClassType( |
+ RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_GPR_Last, &PreservedGPRs); |
+ const uint32_t FirstRegInClass = std::get<0>(RegClass); |
+ const uint32_t LastRegInClass = std::get<1>(RegClass); |
+ VarList *const PreservedRegsInClass = std::get<2>(RegClass); |
+ for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) { |
+ if (!ToPreserve[Reg]) { |
+ continue; |
+ } |
+ ++NumCallee; |
+ Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg)); |
+ PreservedRegsSizeBytes += |
+ typeWidthInBytesOnStack(PhysicalRegister->getType()); |
+ PreservedRegsInClass->push_back(PhysicalRegister); |
+ } |
+ |
+ Ctx->statsUpdateRegistersSaved(NumCallee); |
+ |
+ // Align the variables area. SpillAreaPaddingBytes is the size of the region |
+ // after the preserved registers and before the spill areas. |
+ // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
+ // locals area if they are separate. |
+ assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES); |
+ (void)MIPS32_STACK_ALIGNMENT_BYTES; |
+ assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
+ uint32_t SpillAreaPaddingBytes = 0; |
+ uint32_t LocalsSlotsPaddingBytes = 0; |
+ alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
+ GlobalsSize, LocalsSlotsAlignmentBytes, |
+ &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
+ SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
+ uint32_t GlobalsAndSubsequentPaddingSize = |
+ GlobalsSize + LocalsSlotsPaddingBytes; |
+ |
+ if (MaybeLeafFunc) |
+ MaxOutArgsSizeBytes = 0; |
+ |
+ // Adds the out args space to the stack, and align SP if necessary. |
+ uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes; |
+ |
+ // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with |
+ // TotalStackSizeBytes once lowerAlloca is implemented and leaf function |
+ // information is generated by lowerCall. |
+ |
+ // Generate "addiu sp, sp, -TotalStackSizeBytes" |
+ if (TotalStackSizeBytes) { |
+ // Use the scratch register if needed to legalize the immediate. |
+ Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
+ _addiu(SP, SP, -(TotalStackSizeBytes)); |
+ } |
+ |
+ Ctx->statsUpdateFrameBytes(TotalStackSizeBytes); |
+ |
+ if (!PreservedGPRs.empty()) { |
+ uint32_t StackOffset = TotalStackSizeBytes; |
+ for (Variable *Var : *PreservedRegsInClass) { |
+ Variable *PhysicalRegister = getPhysicalRegister(Var->getRegNum()); |
+ StackOffset -= typeWidthInBytesOnStack(PhysicalRegister->getType()); |
+ Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
+ OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create( |
+ Func, IceType_i32, SP, |
+ llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset))); |
+ _sw(PhysicalRegister, MemoryLocation); |
+ } |
+ } |
+ |
+ Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP); |
+ |
+ // Generate "mov FP, SP" if needed. |
+ if (UsesFramePointer) { |
+ Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
+ _mov(FP, SP); |
+ // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
+ Context.insert<InstFakeUse>(FP); |
+ } |
+ |
+ // Fill in stack offsets for stack args, and copy args into registers for |
+ // those that were register-allocated. Args are pushed right to left, so |
+ // Arg[0] is closest to the stack/frame pointer. |
+ const VarList &Args = Func->getArgs(); |
+ size_t InArgsSizeBytes = 0; |
+ TargetMIPS32::CallingConv CC; |
+ uint32_t ArgNo = 0; |
+ |
+ for (Variable *Arg : Args) { |
+ RegNumT DummyReg; |
+ const Type Ty = Arg->getType(); |
+ // Skip arguments passed in registers. |
+ if (CC.argInReg(Ty, ArgNo, &DummyReg)) { |
+ ArgNo++; |
+ continue; |
+ } else { |
+ finishArgumentLowering(Arg, FP, TotalStackSizeBytes, &InArgsSizeBytes); |
+ } |
+ } |
+ |
+ // Fill in stack offsets for locals. |
+ assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
+ SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
+ UsesFramePointer); |
+ this->HasComputedFrame = true; |
+ |
+ if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { |
+ OstreamLocker _(Func->getContext()); |
+ Ostream &Str = Func->getContext()->getStrDump(); |
+ |
+ Str << "Stack layout:\n"; |
+ uint32_t SPAdjustmentPaddingSize = |
+ SpillAreaSizeBytes - LocalsSpillAreaSize - |
+ GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - |
+ MaxOutArgsSizeBytes; |
+ Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
+ << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
+ << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
+ << " globals spill area = " << GlobalsSize << " bytes\n" |
+ << " globals-locals spill areas intermediate padding = " |
+ << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
+ << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
+ << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n"; |
+ |
+ Str << "Stack details:\n" |
+ << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" |
+ << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
+ << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n" |
+ << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
+ << " bytes\n" |
+ << " is FP based = " << 1 << "\n"; |
+ } |
return; |
- UnimplementedError(getFlags()); |
} |
void TargetMIPS32::addEpilog(CfgNode *Node) { |
@@ -707,17 +1125,6 @@ void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) { |
UnimplementedLoweringError(this, Instr); |
return; |
} |
- switch (Instr->getOp()) { |
- default: |
- break; |
- case InstArithmetic::Fadd: |
- case InstArithmetic::Fsub: |
- case InstArithmetic::Fmul: |
- case InstArithmetic::Fdiv: |
- case InstArithmetic::Frem: |
- UnimplementedLoweringError(this, Instr); |
- return; |
- } |
// At this point Dest->getType() is non-i64 scalar |
@@ -796,13 +1203,54 @@ void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) { |
_mov(Dest, T); |
return; |
} |
- case InstArithmetic::Fadd: |
+ case InstArithmetic::Fadd: { |
+ if (DestTy == IceType_f32) { |
+ _add_s(T, Src0R, Src1R); |
+ _mov_s(Dest, T); |
[Review comment — Jim Stichnoth, 2016/06/13 20:14:19]: From this, I am seeing invalid asm code being generated. [comment truncated in capture]
+ return; |
+ } |
+ if (DestTy == IceType_f64) { |
+ _add_d(T, Src0R, Src1R); |
+ _mov_d(Dest, T); |
+ return; |
+ } |
break; |
+ } |
case InstArithmetic::Fsub: |
+ if (DestTy == IceType_f32) { |
+ _sub_s(T, Src0R, Src1R); |
+ _mov_s(Dest, T); |
+ return; |
+ } |
+ if (DestTy == IceType_f64) { |
+ _sub_d(T, Src0R, Src1R); |
+ _mov_d(Dest, T); |
+ return; |
+ } |
break; |
case InstArithmetic::Fmul: |
+ if (DestTy == IceType_f32) { |
+ _mul_s(T, Src0R, Src1R); |
+ _mov_s(Dest, T); |
+ return; |
+ } |
+ if (DestTy == IceType_f64) { |
+ _mul_d(T, Src0R, Src1R); |
+ _mov_d(Dest, T); |
+ return; |
+ } |
break; |
case InstArithmetic::Fdiv: |
+ if (DestTy == IceType_f32) { |
+ _div_s(T, Src0R, Src1R); |
+ _mov_s(Dest, T); |
+ return; |
+ } |
+ if (DestTy == IceType_f64) { |
+ _div_d(T, Src0R, Src1R); |
+ _mov_d(Dest, T); |
+ return; |
+ } |
break; |
case InstArithmetic::Frem: |
[Review comment — Jim Stichnoth, 2016/06/13 20:14:19]: For now, avoid a liveness validation error by adding… [comment truncated in capture]
break; |
@@ -934,10 +1382,10 @@ void TargetMIPS32::lowerBr(const InstBr *Instr) { |
void TargetMIPS32::lowerCall(const InstCall *Instr) { |
// TODO(rkotler): assign arguments to registers and stack. Also reserve stack. |
- if (Instr->getNumArgs()) { |
- UnimplementedLoweringError(this, Instr); |
- return; |
- } |
+ // if (Instr->getNumArgs()) { |
Jim Stichnoth
2016/06/13 20:14:19
remove this
|
+ // UnimplementedLoweringError(this, Instr); |
+ // return; |
+ //} |
// Generate the call instruction. Assign its result to a temporary with high |
// register allocation weight. |
Variable *Dest = Instr->getDest(); |
@@ -962,19 +1410,22 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
ReturnRegHi = I32Reg(RegMIPS32::Reg_V1); |
break; |
case IceType_f32: |
+ ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0); |
+ break; |
case IceType_f64: |
- UnimplementedLoweringError(this, Instr); |
- return; |
+ ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0F1); |
+ break; |
case IceType_v4i1: |
case IceType_v8i1: |
case IceType_v16i1: |
case IceType_v16i8: |
case IceType_v8i16: |
case IceType_v4i32: |
- case IceType_v4f32: |
+ case IceType_v4f32: { |
UnimplementedLoweringError(this, Instr); |
return; |
} |
+ } |
} |
Operand *CallTarget = Instr->getCallTarget(); |
// Allow ConstantRelocatable to be left alone as a direct call, |
@@ -998,23 +1449,43 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
// Assign the result of the call to Dest. |
if (ReturnReg) { |
- if (ReturnRegHi) { |
- assert(Dest->getType() == IceType_i64); |
+ auto *Zero = getZero(); |
+ switch (Dest->getType()) { |
+ default: |
+ UnimplementedLoweringError(this, Instr); |
+ case IceType_i1: |
+ case IceType_i8: |
+ case IceType_i16: |
+ case IceType_i32: { |
+ _addu(Dest, Zero, ReturnReg); |
+ break; |
+ } |
+ case IceType_i64: { |
auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
Variable *DestLo = Dest64On32->getLo(); |
Variable *DestHi = Dest64On32->getHi(); |
- _mov(DestLo, ReturnReg); |
- _mov(DestHi, ReturnRegHi); |
- } else { |
- assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
- Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
- isVectorType(Dest->getType())); |
- if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { |
- UnimplementedLoweringError(this, Instr); |
- return; |
- } else { |
- _mov(Dest, ReturnReg); |
- } |
+ _addu(DestLo, Zero, ReturnReg); |
+ _addu(DestHi, Zero, ReturnRegHi); |
+ break; |
+ } |
+ case IceType_f32: { |
+ _mov_s(Dest, ReturnReg); |
+ break; |
+ } |
+ case IceType_f64: { |
+ _mov_d(Dest, ReturnReg); |
+ break; |
+ } |
+ case IceType_v4i1: |
+ case IceType_v8i1: |
+ case IceType_v16i1: |
+ case IceType_v16i8: |
+ case IceType_v8i16: |
+ case IceType_v4i32: |
+ case IceType_v4f32: { |
+ UnimplementedLoweringError(this, Instr); |
+ return; |
+ } |
} |
} |
} |
@@ -1423,7 +1894,24 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) { |
Variable *Reg = nullptr; |
if (Instr->hasRetValue()) { |
Operand *Src0 = Instr->getRetValue(); |
+ |
switch (Src0->getType()) { |
+ default: |
+ UnimplementedLoweringError(this, Instr); |
[Review comment — Jim Stichnoth, 2016/06/13 20:14:19]: You probably want a "break;" after this, otherwise [it falls through into the IceType_f32 case below — comment truncated in capture].
+ case IceType_f32: { |
+ if (auto *Src0V = llvm::dyn_cast<Variable>(Src0)) { |
+ Reg = makeReg(Src0V->getType(), RegMIPS32::Reg_F0); |
+ _mov_s(Reg, Src0V); |
+ } |
+ break; |
+ } |
+ case IceType_f64: { |
+ if (auto *Src0V = llvm::dyn_cast<Variable>(Src0)) { |
+ Reg = makeReg(Src0V->getType(), RegMIPS32::Reg_F0F1); |
+ _mov_d(Reg, Src0V); |
+ } |
+ break; |
+ } |
case IceType_i1: |
case IceType_i8: |
case IceType_i16: |
@@ -1442,9 +1930,16 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) { |
Context.insert<InstFakeUse>(R1); |
break; |
} |
- |
- default: |
+ case IceType_v4i1: |
+ case IceType_v8i1: |
+ case IceType_v16i1: |
+ case IceType_v16i8: |
+ case IceType_v8i16: |
+ case IceType_v4i32: |
+ case IceType_v4f32: { |
UnimplementedLoweringError(this, Instr); |
+ break; |
+ } |
} |
} |
_ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg); |
@@ -1551,13 +2046,13 @@ void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars, |
void TargetDataMIPS32::lowerConstants() { |
if (getFlags().getDisableTranslation()) |
return; |
- UnimplementedError(getFlags()); |
+ // UnimplementedError(getFlags()); |
[Review comment — Jim Stichnoth, 2016/06/13 20:14:19]: remove this.
} |
void TargetDataMIPS32::lowerJumpTables() { |
if (getFlags().getDisableTranslation()) |
return; |
- UnimplementedError(getFlags()); |
+ // UnimplementedError(getFlags()); |
[Review comment — Jim Stichnoth, 2016/06/13 20:14:19]: remove this.
} |
// Helper for legalize() to emit the right code to lower an operand to a |
@@ -1565,8 +2060,11 @@ void TargetDataMIPS32::lowerJumpTables() { |
Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) { |
Type Ty = Src->getType(); |
Variable *Reg = makeReg(Ty, RegNum); |
- if (isVectorType(Ty) || isFloatingType(Ty)) { |
+ if (isVectorType(Ty)) { |
UnimplementedError(getFlags()); |
+ } else if (isFloatingType(Ty)) { |
+ (Ty == IceType_f32) ? _mov_s(Reg, llvm::dyn_cast<Variable>(Src)) |
+ : _mov_d(Reg, llvm::dyn_cast<Variable>(Src)); |
} else { |
// Mov's Src operand can really only be the flexible second operand type |
// or a register. Users should guarantee that. |