src/IceTargetLoweringMIPS32.cpp - Issue 2027773002: Subzero, MIPS32: Handling floating point instructions fadd, fsub, fmul, fdiv

Unified Diff: src/IceTargetLoweringMIPS32.cpp

Issue 2027773002: Subzero, MIPS32: Handling floating point instructions fadd, fsub, fmul, fdiv (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Rebase for applying prerequisites Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringMIPS32.cpp

diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp

index 70ddef531f88350f82bf5fc12e8d6b01cdca4ba9..269ecdb7a4f02d9ca4301f813a067093287ca5e1 100644

--- a/src/IceTargetLoweringMIPS32.cpp

+++ b/src/IceTargetLoweringMIPS32.cpp

@@ -64,6 +64,14 @@ namespace {

// The maximum number of arguments to pass in GPR registers.

constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;

+std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;

+std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;

+constexpr uint32_t MIPS32_MAX_FP_ARG = 2;

+std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;

+std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;

const char *getRegClassName(RegClass C) {

auto ClassNum = static_cast<RegClassMIPS32>(C);

assert(ClassNum < RCMIPS32_NUM);

@@ -75,6 +83,18 @@ const char *getRegClassName(RegClass C) {

}

+// Stack alignment

+constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 8;

+// Value is in bytes. Return Value adjusted to the next highest multiple of the

+// stack alignment required for the given type.

+uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {

+ size_t typeAlignInBytes = typeWidthInBytes(Ty);

+ if (isVectorType(Ty))

+ UnimplementedError(getFlags());

+ return Utils::applyAlignment(Value, typeAlignInBytes);

} // end of anonymous namespace

TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) {}

@@ -105,6 +125,20 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) {

assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);

REGMIPS32_TABLE;

#undef X

+ // TODO(mohit.bhakkad): Change these inits once we provide argument related

+ // field in register tables

+ for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)

+ GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);

+ for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)

+ I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);

+ for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {

+ FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);

+ FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);

+ }

TypeToRegisterSet[IceType_void] = InvalidRegisters;

TypeToRegisterSet[IceType_i1] = IntegerRegisters;

TypeToRegisterSet[IceType_i8] = IntegerRegisters;

@@ -129,6 +163,24 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) {

RegMIPS32::getRegName, getRegClassName);

}

+void TargetMIPS32::findMaxStackOutArgsSize() {

+ // MinNeededOutArgsBytes should be updated if the Target ever creates a

+ // high-level InstCall that requires more stack bytes.

+ constexpr size_t MinNeededOutArgsBytes = 16;

+ MaxOutArgsSizeBytes = MinNeededOutArgsBytes;

+ for (CfgNode *Node : Func->getNodes()) {

+ Context.init(Node);

+ while (!Context.atEnd()) {

+ PostIncrLoweringContext PostIncrement(Context);

+ Inst *CurInstr = iteratorToInst(Context.getCur());

+ if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {

+ SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);

+ MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);

+ }

void TargetMIPS32::translateO2() {

TimerMarker T(TimerStack::TT_O2, Func);

@@ -136,6 +188,8 @@ void TargetMIPS32::translateO2() {

// https://code.google.com/p/nativeclient/issues/detail?id=4094

genTargetHelperCalls();

+ findMaxStackOutArgsSize();

// Merge Alloca instructions, and lay out the stack.

static constexpr bool SortAndCombineAllocas = false;

Func->processAllocas(SortAndCombineAllocas);

@@ -237,6 +291,8 @@ void TargetMIPS32::translateOm1() {

// TODO: share passes with X86?

genTargetHelperCalls();

+ findMaxStackOutArgsSize();

// Do not merge Alloca instructions, and lay out the stack.

static constexpr bool SortAndCombineAllocas = false;

Func->processAllocas(SortAndCombineAllocas);

@@ -403,88 +459,450 @@ void TargetMIPS32::emitVariable(const Variable *Var) const {

Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);

Str << ")";

}

+ // UnimplementedError(getFlags());

Jim Stichnoth 2016/06/13 20:14:19 remove this

+TargetMIPS32::CallingConv::CallingConv()

+ : GPRegsUsed(RegMIPS32::Reg_NUM),

+ GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),

+ I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),

+ VFPRegsUsed(RegMIPS32::Reg_NUM),

+ FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),

+ FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}

+// In the MIPS O32 ABI, FP argument registers can be used only if the first

+// argument is of type float/double. The UseFPRegs flag takes care of that.

+// Also, FP arg registers can be used only for the first 2 arguments, so we

+// require the argument number to make register allocation decisions.

+bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,

+ RegNumT *Reg) {

+ if (isScalarIntegerType(Ty))

+ return argInGPR(Ty, Reg);

+ if (isScalarFloatingType(Ty)) {

+ if (ArgNo == 0) {

+ UseFPRegs = true;

+ return argInVFP(Ty, Reg);

+ }

+ if (UseFPRegs && ArgNo == 1) {

+ UseFPRegs = false;

+ return argInVFP(Ty, Reg);

+ }

+ return argInGPR(Ty, Reg);

+ }

UnimplementedError(getFlags());

+ return false;

+bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {

+ CfgVector<RegNumT> *Source;

+ switch (Ty) {

+ default: {

+ UnimplementedError(getFlags());

+ return false;

+ } break;

+ case IceType_i32:

+ case IceType_f32: {

+ Source = &GPRArgs;

+ } break;

+ case IceType_i64:

+ case IceType_f64: {

+ Source = &I64Args;

+ } break;

+ }

+ discardUnavailableGPRsAndTheirAliases(Source);

+ if (Source->empty()) {

+ GPRegsUsed.set();

+ return false;

+ }

+ *Reg = Source->back();

+ // Note that we don't Source->pop_back() here. This is intentional. Notice how

+ // we mark all of Reg's aliases as Used. So, for the next argument,

+ // Source->back() is marked as unavailable, and it is thus implicitly popped

+ // from the stack.

+ GPRegsUsed |= RegisterAliases[*Reg];

+ return true;

+inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(

+ CfgVector<RegNumT> *Regs) {

+ GPRegsUsed |= RegisterAliases[Regs->back()];

+ Regs->pop_back();

+// GPR are not packed when passing parameters. Thus, a function foo(i32, i64,

+// i32) will have the first argument in a0, the second in a2-a3, and the third

+// on the stack. To model this behavior, whenever we pop a register from Regs,

+// we remove all of its aliases from the pool of available GPRs. This has the

+// effect of computing the "closure" on the GPR registers.

+void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(

+ CfgVector<RegNumT> *Regs) {

+ while (!Regs->empty() && GPRegsUsed[Regs->back()]) {

+ discardNextGPRAndItsAliases(Regs);

+ }

+bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {

+ CfgVector<RegNumT> *Source;

+ switch (Ty) {

+ default: {

+ UnimplementedError(getFlags());

+ return false;

+ } break;

+ case IceType_f32: {

+ Source = &FP32Args;

+ } break;

+ case IceType_f64: {

+ Source = &FP64Args;

+ } break;

+ }

+ discardUnavailableVFPRegsAndTheirAliases(Source);

+ if (Source->empty()) {

+ VFPRegsUsed.set();

+ return false;

+ }

+ *Reg = Source->back();

+ VFPRegsUsed |= RegisterAliases[*Reg];

+ // In the MIPS O32 ABI, if the function arguments are (f32, i32), reg_a0

+ // cannot be used for the second argument even though it is free: the f32 arg

+ // goes in reg_f12 and the i32 arg goes in reg_a1. Similarly, if the arguments

+ // are (f64, i32), the second argument goes in reg_a2 and a0, a1 are not used.

+ Source = &GPRArgs;

+ // Discard one GPR reg for f32(4 bytes), two for f64(4 + 4 bytes)

+ discardNextGPRAndItsAliases(Source);

+ if (Ty == IceType_f64)

+ discardNextGPRAndItsAliases(Source);

+ return true;

+void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(

+ CfgVector<RegNumT> *Regs) {

+ while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {

+ Regs->pop_back();

+ }

}

void TargetMIPS32::lowerArguments() {

VarList &Args = Func->getArgs();

- // We are only handling integer registers for now. The Mips o32 ABI is

- // somewhat complex but will be implemented in its totality through follow

- // on patches.

- //

- unsigned NumGPRRegsUsed = 0;

- // For each register argument, replace Arg in the argument list with the

- // home register. Then generate an instruction in the prolog to copy the

- // home register to the assigned location of Arg.

+ TargetMIPS32::CallingConv CC;

+ // For each register argument, replace Arg in the argument list with the home

+ // register. Then generate an instruction in the prolog to copy the home

+ // register to the assigned location of Arg.

Context.init(Func->getEntryNode());

Context.setInsertPoint(Context.getCur());

for (SizeT I = 0, E = Args.size(); I < E; ++I) {

Variable *Arg = Args[I];

Type Ty = Arg->getType();

- // TODO(rkotler): handle float/vector types.

- if (isVectorType(Ty)) {

- UnimplementedError(getFlags());

+ RegNumT RegNum;

+ if (!CC.argInReg(Ty, I, &RegNum)) {

continue;

}

- if (isFloatingType(Ty)) {

- UnimplementedError(getFlags());

- continue;

+ Variable *RegisterArg = Func->makeVariable(Ty);

+ if (BuildDefs::dump()) {

+ RegisterArg->setName(Func, "home_reg:" + Arg->getName());

}

- if (Ty == IceType_i64) {

- if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG)

- continue;

- auto RegLo = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed);

- auto RegHi = RegNumT::fixme(RegLo + 1);

- ++NumGPRRegsUsed;

- // Always start i64 registers at an even register, so this may end

- // up padding away a register.

- if (RegLo % 2 != 0) {

- RegLo = RegNumT::fixme(RegLo + 1);

- ++NumGPRRegsUsed;

- }

- // If this leaves us without room to consume another register,

- // leave any previously speculatively consumed registers as consumed.

- if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG)

- continue;

- // RegHi = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed);

- ++NumGPRRegsUsed;

- Variable *RegisterArg = Func->makeVariable(Ty);

- auto *RegisterArg64On32 = llvm::cast<Variable64On32>(RegisterArg);

- if (BuildDefs::dump())

- RegisterArg64On32->setName(Func, "home_reg:" + Arg->getName());

- RegisterArg64On32->initHiLo(Func);

- RegisterArg64On32->setIsArg();

- RegisterArg64On32->getLo()->setRegNum(RegLo);

- RegisterArg64On32->getHi()->setRegNum(RegHi);

- Arg->setIsArg(false);

- Args[I] = RegisterArg64On32;

- Context.insert<InstAssign>(Arg, RegisterArg);

- continue;

- } else {

- assert(Ty == IceType_i32);

- if (NumGPRRegsUsed >= MIPS32_MAX_GPR_ARG)

- continue;

- const auto RegNum = RegNumT::fixme(RegMIPS32::Reg_A0 + NumGPRRegsUsed);

- ++NumGPRRegsUsed;

- Variable *RegisterArg = Func->makeVariable(Ty);

- if (BuildDefs::dump()) {

- RegisterArg->setName(Func, "home_reg:" + Arg->getName());

- }

- RegisterArg->setRegNum(RegNum);

- RegisterArg->setIsArg();

- Arg->setIsArg(false);

- Args[I] = RegisterArg;

- Context.insert<InstAssign>(Arg, RegisterArg);

+ RegisterArg->setIsArg();

+ Arg->setIsArg(false);

+ Args[I] = RegisterArg;

+ switch (Ty) {

+ default: { RegisterArg->setRegNum(RegNum); } break;

+ case IceType_i64: {

+ auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);

+ RegisterArg64->initHiLo(Func);

+ RegisterArg64->getLo()->setRegNum(

+ RegNumT::fixme(RegMIPS32::getI64PairFirstGPRNum(RegNum)));

+ RegisterArg64->getHi()->setRegNum(

+ RegNumT::fixme(RegMIPS32::getI64PairSecondGPRNum(RegNum)));

+ } break;

}

+ Context.insert<InstAssign>(Arg, RegisterArg);

}

Type TargetMIPS32::stackSlotType() { return IceType_i32; }

+// Helper function for addProlog().

+//

+// This assumes Arg is an argument passed on the stack. This sets the frame

+// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an

+// I64 arg that has been split into Lo and Hi components, it calls itself

+// recursively on the components, taking care to handle Lo first because of the

+// little-endian architecture. Lastly, this function generates an instruction

+// to copy Arg into its assigned register if applicable.

+void TargetMIPS32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,

+ size_t BasicFrameOffset,

+ size_t *InArgsSizeBytes) {

+ const Type Ty = Arg->getType();

+ *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

+ if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {

+ Variable *const Lo = Arg64On32->getLo();

+ Variable *const Hi = Arg64On32->getHi();

+ finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);

+ finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);

+ return;

+ }

+ assert(Ty != IceType_i64);

+ const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;

+ *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

+ if (!Arg->hasReg()) {

+ Arg->setStackOffset(ArgStackOffset);

+ return;

+ }

+ // If the argument variable has been assigned a register, we need to copy the

+ // value from the stack slot.

+ Variable *Parameter = Func->makeVariable(Ty);

+ Parameter->setMustNotHaveReg();

+ Parameter->setStackOffset(ArgStackOffset);

+ _mov(Arg, Parameter);

void TargetMIPS32::addProlog(CfgNode *Node) {

- (void)Node;

+ // Stack frame layout:

+ //

+ // +------------------------+

+ // | 1. preserved registers |

+ // +------------------------+

+ // | 2. padding |

+ // +------------------------+

+ // | 3. global spill area |

+ // +------------------------+

+ // | 4. padding |

+ // +------------------------+

+ // | 5. local spill area |

+ // +------------------------+

+ // | 6. padding |

+ // +------------------------+

+ // | 7. allocas |

+ // +------------------------+

+ // | 8. padding |

+ // +------------------------+

+ // | 9. out args |

+ // +------------------------+ <--- StackPointer

+ //

+ // The following variables record the size in bytes of the given areas:

+ // * PreservedRegsSizeBytes: area 1

+ // * SpillAreaPaddingBytes: area 2

+ // * GlobalsSize: area 3

+ // * GlobalsAndSubsequentPaddingSize: areas 3 - 4

+ // * LocalsSpillAreaSize: area 5

+ // * SpillAreaSizeBytes: areas 2 - 9

+ // * maxOutArgsSizeBytes(): area 9

+ Context.init(Node);

+ Context.setInsertPoint(Context.getCur());

+ SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);

+ RegsUsed = SmallBitVector(CalleeSaves.size());

+ VarList SortedSpilledVariables;

+ size_t GlobalsSize = 0;

+ // If there is a separate locals area, this represents that area. Otherwise

+ // it counts any variable not counted by GlobalsSize.

+ SpillAreaSizeBytes = 0;

+ // If there is a separate locals area, this specifies the alignment for it.

+ uint32_t LocalsSlotsAlignmentBytes = 0;

+ // The entire spill locations area gets aligned to largest natural alignment

+ // of the variables that have a spill slot.

+ uint32_t SpillAreaAlignmentBytes = 0;

+ // For now, we don't have target-specific variables that need special

+ // treatment (no stack-slot-linked SpillVariable type).

+ std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {

+ static constexpr bool AssignStackSlot = false;

+ static constexpr bool DontAssignStackSlot = !AssignStackSlot;

+ if (llvm::isa<Variable64On32>(Var)) {

+ return DontAssignStackSlot;

+ }

+ return AssignStackSlot;

+ };

+ // Compute the list of spilled variables and bounds for GlobalsSize, etc.

+ getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,

+ &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,

+ &LocalsSlotsAlignmentBytes, TargetVarHook);

+ uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;

+ SpillAreaSizeBytes += GlobalsSize;

+ PreservedGPRs.reserve(CalleeSaves.size());

+ // Consider FP and RA as callee-save / used as needed.

+ if (UsesFramePointer) {

+ if (RegsUsed[RegMIPS32::Reg_FP]) {

+ llvm::report_fatal_error("Frame pointer has been used.");

+ }

+ CalleeSaves[RegMIPS32::Reg_FP] = true;

+ RegsUsed[RegMIPS32::Reg_FP] = true;

+ }

+ if (!MaybeLeafFunc) {

+ CalleeSaves[RegMIPS32::Reg_RA] = true;

+ RegsUsed[RegMIPS32::Reg_RA] = true;

+ }

+ // Make two passes over the used registers. The first pass records all the

+ // used registers -- and their aliases. Then, we figure out which GPR

+ // registers should be saved.

+ SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);

+ for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

+ if (CalleeSaves[i] && RegsUsed[i]) {

+ ToPreserve |= RegisterAliases[i];

+ }

+ uint32_t NumCallee = 0;

+ size_t PreservedRegsSizeBytes = 0;

+ // RegClasses is a tuple of

+ //

+ // <First Register in Class, Last Register in Class, Vector of Save Registers>

+ //

+ // We use this tuple to figure out which register we should save/restore

+ // during prolog/epilog.

+ using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;

+ const RegClassType RegClass = RegClassType(

+ RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_GPR_Last, &PreservedGPRs);

+ const uint32_t FirstRegInClass = std::get<0>(RegClass);

+ const uint32_t LastRegInClass = std::get<1>(RegClass);

+ VarList *const PreservedRegsInClass = std::get<2>(RegClass);

+ for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {

+ if (!ToPreserve[Reg]) {

+ continue;

+ }

+ ++NumCallee;

+ Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));

+ PreservedRegsSizeBytes +=

+ typeWidthInBytesOnStack(PhysicalRegister->getType());

+ PreservedRegsInClass->push_back(PhysicalRegister);

+ }

+ Ctx->statsUpdateRegistersSaved(NumCallee);

+ // Align the variables area. SpillAreaPaddingBytes is the size of the region

+ // after the preserved registers and before the spill areas.

+ // LocalsSlotsPaddingBytes is the amount of padding between the globals and

+ // locals area if they are separate.

+ assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);

+ (void)MIPS32_STACK_ALIGNMENT_BYTES;

+ assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);

+ uint32_t SpillAreaPaddingBytes = 0;

+ uint32_t LocalsSlotsPaddingBytes = 0;

+ alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,

+ GlobalsSize, LocalsSlotsAlignmentBytes,

+ &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);

+ SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;

+ uint32_t GlobalsAndSubsequentPaddingSize =

+ GlobalsSize + LocalsSlotsPaddingBytes;

+ if (MaybeLeafFunc)

+ MaxOutArgsSizeBytes = 0;

+ // Adds the out args space to the stack, and align SP if necessary.

+ uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes;

+ // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with

+ // TotalStackSizeBytes once lowerAlloca is implemented and leaf function

+ // information is generated by lowerCall.

+ // Generate "addiu sp, sp, -TotalStackSizeBytes"

+ if (TotalStackSizeBytes) {

+ // Use the scratch register if needed to legalize the immediate.

+ Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);

+ _addiu(SP, SP, -(TotalStackSizeBytes));

+ }

+ Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);

+ if (!PreservedGPRs.empty()) {

+ uint32_t StackOffset = TotalStackSizeBytes;

+ for (Variable *Var : *PreservedRegsInClass) {

+ Variable *PhysicalRegister = getPhysicalRegister(Var->getRegNum());

+ StackOffset -= typeWidthInBytesOnStack(PhysicalRegister->getType());

+ Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);

+ OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(

+ Func, IceType_i32, SP,

+ llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));

+ _sw(PhysicalRegister, MemoryLocation);

+ }

+ Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);

+ // Generate "mov FP, SP" if needed.

+ if (UsesFramePointer) {

+ Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);

+ _mov(FP, SP);

+ // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).

+ Context.insert<InstFakeUse>(FP);

+ }

+ // Fill in stack offsets for stack args, and copy args into registers for

+ // those that were register-allocated. Args are pushed right to left, so

+ // Arg[0] is closest to the stack/frame pointer.

+ const VarList &Args = Func->getArgs();

+ size_t InArgsSizeBytes = 0;

+ TargetMIPS32::CallingConv CC;

+ uint32_t ArgNo = 0;

+ for (Variable *Arg : Args) {

+ RegNumT DummyReg;

+ const Type Ty = Arg->getType();

+ // Skip arguments passed in registers.

+ if (CC.argInReg(Ty, ArgNo, &DummyReg)) {

+ ArgNo++;

+ continue;

+ } else {

+ finishArgumentLowering(Arg, FP, TotalStackSizeBytes, &InArgsSizeBytes);

+ }

+ // Fill in stack offsets for locals.

+ assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,

+ SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,

+ UsesFramePointer);

+ this->HasComputedFrame = true;

+ if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {

+ OstreamLocker _(Func->getContext());

+ Ostream &Str = Func->getContext()->getStrDump();

+ Str << "Stack layout:\n";

+ uint32_t SPAdjustmentPaddingSize =

+ SpillAreaSizeBytes - LocalsSpillAreaSize -

+ GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -

+ MaxOutArgsSizeBytes;

+ Str << " in-args = " << InArgsSizeBytes << " bytes\n"

+ << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"

+ << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"

+ << " globals spill area = " << GlobalsSize << " bytes\n"

+ << " globals-locals spill areas intermediate padding = "

+ << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"

+ << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"

+ << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

+ Str << "Stack details:\n"

+ << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"

+ << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"

+ << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"

+ << " locals spill area alignment = " << LocalsSlotsAlignmentBytes

+ << " bytes\n"

+ << " is FP based = " << 1 << "\n";

+ }

return;

- UnimplementedError(getFlags());

}

void TargetMIPS32::addEpilog(CfgNode *Node) {

@@ -707,17 +1125,6 @@ void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {

UnimplementedLoweringError(this, Instr);

return;

}

- switch (Instr->getOp()) {

- default:

- break;

- case InstArithmetic::Fadd:

- case InstArithmetic::Fsub:

- case InstArithmetic::Fmul:

- case InstArithmetic::Fdiv:

- case InstArithmetic::Frem:

- UnimplementedLoweringError(this, Instr);

- return;

- }

// At this point Dest->getType() is non-i64 scalar

@@ -796,13 +1203,54 @@ void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {

_mov(Dest, T);

return;

}

- case InstArithmetic::Fadd:

+ case InstArithmetic::Fadd: {

+ if (DestTy == IceType_f32) {

+ _add_s(T, Src0R, Src1R);

+ _mov_s(Dest, T);

Jim Stichnoth 2016/06/13 20:14:19 From this, I am seeing invalid asm code being gene

+ return;

+ }

+ if (DestTy == IceType_f64) {

+ _add_d(T, Src0R, Src1R);

+ _mov_d(Dest, T);

+ return;

+ }

break;

+ }

case InstArithmetic::Fsub:

+ if (DestTy == IceType_f32) {

+ _sub_s(T, Src0R, Src1R);

+ _mov_s(Dest, T);

+ return;

+ }

+ if (DestTy == IceType_f64) {

+ _sub_d(T, Src0R, Src1R);

+ _mov_d(Dest, T);

+ return;

+ }

break;

case InstArithmetic::Fmul:

+ if (DestTy == IceType_f32) {

+ _mul_s(T, Src0R, Src1R);

+ _mov_s(Dest, T);

+ return;

+ }

+ if (DestTy == IceType_f64) {

+ _mul_d(T, Src0R, Src1R);

+ _mov_d(Dest, T);

+ return;

+ }

break;

case InstArithmetic::Fdiv:

+ if (DestTy == IceType_f32) {

+ _div_s(T, Src0R, Src1R);

+ _mov_s(Dest, T);

+ return;

+ }

+ if (DestTy == IceType_f64) {

+ _div_d(T, Src0R, Src1R);

+ _mov_d(Dest, T);

+ return;

+ }

break;

case InstArithmetic::Frem:

Jim Stichnoth 2016/06/13 20:14:19 For now, avoid a liveness validation error by addi

break;

@@ -934,10 +1382,10 @@ void TargetMIPS32::lowerBr(const InstBr *Instr) {

void TargetMIPS32::lowerCall(const InstCall *Instr) {

// TODO(rkotler): assign arguments to registers and stack. Also reserve stack.

- if (Instr->getNumArgs()) {

- UnimplementedLoweringError(this, Instr);

- return;

- }

+ // if (Instr->getNumArgs()) {

Jim Stichnoth 2016/06/13 20:14:19 remove this

+ // UnimplementedLoweringError(this, Instr);

+ // return;

+ //}

// Generate the call instruction. Assign its result to a temporary with high

// register allocation weight.

Variable *Dest = Instr->getDest();

@@ -962,19 +1410,22 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {

ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);

break;

case IceType_f32:

+ ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);

+ break;

case IceType_f64:

- UnimplementedLoweringError(this, Instr);

- return;

+ ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0F1);

+ break;

case IceType_v4i1:

case IceType_v8i1:

case IceType_v16i1:

case IceType_v16i8:

case IceType_v8i16:

case IceType_v4i32:

- case IceType_v4f32:

+ case IceType_v4f32: {

UnimplementedLoweringError(this, Instr);

return;

}

+ }

}

Operand *CallTarget = Instr->getCallTarget();

// Allow ConstantRelocatable to be left alone as a direct call,

@@ -998,23 +1449,43 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {

// Assign the result of the call to Dest.

if (ReturnReg) {

- if (ReturnRegHi) {

- assert(Dest->getType() == IceType_i64);

+ auto *Zero = getZero();

+ switch (Dest->getType()) {

+ default:

+ UnimplementedLoweringError(this, Instr);

+ case IceType_i1:

+ case IceType_i8:

+ case IceType_i16:

+ case IceType_i32: {

+ _addu(Dest, Zero, ReturnReg);

+ break;

+ }

+ case IceType_i64: {

auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);

Variable *DestLo = Dest64On32->getLo();

Variable *DestHi = Dest64On32->getHi();

- _mov(DestLo, ReturnReg);

- _mov(DestHi, ReturnRegHi);

- } else {

- assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||

- Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||

- isVectorType(Dest->getType()));

- if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {

- UnimplementedLoweringError(this, Instr);

- return;

- } else {

- _mov(Dest, ReturnReg);

- }

+ _addu(DestLo, Zero, ReturnReg);

+ _addu(DestHi, Zero, ReturnRegHi);

+ break;

+ }

+ case IceType_f32: {

+ _mov_s(Dest, ReturnReg);

+ break;

+ }

+ case IceType_f64: {

+ _mov_d(Dest, ReturnReg);

+ break;

+ }

+ case IceType_v4i1:

+ case IceType_v8i1:

+ case IceType_v16i1:

+ case IceType_v16i8:

+ case IceType_v8i16:

+ case IceType_v4i32:

+ case IceType_v4f32: {

+ UnimplementedLoweringError(this, Instr);

+ return;

+ }

}

@@ -1423,7 +1894,24 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) {

Variable *Reg = nullptr;

if (Instr->hasRetValue()) {

Operand *Src0 = Instr->getRetValue();

switch (Src0->getType()) {

+ default:

+ UnimplementedLoweringError(this, Instr);

Jim Stichnoth 2016/06/13 20:14:19 You probably want a "break;" after this, otherwise

+ case IceType_f32: {

+ if (auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {

+ Reg = makeReg(Src0V->getType(), RegMIPS32::Reg_F0);

+ _mov_s(Reg, Src0V);

+ }

+ break;

+ }

+ case IceType_f64: {

+ if (auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {

+ Reg = makeReg(Src0V->getType(), RegMIPS32::Reg_F0F1);

+ _mov_d(Reg, Src0V);

+ }

+ break;

+ }

case IceType_i1:

case IceType_i8:

case IceType_i16:

@@ -1442,9 +1930,16 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) {

Context.insert<InstFakeUse>(R1);

break;

}

- default:

+ case IceType_v4i1:

+ case IceType_v8i1:

+ case IceType_v16i1:

+ case IceType_v16i8:

+ case IceType_v8i16:

+ case IceType_v4i32:

+ case IceType_v4f32: {

UnimplementedLoweringError(this, Instr);

+ break;

+ }

}

_ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);

@@ -1551,13 +2046,13 @@ void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,

void TargetDataMIPS32::lowerConstants() {

if (getFlags().getDisableTranslation())

return;

- UnimplementedError(getFlags());

+ // UnimplementedError(getFlags());

Jim Stichnoth 2016/06/13 20:14:19 remove this

}

void TargetDataMIPS32::lowerJumpTables() {

if (getFlags().getDisableTranslation())

return;

- UnimplementedError(getFlags());

+ // UnimplementedError(getFlags());

Jim Stichnoth 2016/06/13 20:14:19 remove this

}

// Helper for legalize() to emit the right code to lower an operand to a

@@ -1565,8 +2060,11 @@ void TargetDataMIPS32::lowerJumpTables() {

Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {

Type Ty = Src->getType();

Variable *Reg = makeReg(Ty, RegNum);

- if (isVectorType(Ty) || isFloatingType(Ty)) {

+ if (isVectorType(Ty)) {

UnimplementedError(getFlags());

+ } else if (isFloatingType(Ty)) {

+ (Ty == IceType_f32) ? _mov_s(Reg, llvm::dyn_cast<Variable>(Src))

+ : _mov_d(Reg, llvm::dyn_cast<Variable>(Src));

} else {

// Mov's Src operand can really only be the flexible second operand type

// or a register. Users should guarantee that.

« no previous file with comments | « src/IceTargetLoweringMIPS32.h ('k') | src/IceTargetLoweringX8632.cpp » ('j') | no next file with comments »