Chromium Code Reviews| Index: src/IceTargetLoweringMIPS32.cpp |
| diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp |
| index 21262f62c51230b522b20b23edc1bb0b171be052..c6fa55ca666ee2843669a761fb5223c78101efac 100644 |
| --- a/src/IceTargetLoweringMIPS32.cpp |
| +++ b/src/IceTargetLoweringMIPS32.cpp |
| @@ -163,10 +163,30 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) { |
| RegMIPS32::getRegName, getRegClassName); |
| } |
| +void TargetMIPS32::unsetIfNonLeafFunc() { |
|
Jim Stichnoth
2016/06/15 13:41:19
I think that the ARM target folds this computation
Jim Stichnoth
2016/06/15 13:43:19
Actually, it just occurred to me that statisticall
sagar.thakur
2016/06/16 08:05:54
This pass is needed because findMaxStackOutArgsSiz
|
| + for (CfgNode *Node : Func->getNodes()) { |
| + for (Inst &Instr : Node->getInsts()) { |
| + if (llvm::isa<InstCall>(&Instr)) { |
| + // Unset MaybeLeafFunc if call instruction exists. |
| + MaybeLeafFunc = false; |
| + break; |
|
Jim Stichnoth
2016/06/15 13:41:19
Can you just early-return here, and avoid the brea
sagar.thakur
2016/06/16 08:05:54
Done.
|
| + } |
| + } |
| + if (!MaybeLeafFunc) |
| + break; |
| + } |
| +} |
| + |
| +uint32_t TargetMIPS32::getStackAlignment() const { |
| + return MIPS32_STACK_ALIGNMENT_BYTES; |
| +} |
| + |
| void TargetMIPS32::findMaxStackOutArgsSize() { |
| // MinNeededOutArgsBytes should be updated if the Target ever creates a |
| // high-level InstCall that requires more stack bytes. |
| - constexpr size_t MinNeededOutArgsBytes = 16; |
| + size_t MinNeededOutArgsBytes = 0; |
| + if (!MaybeLeafFunc) |
| + MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4; |
| MaxOutArgsSizeBytes = MinNeededOutArgsBytes; |
| for (CfgNode *Node : Func->getNodes()) { |
| Context.init(Node); |
| @@ -188,10 +208,12 @@ void TargetMIPS32::translateO2() { |
| // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
| genTargetHelperCalls(); |
| + unsetIfNonLeafFunc(); |
| + |
| findMaxStackOutArgsSize(); |
| // Merge Alloca instructions, and lay out the stack. |
| - static constexpr bool SortAndCombineAllocas = false; |
| + static constexpr bool SortAndCombineAllocas = true; |
| Func->processAllocas(SortAndCombineAllocas); |
| Func->dump("After Alloca processing"); |
| @@ -291,6 +313,8 @@ void TargetMIPS32::translateOm1() { |
| // TODO: share passes with X86? |
| genTargetHelperCalls(); |
| + unsetIfNonLeafFunc(); |
| + |
| findMaxStackOutArgsSize(); |
| // Do not merge Alloca instructions, and lay out the stack. |
| @@ -441,8 +465,8 @@ OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) { |
| // hold the operand. |
| auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg)); |
| return OperandMIPS32Mem::create( |
| - Func, Ty, Base, |
| - llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); |
| + Func, Ty, Base, llvm::cast<ConstantInteger32>( |
| + Ctx->getConstantInt32(Base->getStackOffset()))); |
| } |
| void TargetMIPS32::emitVariable(const Variable *Var) const { |
| @@ -808,15 +832,9 @@ void TargetMIPS32::addProlog(CfgNode *Node) { |
| uint32_t GlobalsAndSubsequentPaddingSize = |
| GlobalsSize + LocalsSlotsPaddingBytes; |
| - if (MaybeLeafFunc) |
| - MaxOutArgsSizeBytes = 0; |
| - |
| // Adds the out args space to the stack, and align SP if necessary. |
| - uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes; |
| - |
| - // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with |
| - // TotalStackSizeBytes once lowerAlloca is implemented and leaf function |
| - // information is generated by lowerCall. |
| + uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes + |
| + FixedAllocaSizeBytes + MaxOutArgsSizeBytes; |
| // Generate "addiu sp, sp, -TotalStackSizeBytes" |
| if (TotalStackSizeBytes) { |
| @@ -854,7 +872,7 @@ void TargetMIPS32::addProlog(CfgNode *Node) { |
| // those that were register-allocated. Args are pushed right to left, so |
| // Arg[0] is closest to the stack/frame pointer. |
| const VarList &Args = Func->getArgs(); |
| - size_t InArgsSizeBytes = 0; |
| + size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4; |
| TargetMIPS32::CallingConv CC; |
| uint32_t ArgNo = 0; |
| @@ -1002,14 +1020,64 @@ SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include, |
| } |
| void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { |
| - UsesFramePointer = true; |
| // Conservatively require the stack to be aligned. Some stack adjustment |
| // operations implemented below assume that the stack is aligned before the |
| // alloca. All the alloca code ensures that the stack alignment is preserved |
| // after the alloca. The stack alignment restriction can be relaxed in some |
| // cases. |
| NeedsStackAlignment = true; |
| - UnimplementedLoweringError(this, Instr); |
| + |
| + // For default align=0, set it to the real value 1, to avoid any |
| + // bit-manipulation problems below. |
| + const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); |
| + |
| + // LLVM enforces power of 2 alignment. |
| + assert(llvm::isPowerOf2_32(AlignmentParam)); |
| + assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES)); |
| + |
| + const uint32_t Alignment = |
| + std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES); |
| + const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES; |
| + const bool OptM1 = getFlags().getOptLevel() == Opt_m1; |
| + const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); |
| + const bool UseFramePointer = |
| + hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; |
| + |
| + if (UseFramePointer) |
| + setHasFramePointer(); |
| + |
| + Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); |
| + |
| + Variable *Dest = Instr->getDest(); |
| + Operand *TotalSize = Instr->getSizeInBytes(); |
| + |
| + if (const auto *ConstantTotalSize = |
| + llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| + const uint32_t Value = |
| + Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); |
| + FixedAllocaSizeBytes += Value; |
| + // Constant size alloca. |
| + if (!UseFramePointer) { |
| + // If we don't need a Frame Pointer, this alloca has a known offset to the |
| + // stack pointer. We don't need to adjust the stack pointer, nor assign any |
| + // value to Dest, as Dest is rematerializable. |
| + assert(Dest->isRematerializable()); |
| + Context.insert<InstFakeDef>(Dest); |
| + return; |
| + } |
| + } else { |
| + UnimplementedLoweringError(this, Instr); |
| + return; |
| + } |
| + |
| + // Add enough to the returned address to account for the out args area. |
| + if (MaxOutArgsSizeBytes > 0) { |
| + Variable *T = makeReg(getPointerType()); |
| + _addiu(T, SP, MaxOutArgsSizeBytes); |
| + _mov(Dest, T); |
| + } else { |
| + _mov(Dest, SP); |
| + } |
| } |
| void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr, |