Chromium Code Reviews| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index 60d3a37feb1cd6d546f547f067cff09b6994cf99..12810f6c88656208ead7115fa0021f4915a5ece5 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -233,12 +233,62 @@ void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { |
| } |
| } // end of anonymous namespace |
| +uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) { |
| + TargetARM32::CallingConv CC; |
| + size_t OutArgsSizeBytes = 0; |
| + for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) { |
| + Operand *Arg = legalizeUndef(Call->getArg(i)); |
| + Type Ty = Arg->getType(); |
| + if (Ty == IceType_i64) { |
| + std::pair<int32_t, int32_t> Regs; |
| + if (CC.I64InRegs(&Regs)) { |
| + continue; |
| + } |
| + } else if (isVectorType(Ty) || isFloatingType(Ty)) { |
| + int32_t Reg; |
| + if (CC.FPInReg(Ty, &Reg)) { |
| + continue; |
| + } |
| + } else { |
| + assert(Ty == IceType_i32); |
| + int32_t Reg; |
| + if (CC.I32InReg(&Reg)) { |
| + continue; |
| + } |
| + } |
| + |
| + OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty); |
| + OutArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| + } |
| + |
| + return applyStackAlignment(OutArgsSizeBytes); |
| +} |
| + |
| +void TargetARM32::findMaxStackOutArgsSize() { |
| + // MinNeededOutArgsBytes should be updated if the Target ever creates an |
|
Jim Stichnoth
2015/11/22 04:09:43
s/an/a/
at least for the way I pronounce "high"
John
2015/11/23 18:54:10
Done.
|
| + // high-level InstCall that requires more stack bytes. |
| + constexpr size_t MinNeededOutArgsBytes = 0; |
| + MaxOutArgsSizeBytes = MinNeededOutArgsBytes; |
| + for (CfgNode *Node : Func->getNodes()) { |
| + Context.init(Node); |
| + while (!Context.atEnd()) { |
| + PostIncrLoweringContext PostIncrement(Context); |
| + Inst *CurInstr = Context.getCur(); |
| + if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
| + SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
| + MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
| + } |
| + } |
| + } |
| +} |
| + |
| void TargetARM32::translateO2() { |
| TimerMarker T(TimerStack::TT_O2, Func); |
| // TODO(stichnot): share passes with X86? |
| // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
| genTargetHelperCalls(); |
| + findMaxStackOutArgsSize(); |
| // Do not merge Alloca instructions, and lay out the stack. |
| static constexpr bool SortAndCombineAllocas = false; |
| @@ -346,6 +396,7 @@ void TargetARM32::translateOm1() { |
| // TODO: share passes with X86? |
| genTargetHelperCalls(); |
| + findMaxStackOutArgsSize(); |
| // Do not merge Alloca instructions, and lay out the stack. |
| static constexpr bool SortAndCombineAllocas = false; |
| @@ -473,8 +524,6 @@ void TargetARM32::emitVariable(const Variable *Var) const { |
| int32_t BaseRegNum = Var->getBaseRegNum(); |
| if (BaseRegNum == Variable::NoRegister) { |
| BaseRegNum = getFrameOrStackReg(); |
| - if (!hasFramePointer()) |
| - Offset += getStackAdjustment(); |
| } |
| const Type VarTy = Var->getType(); |
| Str << "[" << getRegName(BaseRegNum, VarTy); |
| @@ -670,7 +719,11 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| // +------------------------+ |
| // | 6. padding | |
| // +------------------------+ |
| - // | 7. allocas | |
| + // | 7. allocas (variable) | |
| + // +------------------------+ |
| + // | 8. padding | |
| + // +------------------------+ |
| + // | 9. out args | |
| // +------------------------+ <--- StackPointer |
| // |
| // The following variables record the size in bytes of the given areas: |
| @@ -679,7 +732,9 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| // * GlobalsSize: area 3 |
| // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
| // * LocalsSpillAreaSize: area 5 |
| - // * SpillAreaSizeBytes: areas 2 - 6 |
| + // * SpillAreaSizeBytes: areas 2 - 6, and 9 |
| + // * MaxOutArgsSizeBytes: area 9 |
| + // |
| // Determine stack frame offsets for each Variable without a register |
| // assignment. This can be done as one variable per stack slot. Or, do |
| // coalescing by running the register allocator again with an infinite set of |
| @@ -785,10 +840,13 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| uint32_t GlobalsAndSubsequentPaddingSize = |
| GlobalsSize + LocalsSlotsPaddingBytes; |
| - // Align SP if necessary. |
| - if (NeedsStackAlignment) { |
| + // Add the out args space to the stack, and align SP if necessary. |
| + if (!NeedsStackAlignment) { |
| + SpillAreaSizeBytes += MaxOutArgsSizeBytes; |
| + } else { |
| uint32_t StackOffset = PreservedRegsSizeBytes; |
| uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| + StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); |
| SpillAreaSizeBytes = StackSize - StackOffset; |
| } |
| @@ -802,8 +860,6 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| } |
| Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| - resetStackAdjustment(); |
| - |
| // Fill in stack offsets for stack args, and copy args into registers for |
| // those that were register-allocated. Args are pushed right to left, so |
| // Arg[0] is closest to the stack/frame pointer. |
| @@ -847,7 +903,8 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| Str << "Stack layout:\n"; |
| uint32_t SPAdjustmentPaddingSize = |
| SpillAreaSizeBytes - LocalsSpillAreaSize - |
| - GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; |
| + GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - |
| + MaxOutArgsSizeBytes; |
| Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| @@ -860,6 +917,7 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| Str << "Stack details:\n" |
| << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| + << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n" |
| << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| << " bytes\n" |
| << " is FP based = " << UsesFramePointer << "\n"; |
| @@ -956,10 +1014,7 @@ bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const { |
| return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset); |
| } |
| -Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset, |
| - int32_t StackAdjust, |
| - Variable *OrigBaseReg) { |
| - int32_t Offset = OriginalOffset + StackAdjust; |
| +Variable *TargetARM32::newBaseRegister(int32_t Offset, Variable *OrigBaseReg) { |
| // Legalize will likely need a movw/movt combination, but if the top bits are |
| // all 0 from negating the offset and subtracting, we could use that instead. |
| bool ShouldSub = (-Offset & 0xFFFF0000) == 0; |
| @@ -976,26 +1031,25 @@ Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset, |
| } |
| OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, |
| - int32_t StackAdjust, |
| Variable *OrigBaseReg, |
| Variable **NewBaseReg, |
| int32_t *NewBaseOffset) { |
| - if (isLegalMemOffset(Ty, Offset + StackAdjust)) { |
| + if (isLegalMemOffset(Ty, Offset)) { |
| return OperandARM32Mem::create( |
| - Func, Ty, OrigBaseReg, llvm::cast<ConstantInteger32>( |
| - Ctx->getConstantInt32(Offset + StackAdjust)), |
| + Func, Ty, OrigBaseReg, |
| + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)), |
| OperandARM32Mem::Offset); |
| } |
| if (*NewBaseReg == nullptr) { |
| - *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); |
| - *NewBaseOffset = Offset + StackAdjust; |
| + *NewBaseReg = newBaseRegister(Offset, OrigBaseReg); |
| + *NewBaseOffset = Offset; |
| } |
| - int32_t OffsetDiff = Offset + StackAdjust - *NewBaseOffset; |
| + int32_t OffsetDiff = Offset - *NewBaseOffset; |
| if (!isLegalMemOffset(Ty, OffsetDiff)) { |
| - *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); |
| - *NewBaseOffset = Offset + StackAdjust; |
| + *NewBaseReg = newBaseRegister(Offset, OrigBaseReg); |
| + *NewBaseOffset = Offset; |
| OffsetDiff = 0; |
| } |
| @@ -1005,9 +1059,8 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, |
| OperandARM32Mem::Offset); |
| } |
| -void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust, |
| - Variable *OrigBaseReg, Variable **NewBaseReg, |
| - int32_t *NewBaseOffset) { |
| +void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg, |
| + Variable **NewBaseReg, int32_t *NewBaseOffset) { |
| Variable *Dest = MovInstr->getDest(); |
| assert(Dest != nullptr); |
| Type DestTy = Dest->getType(); |
| @@ -1027,8 +1080,8 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust, |
| assert(SrcR->hasReg()); |
| const int32_t Offset = Dest->getStackOffset(); |
| // This is a _mov(Mem(), Variable), i.e., a store. |
| - _str(SrcR, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg, |
| - NewBaseReg, NewBaseOffset), |
| + _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg, |
| + NewBaseOffset), |
| MovInstr->getPredicate()); |
| // _str() does not have a Dest, so we add a fake-def(Dest). |
| Context.insert(InstFakeDef::create(Func, Dest)); |
| @@ -1036,8 +1089,8 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust, |
| } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { |
| if (!Var->hasReg()) { |
| const int32_t Offset = Var->getStackOffset(); |
| - _ldr(Dest, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg, |
| - NewBaseReg, NewBaseOffset), |
| + _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg, |
| + NewBaseOffset), |
| MovInstr->getPredicate()); |
| Legalized = true; |
| } |
| @@ -1064,7 +1117,6 @@ void TargetARM32::legalizeStackSlots() { |
| Func->dump("Before legalizeStackSlots"); |
| assert(hasComputedFrame()); |
| Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); |
| - int32_t StackAdjust = 0; |
| // Do a fairly naive greedy clustering for now. Pick the first stack slot |
| // that's out of bounds and make a new base reg using the architecture's temp |
| // register. If that works for the next slot, then great. Otherwise, create a |
| @@ -1091,23 +1143,8 @@ void TargetARM32::legalizeStackSlots() { |
| NewBaseOffset = 0; |
| } |
| - // The stack adjustment only matters if we are using SP instead of FP. |
| - if (!hasFramePointer()) { |
| - if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { |
| - StackAdjust += AdjInst->getAmount(); |
| - NewBaseOffset += AdjInst->getAmount(); |
| - continue; |
| - } |
| - if (llvm::isa<InstARM32Call>(CurInstr)) { |
| - NewBaseOffset -= StackAdjust; |
| - StackAdjust = 0; |
| - continue; |
| - } |
| - } |
| - |
| if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) { |
| - legalizeMov(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg, |
| - &NewBaseOffset); |
| + legalizeMov(MovInstr, OrigBaseReg, &NewBaseReg, &NewBaseOffset); |
| } |
| } |
| } |
| @@ -1269,7 +1306,14 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) { |
| alignRegisterPow2(T, Alignment); |
| _sub(SP, SP, T); |
| } |
| - _mov(Dest, SP); |
| + Variable *T = SP; |
| + if (MaxOutArgsSizeBytes != 0) { |
| + T = makeReg(getPointerType()); |
| + Operand *OutArgsSizeRF = legalize( |
| + Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex); |
| + _add(T, SP, OutArgsSizeRF); |
| + } |
| + _mov(Dest, T); |
| } |
| void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { |
| @@ -2093,6 +2137,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| } |
| case InstArithmetic::Sub: { |
| if (Srcs.hasConstOperand()) { |
| + // TODO(jpp): lowering Src0R here is wrong -- Src0R is not guaranteed |
| + // to be used. |
| Variable *Src0R = Srcs.src0R(this); |
| if (Srcs.immediateIsFlexEncodable()) { |
| Operand *Src1RF = Srcs.src1RF(this); |
| @@ -2346,7 +2392,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs; |
| // Pair of Arg Operand -> stack offset. |
| llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs; |
| - int32_t ParameterAreaSizeBytes = 0; |
| + size_t ParameterAreaSizeBytes = 0; |
| // Classify each argument operand according to the location where the |
| // argument is passed. |
| @@ -2390,16 +2436,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| // the stack is already aligned at the start of the calling sequence. |
| ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
| - // Subtract the appropriate amount for the argument area. This also takes |
| - // care of setting the stack adjustment during emission. |
| - // |
| - // TODO: If for some reason the call instruction gets dead-code eliminated |
| - // after lowering, we would need to ensure that the pre-call and the |
| - // post-call esp adjustment get eliminated as well. |
| - if (ParameterAreaSizeBytes) { |
| - Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
| - Legal_Reg | Legal_Flex); |
| - _adjust_stack(ParameterAreaSizeBytes, SubAmount); |
| + if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) { |
| + llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max."); |
| } |
| // Copy arguments that are passed on the stack to the appropriate stack |
| @@ -2492,15 +2530,6 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| if (ReturnRegHi) |
| Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| - // Add the appropriate offset to SP. The call instruction takes care of |
| - // resetting the stack offset during emission. |
| - if (ParameterAreaSizeBytes) { |
| - Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
| - Legal_Reg | Legal_Flex); |
| - Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| - _add(SP, SP, AddAmount); |
| - } |
| - |
| // Insert a register-kill pseudo instruction. |
| Context.insert(InstFakeKill::create(Func, NewCall)); |