Chromium Code Reviews| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index faf1aa03d2b2c149f54aa42bb3761b06e87ef611..00e5e114fc970c9e7d976a747762fa3c1777e251 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -285,6 +285,11 @@ void TargetARM32::translateO2() { |
| return; |
| Func->dump("After stack frame mapping"); |
| + legalizeStackSlots(); |
| + if (Func->hasError()) |
| + return; |
| + Func->dump("After legalizeStackSlots"); |
| + |
| Func->contractEmptyNodes(); |
| Func->reorderNodes(); |
| @@ -335,6 +340,11 @@ void TargetARM32::translateOm1() { |
| return; |
| Func->dump("After stack frame mapping"); |
| + legalizeStackSlots(); |
| + if (Func->hasError()) |
| + return; |
| + Func->dump("After legalizeStackSlots"); |
| + |
| // Nop insertion |
| if (Ctx->getFlags().shouldDoNopInsertion()) { |
| Func->doNopInsertion(); |
| @@ -394,16 +404,17 @@ void TargetARM32::emitVariable(const Variable *Var) const { |
| "Infinite-weight Variable has no register assigned"); |
| } |
| int32_t Offset = Var->getStackOffset(); |
| - if (!hasFramePointer()) |
| - Offset += getStackAdjustment(); |
| - // TODO(jvoung): Handle out of range. Perhaps we need a scratch register |
| - // to materialize a larger offset. |
| - constexpr bool SignExt = false; |
| - if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) { |
| + int32_t BaseRegNum = Var->getBaseRegNum(); |
| + if (BaseRegNum == Variable::NoRegister) { |
| + BaseRegNum = getFrameOrStackReg(); |
| + if (!hasFramePointer()) |
| + Offset += getStackAdjustment(); |
| + } |
| + if (!isLegalVariableStackOffset(Offset)) { |
| llvm::report_fatal_error("Illegal stack offset"); |
| } |
| - const Type FrameSPTy = IceType_i32; |
| - Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy); |
| + const Type FrameSPTy = stackSlotType(); |
| + Str << "[" << getRegName(BaseRegNum, FrameSPTy); |
| if (Offset != 0) { |
| Str << ", " << getConstantPrefix() << Offset; |
| } |
| @@ -556,7 +567,7 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| // | 1. preserved registers | |
| // +------------------------+ |
| // | 2. padding | |
| - // +------------------------+ |
| + // +------------------------+ <--- FramePointer (if used) |
| // | 3. global spill area | |
| // +------------------------+ |
| // | 4. padding | |
| @@ -566,7 +577,7 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| // | 6. padding | |
| // +------------------------+ |
| // | 7. allocas | |
| - // +------------------------+ |
| + // +------------------------+ <--- StackPointer |
| // |
| // The following variables record the size in bytes of the given areas: |
| // * PreservedRegsSizeBytes: area 1 |
| @@ -681,10 +692,9 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| // Generate "sub sp, SpillAreaSizeBytes" |
| if (SpillAreaSizeBytes) { |
| - // Use the IP inter-procedural scratch register if needed to legalize |
| - // the immediate. |
| + // Use the scratch register if needed to legalize the immediate. |
| Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| - Legal_Reg | Legal_Flex, RegARM32::Reg_ip); |
| + Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| _sub(SP, SP, SubAmount); |
| } |
| @@ -785,10 +795,10 @@ void TargetARM32::addEpilog(CfgNode *Node) { |
| } else { |
| // add SP, SpillAreaSizeBytes |
| if (SpillAreaSizeBytes) { |
| - // Use the IP inter-procedural scratch register if needed to legalize |
| - // the immediate. It shouldn't be live at this point. |
| - Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| - Legal_Reg | Legal_Flex, RegARM32::Reg_ip); |
| + // Use the scratch register if needed to legalize the immediate. |
| + Operand *AddAmount = |
| + legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| + Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| _add(SP, SP, AddAmount); |
| } |
| } |
| @@ -838,6 +848,156 @@ void TargetARM32::addEpilog(CfgNode *Node) { |
| RI->setDeleted(); |
| } |
| +bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { |
| + constexpr bool SignExt = false; |
| + return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); |
| +} |
| + |
| +StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, |
| + Variable *OrigBaseReg) { |
| + int32_t Offset = Var->getStackOffset(); |
| + // Legalize will likely need a movw/movt combination to materialize the |
| + // offset. However, if the top 16 bits of the negated offset are all 0, |
| + // negate the offset and subtract it from the base register instead. |
| + bool ShouldSub = (-Offset & 0xFFFF0000) == 0; |
| + if (ShouldSub) |
| + Offset = -Offset; |
| + Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), |
| + Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| + Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); |
| + if (ShouldSub) |
| + _sub(ScratchReg, OrigBaseReg, OffsetVal); |
| + else |
| + _add(ScratchReg, OrigBaseReg, OffsetVal); |
| + StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType()); |
| + NewVar->setWeight(RegWeight::Zero); |
| + NewVar->setBaseRegNum(ScratchReg->getRegNum()); |
| + constexpr int32_t NewOffset = 0; |
| + NewVar->setStackOffset(NewOffset); |
| + return NewVar; |
| +} |
| + |
| +void TargetARM32::legalizeStackSlots() { |
| + // If a stack variable's frame offset doesn't fit, convert from: |
| + // ldr X, OFF[SP] |
| + // to: |
| + // movw/movt TMP, OFF_PART |
| + // add TMP, TMP, SP |
| + // ldr X, OFF_MORE[TMP] |
| + // |
| + // This is safe because we have reserved TMP, and add for ARM does not |
| + // clobber the flags register. |
| + Func->dump("Before legalizeStackSlots"); |
| + assert(hasComputedFrame()); |
| + // Early exit if SpillAreaSizeBytes itself is a legal stack offset. |
| + if (isLegalVariableStackOffset(SpillAreaSizeBytes)) |
| + return; |
| + Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); |
| + int32_t StackAdjust = 0; |
| + // Do a fairly naive greedy clustering for now. Pick the first stack slot |
| + // that's out of bounds and make a new base reg using the architecture's temp |
| + // register. If that works for the next slot, then great. Otherwise, create |
| + // a new base register, clobbering the previous base register. Never share a |
| + // base reg across different basic blocks. This isn't ideal if local and |
| + // multi-block variables are far apart and their references are interspersed. |
| + // It may help to be more coordinated about assigning stack slot numbers, |
| + // and to assign smaller offsets to higher-weight variables so that they |
| + // don't depend on this legalization. |
| + for (CfgNode *Node : Func->getNodes()) { |
| + Context.init(Node); |
| + StackVariable *NewBaseReg = nullptr; |
| + int32_t NewBaseOffset = 0; |
| + while (!Context.atEnd()) { |
| + PostIncrLoweringContext PostIncrement(Context); |
| + Inst *CurInstr = Context.getCur(); |
| + Variable *Dest = CurInstr->getDest(); |
| + // Check if the previous NewBaseReg is clobbered, and reset if needed. |
| + if ((Dest && NewBaseReg && Dest->hasReg() && |
| + Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || |
| + llvm::isa<InstFakeKill>(CurInstr)) { |
| + NewBaseReg = nullptr; |
| + NewBaseOffset = 0; |
| + } |
| + // The stack adjustment only matters if we are using SP instead of FP. |
| + if (!hasFramePointer()) { |
| + if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { |
| + StackAdjust += AdjInst->getAmount(); |
| + NewBaseOffset += AdjInst->getAmount(); |
| + continue; |
| + } |
| + if (llvm::isa<InstARM32Call>(CurInstr)) { |
| + NewBaseOffset -= StackAdjust; |
| + StackAdjust = 0; |
| + continue; |
| + } |
| + } |
| + // For now, only Mov instructions can have stack variables. We need to |
| + // know the type of instruction because we currently create a fresh one |
| + // to replace Dest/Source, rather than mutate in place. |
| + auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); |
| + if (!MovInst) { |
| + continue; |
| + } |
| + if (!Dest->hasReg()) { |
| + int32_t Offset = Dest->getStackOffset(); |
| + Offset += StackAdjust; |
| + if (!isLegalVariableStackOffset(Offset)) { |
| + if (NewBaseReg) { |
| + int32_t OffsetDiff = Offset - NewBaseOffset; |
| + if (isLegalVariableStackOffset(OffsetDiff)) { |
| + StackVariable *NewDest = |
| + Func->makeVariable<StackVariable>(stackSlotType()); |
| + NewDest->setWeight(RegWeight::Zero); |
| + NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum()); |
| + NewDest->setStackOffset(OffsetDiff); |
| + Variable *NewDestVar = NewDest; |
| + _mov(NewDestVar, MovInst->getSrc(0)); |
| + MovInst->setDeleted(); |
| + continue; |
| + } |
| + } |
| + StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg); |
| + assert(LegalDest != Dest); |
| + Variable *LegalDestVar = LegalDest; |
| + _mov(LegalDestVar, MovInst->getSrc(0)); |
| + MovInst->setDeleted(); |
| + NewBaseReg = LegalDest; |
| + NewBaseOffset = Offset; |
| + continue; |
| + } |
| + } |
| + assert(MovInst->getSrcSize() == 1); |
| + Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0)); |
| + if (Var && !Var->hasReg()) { |
| + int32_t Offset = Var->getStackOffset(); |
| + Offset += StackAdjust; |
| + if (!isLegalVariableStackOffset(Offset)) { |
| + if (NewBaseReg) { |
| + int32_t OffsetDiff = Offset - NewBaseOffset; |
| + if (isLegalVariableStackOffset(OffsetDiff)) { |
| + StackVariable *NewVar = |
| + Func->makeVariable<StackVariable>(stackSlotType()); |
| + NewVar->setWeight(RegWeight::Zero); |
| + NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum()); |
| + NewVar->setStackOffset(OffsetDiff); |
| + _mov(Dest, NewVar); |
| + MovInst->setDeleted(); |
| + continue; |
| + } |
| + } |
| + StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg); |
| + assert(LegalVar != Var); |
| + _mov(Dest, LegalVar); |
| + MovInst->setDeleted(); |
| + NewBaseReg = LegalVar; |
| + NewBaseOffset = Offset; |
| + continue; |
| + } |
| + } |
| + } |
| + } |
| +} |
| + |
| void TargetARM32::split64(Variable *Var) { |
| assert(Var->getType() == IceType_i64); |
| Variable *Lo = Var->getLo(); |
| @@ -2078,7 +2238,9 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| if (Val->getType() == IceType_i64) { |
| Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| - _mov(DestHi, Zero); |
| + Variable *T = nullptr; |
| + _mov(T, Zero); |
|
jvoung (off chromium)
2015/07/23 23:26:32
misc legalization fix for test_bitmanip, now that
|
| + _mov(DestHi, T); |
| } |
| return; |
| } |
| @@ -2230,7 +2392,9 @@ void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) { |
| // prolong the liveness of T2 as if it was used as a source. |
| _set_dest_nonkillable(); |
| _mov(DestLo, T2); |
| - _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| + Variable *T3 = nullptr; |
| + _mov(T3, Zero); |
| + _mov(DestHi, T3); |
| return; |
| } |
| _mov(Dest, T); |