| Index: src/IceTargetLoweringARM32.cpp
|
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
|
| index 12810f6c88656208ead7115fa0021f4915a5ece5..f23609b7837cce25402e4d1382ffa9741273d02c 100644
|
| --- a/src/IceTargetLoweringARM32.cpp
|
| +++ b/src/IceTargetLoweringARM32.cpp
|
| @@ -265,7 +265,7 @@ uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
|
| }
|
|
|
| void TargetARM32::findMaxStackOutArgsSize() {
|
| - // MinNeededOutArgsBytes should be updated if the Target ever creates an
|
| + // MinNeededOutArgsBytes should be updated if the Target ever creates a
|
| // high-level InstCall that requires more stack bytes.
|
| constexpr size_t MinNeededOutArgsBytes = 0;
|
| MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
|
| @@ -291,7 +291,7 @@ void TargetARM32::translateO2() {
|
| findMaxStackOutArgsSize();
|
|
|
| // Do not merge Alloca instructions, and lay out the stack.
|
| - static constexpr bool SortAndCombineAllocas = false;
|
| + static constexpr bool SortAndCombineAllocas = true;
|
| Func->processAllocas(SortAndCombineAllocas);
|
| Func->dump("After Alloca processing");
|
|
|
| @@ -356,6 +356,7 @@ void TargetARM32::translateO2() {
|
| regAlloc(RAK_Global);
|
| if (Func->hasError())
|
| return;
|
| +
|
| copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
|
| Func->dump("After linear scan regalloc");
|
|
|
| @@ -364,6 +365,8 @@ void TargetARM32::translateO2() {
|
| Func->dump("After advanced Phi lowering");
|
| }
|
|
|
| + ForbidTemporaryWithoutReg _(this);
|
| +
|
| // Stack frame mapping.
|
| Func->genFrame();
|
| if (Func->hasError())
|
| @@ -399,8 +402,8 @@ void TargetARM32::translateOm1() {
|
| findMaxStackOutArgsSize();
|
|
|
| // Do not merge Alloca instructions, and lay out the stack.
|
| - static constexpr bool SortAndCombineAllocas = false;
|
| - Func->processAllocas(SortAndCombineAllocas);
|
| + static constexpr bool DontSortAndCombineAllocas = false;
|
| + Func->processAllocas(DontSortAndCombineAllocas);
|
| Func->dump("After Alloca processing");
|
|
|
| Func->placePhiLoads();
|
| @@ -424,9 +427,12 @@ void TargetARM32::translateOm1() {
|
| regAlloc(RAK_InfOnly);
|
| if (Func->hasError())
|
| return;
|
| +
|
| copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
|
| Func->dump("After regalloc of infinite-weight variables");
|
|
|
| + ForbidTemporaryWithoutReg _(this);
|
| +
|
| Func->genFrame();
|
| if (Func->hasError())
|
| return;
|
| @@ -520,6 +526,7 @@ void TargetARM32::emitVariable(const Variable *Var) const {
|
| llvm::report_fatal_error(
|
| "Infinite-weight Variable has no register assigned");
|
| }
|
| + assert(!Var->isRematerializable());
|
| int32_t Offset = Var->getStackOffset();
|
| int32_t BaseRegNum = Var->getBaseRegNum();
|
| if (BaseRegNum == Variable::NoRegister) {
|
| @@ -850,6 +857,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
|
| SpillAreaSizeBytes = StackSize - StackOffset;
|
| }
|
|
|
| + // Combine fixed alloca with SpillAreaSize.
|
| + SpillAreaSizeBytes += FixedAllocaSizeBytes;
|
| +
|
| // Generate "sub sp, SpillAreaSizeBytes"
|
| if (SpillAreaSizeBytes) {
|
| // Use the scratch register if needed to legalize the immediate.
|
| @@ -857,7 +867,11 @@ void TargetARM32::addProlog(CfgNode *Node) {
|
| Legal_Reg | Legal_Flex, getReservedTmpReg());
|
| Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
|
| _sub(SP, SP, SubAmount);
|
| + if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
|
| + alignRegisterPow2(SP, FixedAllocaAlignBytes);
|
| + }
|
| }
|
| +
|
| Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
|
|
|
| // Fill in stack offsets for stack args, and copy args into registers for
|
| @@ -1034,6 +1048,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
|
| Variable *OrigBaseReg,
|
| Variable **NewBaseReg,
|
| int32_t *NewBaseOffset) {
|
| + assert(!OrigBaseReg->isRematerializable());
|
| if (isLegalMemOffset(Ty, Offset)) {
|
| return OperandARM32Mem::create(
|
| Func, Ty, OrigBaseReg,
|
| @@ -1053,6 +1068,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
|
| OffsetDiff = 0;
|
| }
|
|
|
| + assert(!(*NewBaseReg)->isRematerializable());
|
| return OperandARM32Mem::create(
|
| Func, Ty, *NewBaseReg,
|
| llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
|
| @@ -1076,8 +1092,9 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
|
|
|
| bool Legalized = false;
|
| if (!Dest->hasReg()) {
|
| - auto *const SrcR = llvm::cast<Variable>(Src);
|
| + auto *SrcR = llvm::cast<Variable>(Src);
|
| assert(SrcR->hasReg());
|
| + assert(!SrcR->isRematerializable());
|
| const int32_t Offset = Dest->getStackOffset();
|
| // This is a _mov(Mem(), Variable), i.e., a store.
|
| _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
|
| @@ -1087,12 +1104,26 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
|
| Context.insert(InstFakeDef::create(Func, Dest));
|
| Legalized = true;
|
| } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
|
| - if (!Var->hasReg()) {
|
| - const int32_t Offset = Var->getStackOffset();
|
| - _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
|
| - NewBaseOffset),
|
| - MovInstr->getPredicate());
|
| + if (Var->isRematerializable()) {
|
| + // Rematerialization arithmetic.
|
| + const int32_t ExtraOffset =
|
| + (static_cast<SizeT>(Var->getRegNum()) == getFrameReg())
|
| + ? getFrameFixedAllocaOffset()
|
| + : 0;
|
| +
|
| + const int32_t Offset = Var->getStackOffset() + ExtraOffset;
|
| + Operand *OffsetRF = legalize(Ctx->getConstantInt32(Offset),
|
| + Legal_Reg | Legal_Flex, Dest->getRegNum());
|
| + _add(Dest, Var, OffsetRF);
|
| Legalized = true;
|
| + } else {
|
| + if (!Var->hasReg()) {
|
| + const int32_t Offset = Var->getStackOffset();
|
| + _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
|
| + NewBaseOffset),
|
| + MovInstr->getPredicate());
|
| + Legalized = true;
|
| + }
|
| }
|
| }
|
|
|
| @@ -1163,13 +1194,15 @@ Operand *TargetARM32::loOperand(Operand *Operand) {
|
| // increment) in case of duplication.
|
| assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
|
| Mem->getAddrMode() == OperandARM32Mem::NegOffset);
|
| + Variable *BaseR = legalizeToReg(Mem->getBase());
|
| if (Mem->isRegReg()) {
|
| - return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
|
| - Mem->getIndex(), Mem->getShiftOp(),
|
| - Mem->getShiftAmt(), Mem->getAddrMode());
|
| + Variable *IndexR = legalizeToReg(Mem->getIndex());
|
| + return OperandARM32Mem::create(Func, IceType_i32, BaseR, IndexR,
|
| + Mem->getShiftOp(), Mem->getShiftAmt(),
|
| + Mem->getAddrMode());
|
| } else {
|
| - return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
|
| - Mem->getOffset(), Mem->getAddrMode());
|
| + return OperandARM32Mem::create(Func, IceType_i32, BaseR, Mem->getOffset(),
|
| + Mem->getAddrMode());
|
| }
|
| }
|
| llvm_unreachable("Unsupported operand type");
|
| @@ -1201,7 +1234,9 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
|
| Variable *NewBase = Func->makeVariable(Base->getType());
|
| lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
|
| Base, Four));
|
| - return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
|
| + Variable *BaseR = legalizeToReg(NewBase);
|
| + Variable *IndexR = legalizeToReg(Mem->getIndex());
|
| + return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
|
| Mem->getShiftOp(), Mem->getShiftAmt(),
|
| Mem->getAddrMode());
|
| } else {
|
| @@ -1216,16 +1251,17 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
|
| // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
|
| // RegReg addressing modes, prefer adding to base and replacing
|
| // instead. Thus we leave the old offset alone.
|
| - Constant *Four = Ctx->getConstantInt32(4);
|
| + Constant *_4 = Ctx->getConstantInt32(4);
|
| Variable *NewBase = Func->makeVariable(Base->getType());
|
| lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
|
| - NewBase, Base, Four));
|
| + NewBase, Base, _4));
|
| Base = NewBase;
|
| } else {
|
| Offset =
|
| llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
|
| }
|
| - return OperandARM32Mem::create(Func, SplitType, Base, Offset,
|
| + Variable *BaseR = legalizeToReg(Base);
|
| + return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
|
| Mem->getAddrMode());
|
| }
|
| }
|
| @@ -1264,7 +1300,6 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
|
| }
|
|
|
| void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
|
| - UsesFramePointer = true;
|
| // Conservatively require the stack to be aligned. Some stack adjustment
|
| // operations implemented below assume that the stack is aligned before the
|
| // alloca. All the alloca code ensures that the stack alignment is preserved
|
| @@ -1272,29 +1307,53 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
|
| // cases.
|
| NeedsStackAlignment = true;
|
|
|
| - // TODO(stichnot): minimize the number of adjustments of SP, etc.
|
| - Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
|
| - Variable *Dest = Inst->getDest();
|
| - uint32_t AlignmentParam = Inst->getAlignInBytes();
|
| // For default align=0, set it to the real value 1, to avoid any
|
| // bit-manipulation problems below.
|
| - AlignmentParam = std::max(AlignmentParam, 1u);
|
| + const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());
|
|
|
| // LLVM enforces power of 2 alignment.
|
| assert(llvm::isPowerOf2_32(AlignmentParam));
|
| assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
|
|
|
| - uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
|
| - if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
|
| + const uint32_t Alignment =
|
| + std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
|
| + const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
|
| + const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
|
| + const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
|
| + const bool UseFramePointer =
|
| + hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
|
| +
|
| + if (UseFramePointer)
|
| + setHasFramePointer();
|
| +
|
| + Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
|
| + if (OverAligned) {
|
| alignRegisterPow2(SP, Alignment);
|
| }
|
| +
|
| + Variable *Dest = Inst->getDest();
|
| Operand *TotalSize = Inst->getSizeInBytes();
|
| +
|
| if (const auto *ConstantTotalSize =
|
| llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
|
| - uint32_t Value = ConstantTotalSize->getValue();
|
| - Value = Utils::applyAlignment(Value, Alignment);
|
| - Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
|
| - _sub(SP, SP, SubAmount);
|
| + const uint32_t Value =
|
| + Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
|
| + // Constant size alloca.
|
| + if (!UseFramePointer) {
|
| + // If we don't need a Frame Pointer, this alloca has a known offset to the
|
| + // stack pointer. We need not adjust the stack pointer, nor assign any
|
| + // value to Dest, as Dest is rematerializable.
|
| + assert(Dest->isRematerializable());
|
| + FixedAllocaSizeBytes += Value;
|
| + Context.insert(InstFakeDef::create(Func, Dest));
|
| + return;
|
| + }
|
| +
|
| + // If a frame pointer is required, then we need to store the alloca'd result
|
| + // in Dest.
|
| + Operand *SubAmountRF =
|
| + legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
|
| + _sub(SP, SP, SubAmountRF);
|
| } else {
|
| // Non-constant sizes need to be adjusted to the next highest multiple of
|
| // the required alignment at runtime.
|
| @@ -1306,6 +1365,8 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
|
| alignRegisterPow2(T, Alignment);
|
| _sub(SP, SP, T);
|
| }
|
| +
|
| + // Adds back a few bytes to SP to account for the out args area.
|
| Variable *T = SP;
|
| if (MaxOutArgsSizeBytes != 0) {
|
| T = makeReg(getPointerType());
|
| @@ -1313,6 +1374,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
|
| Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
|
| _add(T, SP, OutArgsSizeRF);
|
| }
|
| +
|
| _mov(Dest, T);
|
| }
|
|
|
| @@ -1976,6 +2038,12 @@ void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
|
|
|
| void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
|
| Variable *Dest = Inst->getDest();
|
| +
|
| + if (Dest->isRematerializable()) {
|
| + Context.insert(InstFakeDef::create(Func, Dest));
|
| + return;
|
| + }
|
| +
|
| if (Dest->getType() == IceType_i1) {
|
| lowerInt1Arithmetic(Inst);
|
| return;
|
| @@ -2139,8 +2207,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
|
| if (Srcs.hasConstOperand()) {
|
| // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed
|
| // to be used.
|
| - Variable *Src0R = Srcs.src0R(this);
|
| if (Srcs.immediateIsFlexEncodable()) {
|
| + Variable *Src0R = Srcs.src0R(this);
|
| Operand *Src1RF = Srcs.src1RF(this);
|
| if (Srcs.swappedOperands()) {
|
| _rsb(T, Src0R, Src1RF);
|
| @@ -2151,6 +2219,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
|
| return;
|
| }
|
| if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
|
| + Variable *Src0R = Srcs.src0R(this);
|
| Operand *Src1F = Srcs.negatedSrc1F(this);
|
| _add(T, Src0R, Src1F);
|
| _mov(Dest, T);
|
| @@ -2215,6 +2284,12 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
|
|
|
| void TargetARM32::lowerAssign(const InstAssign *Inst) {
|
| Variable *Dest = Inst->getDest();
|
| +
|
| + if (Dest->isRematerializable()) {
|
| + Context.insert(InstFakeDef::create(Func, Dest));
|
| + return;
|
| + }
|
| +
|
| Operand *Src0 = Inst->getSrc(0);
|
| assert(Dest->getType() == Src0->getType());
|
| if (Dest->getType() == IceType_i64) {
|
| @@ -4425,13 +4500,17 @@ OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
|
| assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
|
| : (ValidImmMask & OffsetImm) == OffsetImm);
|
|
|
| + Variable *BaseR = makeReg(getPointerType());
|
| + Context.insert(InstAssign::create(Func, BaseR, BaseVar));
|
| if (OffsetReg != nullptr) {
|
| - return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetReg, ShiftKind,
|
| + Variable *OffsetR = makeReg(getPointerType());
|
| + Context.insert(InstAssign::create(Func, OffsetR, OffsetReg));
|
| + return OperandARM32Mem::create(Func, Ty, BaseR, OffsetR, ShiftKind,
|
| OffsetRegShamt);
|
| }
|
|
|
| return OperandARM32Mem::create(
|
| - Func, Ty, BaseVar,
|
| + Func, Ty, BaseR,
|
| llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
|
| }
|
|
|
| @@ -4630,7 +4709,8 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
|
| if (RegNum == Variable::NoRegister) {
|
| if (Variable *Subst = getContext().availabilityGet(From)) {
|
| // At this point we know there is a potential substitution available.
|
| - if (Subst->mustHaveReg() && !Subst->hasReg()) {
|
| + if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
|
| + !Subst->hasReg()) {
|
| // At this point we know the substitution will have a register.
|
| if (From->getType() == Subst->getType()) {
|
| // At this point we know the substitution's register is compatible.
|
| @@ -4788,6 +4868,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
|
| }
|
|
|
| if (auto *Var = llvm::dyn_cast<Variable>(From)) {
|
| + if (Var->isRematerializable()) {
|
| + // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
|
| + // for a Variable in a Mem operand.
|
| + Variable *T = makeReg(Var->getType(), RegNum);
|
| + _mov(T, Var);
|
| + return T;
|
| + }
|
| // Check if the variable is guaranteed a physical register. This can happen
|
| // either when the variable is pre-colored or when it is assigned infinite
|
| // weight.
|
| @@ -4844,9 +4931,9 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
|
| // If we didn't do address mode optimization, then we only have a
|
| // base/offset to work with. ARM always requires a base register, so
|
| // just use that to hold the operand.
|
| - Variable *Base = legalizeToReg(Operand);
|
| + Variable *BaseR = legalizeToReg(Operand);
|
| return OperandARM32Mem::create(
|
| - Func, Ty, Base,
|
| + Func, Ty, BaseR,
|
| llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
|
| }
|
|
|
| @@ -4863,6 +4950,7 @@ Variable64On32 *TargetARM32::makeI64RegPair() {
|
| Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
|
| // There aren't any 64-bit integer registers for ARM32.
|
| assert(Type != IceType_i64);
|
| + assert(AllowTemporaryWithNoReg || RegNum != Variable::NoRegister);
|
| Variable *Reg = Func->makeVariable(Type);
|
| if (RegNum == Variable::NoRegister)
|
| Reg->setMustHaveReg();
|
| @@ -4871,7 +4959,8 @@ Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
|
| return Reg;
|
| }
|
|
|
| -void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
|
| +void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
|
| + int32_t TmpRegNum) {
|
| assert(llvm::isPowerOf2_32(Align));
|
| uint32_t RotateAmt;
|
| uint32_t Immed_8;
|
| @@ -4880,10 +4969,12 @@ void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
|
| // it fits at all). Assume Align is usually small, in which case BIC works
|
| // better. Thus, this rounds down to the alignment.
|
| if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
|
| - Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
|
| + Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
|
| + TmpRegNum);
|
| _bic(Reg, Reg, Mask);
|
| } else {
|
| - Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
|
| + Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
|
| + TmpRegNum);
|
| _and(Reg, Reg, Mask);
|
| }
|
| }
|
|
|