Chromium Code Reviews| Index: src/IceCfg.cpp |
| diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp |
| index 498c5e9da2d08dda4c409d90af446f2819a95bfa..24bbd206ca10f7fa13717c4a4c30c7a4adb2d925 100644 |
| --- a/src/IceCfg.cpp |
| +++ b/src/IceCfg.cpp |
| @@ -201,7 +201,26 @@ void Cfg::translate() { |
| if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) |
| Var64On32->initHiLo(this); |
| - processAllocas(); |
| + // Figure out which alloca instructions result in storage at known stack frame |
| + // offsets. If this is true for all alloca instructions, then a stack pointer |
| + // can still be used instead of a frame pointer, freeing up the frame pointer |
| + // for normal register allocation. Additionally, for each such alloca, its |
| + // address could be rematerialized at each use in terms of the stack/frame |
| + // pointer, saving a stack slot and a load from that stack slot. |
|
Jim Stichnoth
2015/11/11 17:39:44
And extra register pressure from having to load fr
sehr
2015/11/11 22:14:10
Done.
|
| + // |
| + // This simple implementation is limited to alloca instructions at the start |
|
Jim Stichnoth
2015/11/11 17:39:44
Didn't you find an example where a non-alloca stat
sehr
2015/11/11 22:14:10
The example is tests_lit/llvm2ice_tests/alloc.ll,
|
| + // of the entry node. |
| + for (Inst &Instr : getEntryNode()->getInsts()) { |
| + if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
| + if (llvm::isa<Constant>(Alloca->getSizeInBytes())) { |
| + Alloca->setKnownFrameOffset(); |
| + continue; |
| + } |
| + } |
| + // The first instruction that is not an alloca with a constant size stops |
| + // the search. |
| + break; |
| + } |
| // The set of translation passes and their order are determined by the |
| // target. |
| @@ -451,28 +470,96 @@ void Cfg::doArgLowering() { |
| getTarget()->lowerArguments(); |
| } |
| -void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts, |
| - bool IsKnownFrameOffset) { |
| +void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, |
| + uint32_t CombinedAlignment, InstList &Insts, |
| + AllocaBaseVariableType BaseVariableType) { |
| if (Allocas.empty()) |
| return; |
| - // Sort by decreasing alignment. This does not really matter at the moment, |
| - // but will allow compacting stack allocation when we fuse to one alloca. |
| + // Sort by decreasing alignment. |
| std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) { |
| auto *A1 = llvm::dyn_cast<InstAlloca>(I1); |
| auto *A2 = llvm::dyn_cast<InstAlloca>(I2); |
| return A1->getAlignInBytes() > A2->getAlignInBytes(); |
| }); |
| + // Process the allocas in order of decreasing stack alignment. This allows |
| + // us to pack less-aligned pieces after more-aligned ones, resulting in less |
| + // stack growth. It also allows there to be at most one stack alignment "and" |
| + // instruction for a whole list of allocas. |
| + uint32_t CurrentOffset = 0; |
| + CfgVector<int32_t> Offsets; |
| for (Inst *Instr : Allocas) { |
| auto *Alloca = llvm::cast<InstAlloca>(Instr); |
| - // Move the alloca to its sorted position. |
| - InstAlloca *NewAlloca = |
| - InstAlloca::create(this, Alloca->getSizeInBytes(), |
| - Alloca->getAlignInBytes(), Alloca->getDest()); |
| - if (IsKnownFrameOffset) |
| - NewAlloca->setKnownFrameOffset(); |
| - Insts.push_front(NewAlloca); |
| + // Adjust the size of the allocation up to the next multiple of the |
| + // object's alignment. |
| + uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u); |
| + auto *ConstSize = |
| + llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes()); |
| + uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment); |
| + if (BaseVariableType == FramePointer) { |
| + // Addressing is relative to the frame pointer. Subtract the offset after |
| + // adding the size of the alloca, because it grows downwards from the |
| + // frame pointer. |
| + Offsets.push_back(-(CurrentOffset + Size)); |
| + } else { |
| + // Addressing is relative to the stack pointer or to a user pointer. Add |
| + // the offset before adding the size of the object, because it grows |
| + // upwards from the stack pointer. |
| + Offsets.push_back(CurrentOffset); |
| + } |
| + // Update the running offset of the fused alloca region. |
| + CurrentOffset += Size; |
| + } |
| + // Round the offset up to the alignment granularity to use as the size. |
| + uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment); |
| + // Ensure every alloca was assigned an offset. |
| + assert(Allocas.size() == Offsets.size()); |
| + Variable *BaseVariable = makeVariable(IceType_i32); |
| + Variable *AllocaDest = BaseVariable; |
| + // Emit one addition for each alloca after the first. |
| + for (size_t i = 0; i < Allocas.size(); ++i) { |
| + auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]); |
| + switch (BaseVariableType) { |
| + case FramePointer: |
| + case UserPointer: { |
| + // Emit a new addition operation to replace the alloca. |
| + Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]); |
| + InstArithmetic *Add = |
| + InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(), |
| + BaseVariable, AllocaOffset); |
| + Insts.push_front(Add); |
| + } break; |
| + case StackPointer: { |
| + // Emit a fake definition of the rematerializable variable. |
| + Variable *Dest = Alloca->getDest(); |
| + InstFakeDef *Def = InstFakeDef::create(this, Dest); |
| + Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]); |
| + Insts.push_front(Def); |
| + } break; |
| + } |
| Alloca->setDeleted(); |
| } |
| + Operand *AllocaSize = Ctx->getConstantInt32(TotalSize); |
| + switch (BaseVariableType) { |
| + case FramePointer: { |
| + // Adjust the return of the alloca to the top of the returned region. |
| + AllocaDest = makeVariable(IceType_i32); |
| + InstArithmetic *Add = InstArithmetic::create( |
| + this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize); |
| + Insts.push_front(Add); |
| + } break; |
| + case StackPointer: { |
| + // Emit a fake use to keep the Alloca live. |
| + InstFakeUse *Use = InstFakeUse::create(this, AllocaDest); |
| + Insts.push_front(Use); |
| + } break; |
| + case UserPointer: |
| + break; |
| + } |
| + // And insert the fused alloca. |
| + InstAlloca *CombinedAlloca = |
| + InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest); |
| + CombinedAlloca->setKnownFrameOffset(); |
| + Insts.push_front(CombinedAlloca); |
| } |
| void Cfg::processAllocas() { |
| @@ -486,51 +573,71 @@ void Cfg::processAllocas() { |
| CfgVector<Inst *> AlignedAllocas; |
| // LLVM enforces power of 2 alignment. |
| assert(llvm::isPowerOf2_32(StackAlignment)); |
| - // Collect the Allocas into the two vectors. |
| - bool RequiresFramePointer = false; |
| + // Determine if there are large alignment allocations in the entry block or |
| + // dynamic allocations (variable size in the entry block). |
| + bool HasLargeAlignment = false; |
| + bool HasDynamicAllocation = false; |
| for (Inst &Instr : EntryNode->getInsts()) { |
| if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
| - if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) { |
| - // Variable-sized allocations require a frame pointer. |
| - RequiresFramePointer = true; |
| - continue; |
| - } |
| uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
| - // For default align=0, set it to the real value 1, to avoid any |
| - // bit-manipulation problems below. |
| - AlignmentParam = std::max(AlignmentParam, 1u); |
| - assert(llvm::isPowerOf2_32(AlignmentParam)); |
| - if (AlignmentParam > StackAlignment) { |
| - // Allocations aligned more than the stack require a frame pointer. |
| - RequiresFramePointer = true; |
| - AlignedAllocas.push_back(Alloca); |
| - } else |
| - FixedAllocas.push_back(Alloca); |
| + if (AlignmentParam > StackAlignment) |
| + HasLargeAlignment = true; |
| + if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) |
| + HasDynamicAllocation = true; |
| } |
| } |
| - // Look for alloca instructions in other blocks |
| + // Any alloca outside the entry block is a dynamic allocation. |
| for (CfgNode *Node : Nodes) { |
| if (Node == EntryNode) |
| continue; |
| for (Inst &Instr : Node->getInsts()) { |
| if (llvm::isa<InstAlloca>(&Instr)) { |
| // Allocations outside the entry block require a frame pointer. |
| - RequiresFramePointer = true; |
| + HasDynamicAllocation = true; |
| break; |
| } |
| } |
| - if (RequiresFramePointer) |
| + if (HasDynamicAllocation) |
| break; |
| } |
| // Mark the target as requiring a frame pointer. |
| - if (RequiresFramePointer) |
| + if (HasLargeAlignment || HasDynamicAllocation) |
| getTarget()->setHasFramePointer(); |
| + // Collect the Allocas into the two vectors. |
| + // Maximum alignment used for the dynamic/aligned allocas. |
| + uint32_t MaxAlignment = StackAlignment; |
| + for (Inst &Instr : EntryNode->getInsts()) { |
| + if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
| + if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) |
| + continue; |
| + uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
| + // For default align=0, set it to the real value 1, to avoid any |
| + // bit-manipulation problems below. |
| + AlignmentParam = std::max(AlignmentParam, 1u); |
| + assert(llvm::isPowerOf2_32(AlignmentParam)); |
| + if (HasDynamicAllocation && AlignmentParam > StackAlignment) { |
| + // If we have both dynamic allocations and large stack alignments, |
| + // high-alignment allocations are pulled out with their own base. |
| + AlignedAllocas.push_back(Alloca); |
| + } else |
|
Jim Stichnoth
2015/11/11 17:39:44
} else {
(for consistency)
sehr
2015/11/11 22:14:10
Done.
|
| + FixedAllocas.push_back(Alloca); |
| + MaxAlignment = std::max(AlignmentParam, MaxAlignment); |
| + } |
| + } |
| // Add instructions to the head of the entry block in reverse order. |
| InstList &Insts = getEntryNode()->getInsts(); |
| - // Fixed, large alignment alloca addresses do not have known offset. |
| - sortAllocas(AlignedAllocas, Insts, false); |
| - // Fixed, small alignment alloca addresses have known offset. |
| - sortAllocas(FixedAllocas, Insts, true); |
| + if (HasDynamicAllocation && HasLargeAlignment) { |
| + // We are using a frame pointer, but fixed large-alignment alloca addresses, |
| + // do not have a known offset from either the stack or frame pointer. |
| + // They grow up from a user pointer from an alloca. |
| + sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, UserPointer); |
| + } |
| + // Otherwise, fixed size allocas are always addressed relative to the stack |
| + // unless there are dynamic allocas. |
| + // TODO(sehr): re-enable frame pointer and decrementing addressing. |
| + AllocaBaseVariableType BasePointerType = |
| + (HasDynamicAllocation ? UserPointer : StackPointer); |
| + sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType); |
| } |
| void Cfg::doAddressOpt() { |