| Index: src/IceCfg.cpp
|
| diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
|
| index 57b1e739193cb017068e32fd9afad1047c69951b..9eb934b3c8b6a5f73cf56205af9d37cf6ed6ee29 100644
|
| --- a/src/IceCfg.cpp
|
| +++ b/src/IceCfg.cpp
|
| @@ -201,26 +201,7 @@ void Cfg::translate() {
|
| if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
|
| Var64On32->initHiLo(this);
|
|
|
| - // Figure out which alloca instructions result in storage at known stack frame
|
| - // offsets. If this is true for all alloca instructions, then a stack pointer
|
| - // can still be used instead of a frame pointer, freeing up the frame pointer
|
| - // for normal register allocation. Additionally, for each such alloca, its
|
| - // address could be rematerialized at each use in terms of the stack/frame
|
| - // pointer, saving a stack slot and a load from that stack slot.
|
| - //
|
| - // This simple implementation is limited to alloca instructions at the start
|
| - // of the entry node.
|
| - for (Inst &Instr : getEntryNode()->getInsts()) {
|
| - if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
|
| - if (llvm::isa<Constant>(Alloca->getSizeInBytes())) {
|
| - Alloca->setKnownFrameOffset();
|
| - continue;
|
| - }
|
| - }
|
| - // The first instruction that is not an alloca with a constant size stops
|
| - // the search.
|
| - break;
|
| - }
|
| + processAllocas();
|
|
|
| // The set of translation passes and their order are determined by the
|
| // target.
|
| @@ -470,6 +451,91 @@ void Cfg::doArgLowering() {
|
| getTarget()->lowerArguments();
|
| }
|
|
|
| +void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
|
| + bool IsKnownFrameOffset) {
|
| + if (Allocas.empty())
|
| + return;
|
| + // Sort by decreasing alignment. This does not really matter at the moment,
|
| + // but will allow compacting stack allocation when we fuse to one alloca.
|
| + std::sort(Allocas.begin(), Allocas.end(),
|
| + [](Inst *I1, Inst *I2) {
|
| + auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
|
| + auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
|
| + return A1->getAlignInBytes() > A2->getAlignInBytes();
|
| + });
|
| + for (Inst *Instr: Allocas) {
|
| + auto *Alloca = llvm::cast<InstAlloca>(Instr);
|
| + // Move the alloca to its sorted position.
|
| + InstAlloca *NewAlloca = InstAlloca::create(this,
|
| + Alloca->getSizeInBytes(),
|
| + Alloca->getAlignInBytes(),
|
| + Alloca->getDest());
|
| + if (IsKnownFrameOffset)
|
| + NewAlloca->setKnownFrameOffset();
|
| + Insts.push_front(NewAlloca);
|
| + Alloca->setDeleted();
|
| + }
|
| +}
|
| +
|
| +void Cfg::processAllocas() {
|
| + const uint32_t StackAlignment = getTarget()->getStackAlignment();
|
| + CfgNode *EntryNode = getEntryNode();
|
| + // Allocas in the entry block that have constant size and alignment less
|
| + // than or equal to the function's stack alignment.
|
| + CfgVector<Inst *> FixedAllocas;
|
| + // Allocas in the entry block that have constant size and alignment greater
|
| + // than the function's stack alignment.
|
| + CfgVector<Inst *> AlignedAllocas;
|
| + // LLVM enforces power of 2 alignment.
|
| + assert(llvm::isPowerOf2_32(StackAlignment));
|
| + // Collect the Allocas into the two vectors.
|
| + bool RequiresFramePointer = false;
|
| + for (Inst &Instr : EntryNode->getInsts()) {
|
| + if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
|
| + if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) {
|
| + // Variable-sized allocations require a frame pointer.
|
| + RequiresFramePointer = true;
|
| + continue;
|
| + }
|
| + uint32_t AlignmentParam = Alloca->getAlignInBytes();
|
| + // For default align=0, set it to the real value 1, to avoid any
|
| + // bit-manipulation problems below.
|
| + AlignmentParam = std::max(AlignmentParam, 1u);
|
| + assert(llvm::isPowerOf2_32(AlignmentParam));
|
| + if (AlignmentParam > StackAlignment) {
|
| + // Allocations aligned more than the stack require a frame pointer.
|
| + RequiresFramePointer = true;
|
| + AlignedAllocas.push_back(Alloca);
|
| + }
|
| + else
|
| + FixedAllocas.push_back(Alloca);
|
| + }
|
| + }
|
| + // Look for alloca instructions in other blocks
|
| + for (CfgNode *Node : Nodes) {
|
| + if (Node == EntryNode)
|
| + continue;
|
| + for (Inst &Instr : Node->getInsts()) {
|
| + if (llvm::isa<InstAlloca>(&Instr)) {
|
| + // Allocations outside the entry block require a frame pointer.
|
| + RequiresFramePointer = true;
|
| + break;
|
| + }
|
| + }
|
| + if (RequiresFramePointer)
|
| + break;
|
| + }
|
| + // Mark the target as requiring a frame pointer.
|
| + if (RequiresFramePointer)
|
| + getTarget()->setHasFramePointer();
|
| + // Add instructions to the head of the entry block in reverse order.
|
| + InstList &Insts = getEntryNode()->getInsts();
|
| + // Fixed, large alignment alloca addresses do not have known offset.
|
| + sortAllocas(AlignedAllocas, Insts, false);
|
| + // Fixed, small alignment alloca addresses have known offset.
|
| + sortAllocas(FixedAllocas, Insts, true);
|
| +}
|
| +
|
| void Cfg::doAddressOpt() {
|
| TimerMarker T(TimerStack::TT_doAddressOpt, this);
|
| for (CfgNode *Node : Nodes)
|
|
|