Index: src/IceCfg.cpp |
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp |
index 57b1e739193cb017068e32fd9afad1047c69951b..9eb934b3c8b6a5f73cf56205af9d37cf6ed6ee29 100644 |
--- a/src/IceCfg.cpp |
+++ b/src/IceCfg.cpp |
@@ -201,26 +201,7 @@ void Cfg::translate() { |
if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) |
Var64On32->initHiLo(this); |
- // Figure out which alloca instructions result in storage at known stack frame |
- // offsets. If this is true for all alloca instructions, then a stack pointer |
- // can still be used instead of a frame pointer, freeing up the frame pointer |
- // for normal register allocation. Additionally, for each such alloca, its |
- // address could be rematerialized at each use in terms of the stack/frame |
- // pointer, saving a stack slot and a load from that stack slot. |
- // |
- // This simple implementation is limited to alloca instructions at the start |
- // of the entry node. |
- for (Inst &Instr : getEntryNode()->getInsts()) { |
- if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
- if (llvm::isa<Constant>(Alloca->getSizeInBytes())) { |
- Alloca->setKnownFrameOffset(); |
- continue; |
- } |
- } |
- // The first instruction that is not an alloca with a constant size stops |
- // the search. |
- break; |
- } |
+ processAllocas(); |
// The set of translation passes and their order are determined by the |
// target. |
@@ -470,6 +451,91 @@ void Cfg::doArgLowering() { |
getTarget()->lowerArguments(); |
} |
+void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts, |
+ bool IsKnownFrameOffset) { |
+ if (Allocas.empty()) |
+ return; |
+ // Sort by decreasing alignment. This does not really matter at the moment, |
+ // but will allow compacting stack allocation when we fuse to one alloca. |
+ std::sort(Allocas.begin(), Allocas.end(), |
+ [](Inst *I1, Inst *I2) { |
+ auto *A1 = llvm::dyn_cast<InstAlloca>(I1); |
+ auto *A2 = llvm::dyn_cast<InstAlloca>(I2); |
+ return A1->getAlignInBytes() > A2->getAlignInBytes(); |
+ }); |
+ for (Inst *Instr: Allocas) { |
+ auto *Alloca = llvm::cast<InstAlloca>(Instr); |
+ // Move the alloca to its sorted position. |
+ InstAlloca *NewAlloca = InstAlloca::create(this, |
+ Alloca->getSizeInBytes(), |
+ Alloca->getAlignInBytes(), |
+ Alloca->getDest()); |
+ if (IsKnownFrameOffset) |
+ NewAlloca->setKnownFrameOffset(); |
+ Insts.push_front(NewAlloca); |
+ Alloca->setDeleted(); |
+ } |
+} |
+ |
+void Cfg::processAllocas() { |
+ const uint32_t StackAlignment = getTarget()->getStackAlignment(); |
+ CfgNode *EntryNode = getEntryNode(); |
+ // Allocas in the entry block that have constant size and alignment less |
+ // than or equal to the function's stack alignment. |
+ CfgVector<Inst *> FixedAllocas; |
+ // Allocas in the entry block that have constant size and alignment greater |
+ // than the function's stack alignment. |
+ CfgVector<Inst *> AlignedAllocas; |
+ // LLVM enforces power of 2 alignment. |
+ assert(llvm::isPowerOf2_32(StackAlignment)); |
+ // Collect the Allocas into the two vectors. |
+ bool RequiresFramePointer = false; |
+ for (Inst &Instr : EntryNode->getInsts()) { |
+ if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
+ if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) { |
+ // Variable-sized allocations require a frame pointer. |
+ RequiresFramePointer = true; |
+ continue; |
+ } |
+ uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
+ // For default align=0, set it to the real value 1, to avoid any |
+ // bit-manipulation problems below. |
+ AlignmentParam = std::max(AlignmentParam, 1u); |
+ assert(llvm::isPowerOf2_32(AlignmentParam)); |
+ if (AlignmentParam > StackAlignment) { |
+ // Allocations aligned more than the stack require a frame pointer. |
+ RequiresFramePointer = true; |
+ AlignedAllocas.push_back(Alloca); |
+ } |
+ else |
+ FixedAllocas.push_back(Alloca); |
+ } |
+ } |
+ // Look for alloca instructions in other blocks |
+ for (CfgNode *Node : Nodes) { |
+ if (Node == EntryNode) |
+ continue; |
+ for (Inst &Instr : Node->getInsts()) { |
+ if (llvm::isa<InstAlloca>(&Instr)) { |
+ // Allocations outside the entry block require a frame pointer. |
+ RequiresFramePointer = true; |
+ break; |
+ } |
+ } |
+ if (RequiresFramePointer) |
+ break; |
+ } |
+ // Mark the target as requiring a frame pointer. |
+ if (RequiresFramePointer) |
+ getTarget()->setHasFramePointer(); |
+ // Add instructions to the head of the entry block in reverse order. |
+ InstList &Insts = getEntryNode()->getInsts(); |
+ // Fixed, large alignment alloca addresses do not have known offset. |
+ sortAllocas(AlignedAllocas, Insts, false); |
+ // Fixed, small alignment alloca addresses have known offset. |
+ sortAllocas(FixedAllocas, Insts, true); |
+} |
+ |
void Cfg::doAddressOpt() { |
TimerMarker T(TimerStack::TT_doAddressOpt, this); |
for (CfgNode *Node : Nodes) |