Index: src/IceCfg.cpp |
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp |
index 57b1e739193cb017068e32fd9afad1047c69951b..e8d6825da106e7d20acc7030ac786f774a1efefd 100644 |
--- a/src/IceCfg.cpp |
+++ b/src/IceCfg.cpp |
@@ -201,26 +201,7 @@ void Cfg::translate() { |
if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) |
Var64On32->initHiLo(this); |
- // Figure out which alloca instructions result in storage at known stack frame |
- // offsets. If this is true for all alloca instructions, then a stack pointer |
- // can still be used instead of a frame pointer, freeing up the frame pointer |
- // for normal register allocation. Additionally, for each such alloca, its |
- // address could be rematerialized at each use in terms of the stack/frame |
- // pointer, saving a stack slot and a load from that stack slot. |
- // |
- // This simple implementation is limited to alloca instructions at the start |
- // of the entry node. |
- for (Inst &Instr : getEntryNode()->getInsts()) { |
- if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
- if (llvm::isa<Constant>(Alloca->getSizeInBytes())) { |
- Alloca->setKnownFrameOffset(); |
- continue; |
- } |
- } |
- // The first instruction that is not an alloca with a constant size stops |
- // the search. |
- break; |
- } |
+ processAllocas(); |
// The set of translation passes and their order are determined by the |
// target. |
@@ -470,6 +451,90 @@ void Cfg::doArgLowering() { |
getTarget()->lowerArguments(); |
} |
+void Cfg::sortAllocas(std::vector<Inst *> &Allocas, InstList &Insts, |
Jim Stichnoth
2015/11/06 18:44:11
Use CfgVector instead of std::vector here and belo
sehr
2015/11/06 19:01:15
Done.
|
+ bool IsKnownFrameOffset) { |
+ if (Allocas.size() == 0) |
Jim Stichnoth
2015/11/06 18:44:11
.empty()
sehr
2015/11/06 19:01:15
Done.
|
+ return; |
+ // Sort by decreasing alignment. |
Jim Stichnoth
2015/11/06 18:45:27
This is so that they get prepended in order of inc
sehr
2015/11/06 19:01:15
It's sort of arbitrary in the general sense here,
|
+ std::sort(Allocas.begin(), Allocas.end(), |
+ [](Inst *I1, Inst *I2) { |
+ auto *A1 = llvm::dyn_cast<InstAlloca>(I1); |
+ auto *A2 = llvm::dyn_cast<InstAlloca>(I2); |
+ return A1->getAlignInBytes() > A2->getAlignInBytes(); |
+ }); |
+ for (Inst *Instr: Allocas) { |
+ auto *Alloca = llvm::cast<InstAlloca>(Instr); |
+ // Move the alloca to its sorted position. |
+ InstAlloca *NewAlloca = InstAlloca::create(this, |
+ Alloca->getSizeInBytes(), |
+ Alloca->getAlignInBytes(), |
+ Alloca->getDest()); |
+ if (IsKnownFrameOffset) |
+ NewAlloca->setKnownFrameOffset(); |
+ Insts.push_front(NewAlloca); |
+ Alloca->setDeleted(); |
+ } |
+} |
+ |
+void Cfg::processAllocas() { |
+ const uint32_t StackAlignment = getTarget()->getStackAlignment(); |
+ CfgNode *EntryNode = getEntryNode(); |
+ // Allocas in the entry block that have constant size and alignment less |
+ // than or equal to the function's stack alignment. |
+ std::vector<Inst *> FixedAllocas; |
+ // Allocas in the entry block that have constant size and alignment greater |
+ // than the function's stack alignment. |
+ std::vector<Inst *> AlignedAllocas; |
+ // LLVM enforces power of 2 alignment. |
+ assert(llvm::isPowerOf2_32(StackAlignment)); |
+ // Collect the Allocas into the two vectors. |
+ bool RequiresFramePointer = false; |
+ for (Inst &Instr : EntryNode->getInsts()) { |
+ if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
+ if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) { |
+ // Variable-sized allocations require a frame pointer. |
+ RequiresFramePointer = true; |
+ continue; |
+ } |
+ uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
+ // For default align=0, set it to the real value 1, to avoid any |
+ // bit-manipulation problems below. |
+ AlignmentParam = std::max(AlignmentParam, 1u); |
+ assert(llvm::isPowerOf2_32(AlignmentParam)); |
+ if (AlignmentParam > StackAlignment) { |
+ // Allocations aligned more than the stack require a frame pointer. |
+ RequiresFramePointer = true; |
+ AlignedAllocas.push_back(Alloca); |
+ } |
+ else |
+ FixedAllocas.push_back(Alloca); |
+ } |
+ } |
+ // Look for alloca instructions in other blocks |
+ for (CfgNode *Node : Nodes) { |
+ if (Node == EntryNode) |
+ continue; |
+ for (Inst &Instr : Node->getInsts()) { |
+ if (llvm::isa<InstAlloca>(&Instr)) { |
+ // Allocations outside the entry block require a frame pointer. |
+ RequiresFramePointer = true; |
+ break; |
+ } |
+ } |
+ if (RequiresFramePointer) |
+ break; |
+ } |
+ // Mark the target as requiring a frame pointer. |
+ if (RequiresFramePointer) |
+ getTarget()->setHasFramePointer(); |
+ // Add instructions to the head of the entry block in reverse order. |
+ InstList &Insts = getEntryNode()->getInsts(); |
+ // Fixed, large alignment alloca addresses do not have known offset. |
+ sortAllocas(AlignedAllocas, Insts, false); |
+ // Fixed, small alignment alloca addresses have known offset. |
+ sortAllocas(FixedAllocas, Insts, true); |
+} |
+ |
void Cfg::doAddressOpt() { |
TimerMarker T(TimerStack::TT_doAddressOpt, this); |
for (CfgNode *Node : Nodes) |