Index: src/IceCfg.cpp |
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp |
index 498c5e9da2d08dda4c409d90af446f2819a95bfa..0735146572267989262fdddf5c2791b0b8d0681b 100644 |
--- a/src/IceCfg.cpp |
+++ b/src/IceCfg.cpp |
@@ -201,8 +201,6 @@ void Cfg::translate() { |
if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) |
Var64On32->initHiLo(this); |
- processAllocas(); |
- |
// The set of translation passes and their order are determined by the |
// target. |
getTarget()->translate(); |
@@ -451,86 +449,180 @@ void Cfg::doArgLowering() { |
getTarget()->lowerArguments(); |
} |
-void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts, |
- bool IsKnownFrameOffset) { |
+void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, |
+ uint32_t CombinedAlignment, InstList &Insts, |
+ AllocaBaseVariableType BaseVariableType) { |
if (Allocas.empty()) |
return; |
- // Sort by decreasing alignment. This does not really matter at the moment, |
- // but will allow compacting stack allocation when we fuse to one alloca. |
+ // Sort by decreasing alignment. |
std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) { |
auto *A1 = llvm::dyn_cast<InstAlloca>(I1); |
auto *A2 = llvm::dyn_cast<InstAlloca>(I2); |
return A1->getAlignInBytes() > A2->getAlignInBytes(); |
}); |
+ // Process the allocas in order of decreasing stack alignment. This allows |
+ // us to pack less-aligned pieces after more-aligned ones, resulting in less |
+ // stack growth. It also allows there to be at most one stack alignment "and" |
+ // instruction for a whole list of allocas. |
+ uint32_t CurrentOffset = 0; |
+ CfgVector<int32_t> Offsets; |
for (Inst *Instr : Allocas) { |
auto *Alloca = llvm::cast<InstAlloca>(Instr); |
- // Move the alloca to its sorted position. |
- InstAlloca *NewAlloca = |
- InstAlloca::create(this, Alloca->getSizeInBytes(), |
- Alloca->getAlignInBytes(), Alloca->getDest()); |
- if (IsKnownFrameOffset) |
- NewAlloca->setKnownFrameOffset(); |
- Insts.push_front(NewAlloca); |
+ // Adjust the size of the allocation up to the next multiple of the |
+ // object's alignment. |
+ uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u); |
+ auto *ConstSize = |
+ llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes()); |
+ uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment); |
+ if (BaseVariableType == BVT_FramePointer) { |
+ // Addressing is relative to the frame pointer. Subtract the offset after |
+ // adding the size of the alloca, because it grows downwards from the |
+ // frame pointer. |
+ Offsets.push_back(-(CurrentOffset + Size)); |
+ } else { |
+ // Addressing is relative to the stack pointer or to a user pointer. Add |
+ // the offset before adding the size of the object, because it grows |
+ // upwards from the stack pointer. |
+ Offsets.push_back(CurrentOffset); |
+ } |
+ // Update the running offset of the fused alloca region. |
+ CurrentOffset += Size; |
+ } |
+ // Round the offset up to the alignment granularity to use as the size. |
+ uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment); |
+ // Ensure every alloca was assigned an offset. |
+ assert(Allocas.size() == Offsets.size()); |
+ Variable *BaseVariable = makeVariable(IceType_i32); |
+ Variable *AllocaDest = BaseVariable; |
+ // Emit one addition for each alloca after the first. |
+ for (size_t i = 0; i < Allocas.size(); ++i) { |
+ auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]); |
+ switch (BaseVariableType) { |
+ case BVT_FramePointer: |
+ case BVT_UserPointer: { |
+ // Emit a new addition operation to replace the alloca. |
+ Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]); |
+ InstArithmetic *Add = |
+ InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(), |
+ BaseVariable, AllocaOffset); |
+ Insts.push_front(Add); |
+ } break; |
+ case BVT_StackPointer: { |
+ // Emit a fake definition of the rematerializable variable. |
+ Variable *Dest = Alloca->getDest(); |
+ InstFakeDef *Def = InstFakeDef::create(this, Dest); |
+ Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]); |
+ Insts.push_front(Def); |
+ } break; |
+ } |
Alloca->setDeleted(); |
} |
+ Operand *AllocaSize = Ctx->getConstantInt32(TotalSize); |
+ switch (BaseVariableType) { |
+ case BVT_FramePointer: { |
+ // Adjust the return of the alloca to the top of the returned region. |
+ AllocaDest = makeVariable(IceType_i32); |
+ InstArithmetic *Add = InstArithmetic::create( |
+ this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize); |
+ Insts.push_front(Add); |
+ } break; |
+ case BVT_StackPointer: { |
+ // Emit a fake use to keep the Alloca live. |
+ InstFakeUse *Use = InstFakeUse::create(this, AllocaDest); |
+ Insts.push_front(Use); |
+ } break; |
+ case BVT_UserPointer: |
+ break; |
+ } |
+ // And insert the fused alloca. |
+ InstAlloca *CombinedAlloca = |
+ InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest); |
+ CombinedAlloca->setKnownFrameOffset(); |
+ Insts.push_front(CombinedAlloca); |
} |
-void Cfg::processAllocas() { |
+void Cfg::processAllocas(bool SortAndCombine) { |
const uint32_t StackAlignment = getTarget()->getStackAlignment(); |
CfgNode *EntryNode = getEntryNode(); |
- // Allocas in the entry block that have constant size and alignment less |
- // than or equal to the function's stack alignment. |
- CfgVector<Inst *> FixedAllocas; |
- // Allocas in the entry block that have constant size and alignment greater |
- // than the function's stack alignment. |
- CfgVector<Inst *> AlignedAllocas; |
// LLVM enforces power of 2 alignment. |
assert(llvm::isPowerOf2_32(StackAlignment)); |
- // Collect the Allocas into the two vectors. |
- bool RequiresFramePointer = false; |
+ // Determine if there are large alignment allocations in the entry block or |
+ // dynamic allocations (variable size in the entry block). |
+ bool HasLargeAlignment = false; |
+ bool HasDynamicAllocation = false; |
for (Inst &Instr : EntryNode->getInsts()) { |
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
- if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) { |
- // Variable-sized allocations require a frame pointer. |
- RequiresFramePointer = true; |
- continue; |
- } |
uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
- // For default align=0, set it to the real value 1, to avoid any |
- // bit-manipulation problems below. |
- AlignmentParam = std::max(AlignmentParam, 1u); |
- assert(llvm::isPowerOf2_32(AlignmentParam)); |
- if (AlignmentParam > StackAlignment) { |
- // Allocations aligned more than the stack require a frame pointer. |
- RequiresFramePointer = true; |
- AlignedAllocas.push_back(Alloca); |
- } else |
- FixedAllocas.push_back(Alloca); |
+ if (AlignmentParam > StackAlignment) |
+ HasLargeAlignment = true; |
+ if (llvm::isa<Constant>(Alloca->getSizeInBytes())) |
+ Alloca->setKnownFrameOffset(); |
+ else |
+ HasDynamicAllocation = true; |
} |
} |
- // Look for alloca instructions in other blocks |
+ // Don't do the heavyweight sorting and layout for low optimization levels. |
+ if (!SortAndCombine) |
+ return; |
+ // Any alloca outside the entry block is a dynamic allocation. |
for (CfgNode *Node : Nodes) { |
if (Node == EntryNode) |
continue; |
for (Inst &Instr : Node->getInsts()) { |
if (llvm::isa<InstAlloca>(&Instr)) { |
// Allocations outside the entry block require a frame pointer. |
- RequiresFramePointer = true; |
+ HasDynamicAllocation = true; |
break; |
} |
} |
- if (RequiresFramePointer) |
+ if (HasDynamicAllocation) |
break; |
} |
// Mark the target as requiring a frame pointer. |
- if (RequiresFramePointer) |
+ if (HasLargeAlignment || HasDynamicAllocation) |
getTarget()->setHasFramePointer(); |
+ // Collect the Allocas into the two vectors. |
+ // Allocas in the entry block that have constant size and alignment less |
+ // than or equal to the function's stack alignment. |
+ CfgVector<Inst *> FixedAllocas; |
+ // Allocas in the entry block that have constant size and alignment greater |
+ // than the function's stack alignment. |
+ CfgVector<Inst *> AlignedAllocas; |
+ // Maximum alignment used for the dynamic/aligned allocas. |
+ uint32_t MaxAlignment = StackAlignment; |
+ for (Inst &Instr : EntryNode->getInsts()) { |
+ if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
+ if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) |
+ continue; |
+ uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
+ // For default align=0, set it to the real value 1, to avoid any |
+ // bit-manipulation problems below. |
+ AlignmentParam = std::max(AlignmentParam, 1u); |
+ assert(llvm::isPowerOf2_32(AlignmentParam)); |
+ if (HasDynamicAllocation && AlignmentParam > StackAlignment) { |
+ // If we have both dynamic allocations and large stack alignments, |
+ // high-alignment allocations are pulled out with their own base. |
+ AlignedAllocas.push_back(Alloca); |
+ } else { |
+ FixedAllocas.push_back(Alloca); |
+ } |
+ MaxAlignment = std::max(AlignmentParam, MaxAlignment); |
+ } |
+ } |
// Add instructions to the head of the entry block in reverse order. |
InstList &Insts = getEntryNode()->getInsts(); |
- // Fixed, large alignment alloca addresses do not have known offset. |
- sortAllocas(AlignedAllocas, Insts, false); |
- // Fixed, small alignment alloca addresses have known offset. |
- sortAllocas(FixedAllocas, Insts, true); |
+ if (HasDynamicAllocation && HasLargeAlignment) { |
+ // We are using a frame pointer, but fixed large-alignment alloca addresses, |
+ // do not have a known offset from either the stack or frame pointer. |
+ // They grow up from a user pointer from an alloca. |
+ sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer); |
+ } |
+ // Otherwise, fixed size allocas are always addressed relative to the stack |
+ // unless there are dynamic allocas. |
+ // TODO(sehr): re-enable frame pointer and decrementing addressing. |
+ AllocaBaseVariableType BasePointerType = |
+ (HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer); |
+ sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType); |
} |
void Cfg::doAddressOpt() { |