Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Unified Diff: src/IceCfg.cpp

Issue 1411583007: Combine allocas (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix a typo. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceCfg.cpp
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 498c5e9da2d08dda4c409d90af446f2819a95bfa..24bbd206ca10f7fa13717c4a4c30c7a4adb2d925 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -201,7 +201,26 @@ void Cfg::translate() {
if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
Var64On32->initHiLo(this);
- processAllocas();
+ // Figure out which alloca instructions result in storage at known stack frame
+ // offsets. If this is true for all alloca instructions, then a stack pointer
+ // can still be used instead of a frame pointer, freeing up the frame pointer
+ // for normal register allocation. Additionally, for each such alloca, its
+ // address could be rematerialized at each use in terms of the stack/frame
+ // pointer, saving a stack slot and a load from that stack slot.
Jim Stichnoth 2015/11/11 17:39:44 And extra register pressure from having to load fr
sehr 2015/11/11 22:14:10 Done.
+ //
+ // This simple implementation is limited to alloca instructions at the start
Jim Stichnoth 2015/11/11 17:39:44 Didn't you find an example where a non-alloca stat
sehr 2015/11/11 22:14:10 The example is tests_lit/llvm2ice_tests/alloc.ll,
+ // of the entry node.
+ for (Inst &Instr : getEntryNode()->getInsts()) {
+ if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
+ if (llvm::isa<Constant>(Alloca->getSizeInBytes())) {
+ Alloca->setKnownFrameOffset();
+ continue;
+ }
+ }
+ // The first instruction that is not an alloca with a constant size stops
+ // the search.
+ break;
+ }
// The set of translation passes and their order are determined by the
// target.
@@ -451,28 +470,96 @@ void Cfg::doArgLowering() {
getTarget()->lowerArguments();
}
-void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
- bool IsKnownFrameOffset) {
+void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
+ uint32_t CombinedAlignment, InstList &Insts,
+ AllocaBaseVariableType BaseVariableType) {
if (Allocas.empty())
return;
- // Sort by decreasing alignment. This does not really matter at the moment,
- // but will allow compacting stack allocation when we fuse to one alloca.
+ // Sort by decreasing alignment.
std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) {
auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
return A1->getAlignInBytes() > A2->getAlignInBytes();
});
+ // Process the allocas in order of decreasing stack alignment. This allows
+ // us to pack less-aligned pieces after more-aligned ones, resulting in less
+ // stack growth. It also allows there to be at most one stack alignment "and"
+ // instruction for a whole list of allocas.
+ uint32_t CurrentOffset = 0;
+ CfgVector<int32_t> Offsets;
for (Inst *Instr : Allocas) {
auto *Alloca = llvm::cast<InstAlloca>(Instr);
- // Move the alloca to its sorted position.
- InstAlloca *NewAlloca =
- InstAlloca::create(this, Alloca->getSizeInBytes(),
- Alloca->getAlignInBytes(), Alloca->getDest());
- if (IsKnownFrameOffset)
- NewAlloca->setKnownFrameOffset();
- Insts.push_front(NewAlloca);
+ // Adjust the size of the allocation up to the next multiple of the
+ // object's alignment.
+ uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u);
+ auto *ConstSize =
+ llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes());
+ uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment);
+ if (BaseVariableType == FramePointer) {
+ // Addressing is relative to the frame pointer. Subtract the offset after
+ // adding the size of the alloca, because it grows downwards from the
+ // frame pointer.
+ Offsets.push_back(-(CurrentOffset + Size));
+ } else {
+ // Addressing is relative to the stack pointer or to a user pointer. Add
+ // the offset before adding the size of the object, because it grows
+ // upwards from the stack pointer.
+ Offsets.push_back(CurrentOffset);
+ }
+ // Update the running offset of the fused alloca region.
+ CurrentOffset += Size;
+ }
+ // Round the offset up to the alignment granularity to use as the size.
+ uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
+ // Ensure every alloca was assigned an offset.
+ assert(Allocas.size() == Offsets.size());
+ Variable *BaseVariable = makeVariable(IceType_i32);
+ Variable *AllocaDest = BaseVariable;
+ // Emit one addition for each alloca after the first.
+ for (size_t i = 0; i < Allocas.size(); ++i) {
+ auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
+ switch (BaseVariableType) {
+ case FramePointer:
+ case UserPointer: {
+ // Emit a new addition operation to replace the alloca.
+ Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
+ InstArithmetic *Add =
+ InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
+ BaseVariable, AllocaOffset);
+ Insts.push_front(Add);
+ } break;
+ case StackPointer: {
+ // Emit a fake definition of the rematerializable variable.
+ Variable *Dest = Alloca->getDest();
+ InstFakeDef *Def = InstFakeDef::create(this, Dest);
+ Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
+ Insts.push_front(Def);
+ } break;
+ }
Alloca->setDeleted();
}
+ Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
+ switch (BaseVariableType) {
+ case FramePointer: {
+ // Adjust the return of the alloca to the top of the returned region.
+ AllocaDest = makeVariable(IceType_i32);
+ InstArithmetic *Add = InstArithmetic::create(
+ this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize);
+ Insts.push_front(Add);
+ } break;
+ case StackPointer: {
+ // Emit a fake use to keep the Alloca live.
+ InstFakeUse *Use = InstFakeUse::create(this, AllocaDest);
+ Insts.push_front(Use);
+ } break;
+ case UserPointer:
+ break;
+ }
+ // And insert the fused alloca.
+ InstAlloca *CombinedAlloca =
+ InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest);
+ CombinedAlloca->setKnownFrameOffset();
+ Insts.push_front(CombinedAlloca);
}
void Cfg::processAllocas() {
@@ -486,51 +573,71 @@ void Cfg::processAllocas() {
CfgVector<Inst *> AlignedAllocas;
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(StackAlignment));
- // Collect the Allocas into the two vectors.
- bool RequiresFramePointer = false;
+ // Determine if there are large alignment allocations in the entry block or
+ // dynamic allocations (variable size in the entry block).
+ bool HasLargeAlignment = false;
+ bool HasDynamicAllocation = false;
for (Inst &Instr : EntryNode->getInsts()) {
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
- if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) {
- // Variable-sized allocations require a frame pointer.
- RequiresFramePointer = true;
- continue;
- }
uint32_t AlignmentParam = Alloca->getAlignInBytes();
- // For default align=0, set it to the real value 1, to avoid any
- // bit-manipulation problems below.
- AlignmentParam = std::max(AlignmentParam, 1u);
- assert(llvm::isPowerOf2_32(AlignmentParam));
- if (AlignmentParam > StackAlignment) {
- // Allocations aligned more than the stack require a frame pointer.
- RequiresFramePointer = true;
- AlignedAllocas.push_back(Alloca);
- } else
- FixedAllocas.push_back(Alloca);
+ if (AlignmentParam > StackAlignment)
+ HasLargeAlignment = true;
+ if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
+ HasDynamicAllocation = true;
}
}
- // Look for alloca instructions in other blocks
+ // Any alloca outside the entry block is a dynamic allocation.
for (CfgNode *Node : Nodes) {
if (Node == EntryNode)
continue;
for (Inst &Instr : Node->getInsts()) {
if (llvm::isa<InstAlloca>(&Instr)) {
// Allocations outside the entry block require a frame pointer.
- RequiresFramePointer = true;
+ HasDynamicAllocation = true;
break;
}
}
- if (RequiresFramePointer)
+ if (HasDynamicAllocation)
break;
}
// Mark the target as requiring a frame pointer.
- if (RequiresFramePointer)
+ if (HasLargeAlignment || HasDynamicAllocation)
getTarget()->setHasFramePointer();
+ // Collect the Allocas into the two vectors.
+ // Maximum alignment used for the dynamic/aligned allocas.
+ uint32_t MaxAlignment = StackAlignment;
+ for (Inst &Instr : EntryNode->getInsts()) {
+ if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
+ if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
+ continue;
+ uint32_t AlignmentParam = Alloca->getAlignInBytes();
+ // For default align=0, set it to the real value 1, to avoid any
+ // bit-manipulation problems below.
+ AlignmentParam = std::max(AlignmentParam, 1u);
+ assert(llvm::isPowerOf2_32(AlignmentParam));
+ if (HasDynamicAllocation && AlignmentParam > StackAlignment) {
+ // If we have both dynamic allocations and large stack alignments,
+ // high-alignment allocations are pulled out with their own base.
+ AlignedAllocas.push_back(Alloca);
+ } else
Jim Stichnoth 2015/11/11 17:39:44 } else { (for consistency)
sehr 2015/11/11 22:14:10 Done.
+ FixedAllocas.push_back(Alloca);
+ MaxAlignment = std::max(AlignmentParam, MaxAlignment);
+ }
+ }
// Add instructions to the head of the entry block in reverse order.
InstList &Insts = getEntryNode()->getInsts();
- // Fixed, large alignment alloca addresses do not have known offset.
- sortAllocas(AlignedAllocas, Insts, false);
- // Fixed, small alignment alloca addresses have known offset.
- sortAllocas(FixedAllocas, Insts, true);
+ if (HasDynamicAllocation && HasLargeAlignment) {
+ // We are using a frame pointer, but fixed large-alignment alloca addresses,
+ // do not have a known offset from either the stack or frame pointer.
+ // They grow up from a user pointer from an alloca.
+ sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, UserPointer);
+ }
+ // Otherwise, fixed size allocas are always addressed relative to the stack
+ // unless there are dynamic allocas.
+ // TODO(sehr): re-enable frame pointer and decrementing addressing.
+ AllocaBaseVariableType BasePointerType =
+ (HasDynamicAllocation ? UserPointer : StackPointer);
+ sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
}
void Cfg::doAddressOpt() {

Powered by Google App Engine
This is Rietveld 408576698