Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Unified Diff: src/IceCfg.cpp

Issue 1411583007: Combine allocas (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Finish fast path. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceCfg.h ('k') | src/IceInstX8632.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceCfg.cpp
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 498c5e9da2d08dda4c409d90af446f2819a95bfa..0735146572267989262fdddf5c2791b0b8d0681b 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -201,8 +201,6 @@ void Cfg::translate() {
if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
Var64On32->initHiLo(this);
- processAllocas();
-
// The set of translation passes and their order are determined by the
// target.
getTarget()->translate();
@@ -451,86 +449,180 @@ void Cfg::doArgLowering() {
getTarget()->lowerArguments();
}
-void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
- bool IsKnownFrameOffset) {
+void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
+ uint32_t CombinedAlignment, InstList &Insts,
+ AllocaBaseVariableType BaseVariableType) {
if (Allocas.empty())
return;
- // Sort by decreasing alignment. This does not really matter at the moment,
- // but will allow compacting stack allocation when we fuse to one alloca.
+ // Sort by decreasing alignment.
std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) {
auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
return A1->getAlignInBytes() > A2->getAlignInBytes();
});
+ // Process the allocas in order of decreasing stack alignment. This allows
+ // us to pack less-aligned pieces after more-aligned ones, resulting in less
+ // stack growth. It also allows there to be at most one stack alignment "and"
+ // instruction for a whole list of allocas.
+ uint32_t CurrentOffset = 0;
+ CfgVector<int32_t> Offsets;
for (Inst *Instr : Allocas) {
auto *Alloca = llvm::cast<InstAlloca>(Instr);
- // Move the alloca to its sorted position.
- InstAlloca *NewAlloca =
- InstAlloca::create(this, Alloca->getSizeInBytes(),
- Alloca->getAlignInBytes(), Alloca->getDest());
- if (IsKnownFrameOffset)
- NewAlloca->setKnownFrameOffset();
- Insts.push_front(NewAlloca);
+ // Adjust the size of the allocation up to the next multiple of the
+ // object's alignment.
+ uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u);
+ auto *ConstSize =
+ llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes());
+ uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment);
+ if (BaseVariableType == BVT_FramePointer) {
+ // Addressing is relative to the frame pointer. Subtract the offset after
+ // adding the size of the alloca, because it grows downwards from the
+ // frame pointer.
+ Offsets.push_back(-(CurrentOffset + Size));
+ } else {
+ // Addressing is relative to the stack pointer or to a user pointer. Add
+ // the offset before adding the size of the object, because it grows
+ // upwards from the stack pointer.
+ Offsets.push_back(CurrentOffset);
+ }
+ // Update the running offset of the fused alloca region.
+ CurrentOffset += Size;
+ }
+ // Round the offset up to the alignment granularity to use as the size.
+ uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
+ // Ensure every alloca was assigned an offset.
+ assert(Allocas.size() == Offsets.size());
+ Variable *BaseVariable = makeVariable(IceType_i32);
+ Variable *AllocaDest = BaseVariable;
+ // Emit one addition for each alloca after the first.
+ for (size_t i = 0; i < Allocas.size(); ++i) {
+ auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
+ switch (BaseVariableType) {
+ case BVT_FramePointer:
+ case BVT_UserPointer: {
+ // Emit a new addition operation to replace the alloca.
+ Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
+ InstArithmetic *Add =
+ InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
+ BaseVariable, AllocaOffset);
+ Insts.push_front(Add);
+ } break;
+ case BVT_StackPointer: {
+ // Emit a fake definition of the rematerializable variable.
+ Variable *Dest = Alloca->getDest();
+ InstFakeDef *Def = InstFakeDef::create(this, Dest);
+ Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
+ Insts.push_front(Def);
+ } break;
+ }
Alloca->setDeleted();
}
+ Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
+ switch (BaseVariableType) {
+ case BVT_FramePointer: {
+ // Adjust the return of the alloca to the top of the returned region.
+ AllocaDest = makeVariable(IceType_i32);
+ InstArithmetic *Add = InstArithmetic::create(
+ this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize);
+ Insts.push_front(Add);
+ } break;
+ case BVT_StackPointer: {
+ // Emit a fake use to keep the Alloca live.
+ InstFakeUse *Use = InstFakeUse::create(this, AllocaDest);
+ Insts.push_front(Use);
+ } break;
+ case BVT_UserPointer:
+ break;
+ }
+ // And insert the fused alloca.
+ InstAlloca *CombinedAlloca =
+ InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest);
+ CombinedAlloca->setKnownFrameOffset();
+ Insts.push_front(CombinedAlloca);
}
-void Cfg::processAllocas() {
+void Cfg::processAllocas(bool SortAndCombine) {
const uint32_t StackAlignment = getTarget()->getStackAlignment();
CfgNode *EntryNode = getEntryNode();
- // Allocas in the entry block that have constant size and alignment less
- // than or equal to the function's stack alignment.
- CfgVector<Inst *> FixedAllocas;
- // Allocas in the entry block that have constant size and alignment greater
- // than the function's stack alignment.
- CfgVector<Inst *> AlignedAllocas;
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(StackAlignment));
- // Collect the Allocas into the two vectors.
- bool RequiresFramePointer = false;
+ // Determine if there are large alignment allocations in the entry block or
+ // dynamic allocations (variable size in the entry block).
+ bool HasLargeAlignment = false;
+ bool HasDynamicAllocation = false;
for (Inst &Instr : EntryNode->getInsts()) {
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
- if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) {
- // Variable-sized allocations require a frame pointer.
- RequiresFramePointer = true;
- continue;
- }
uint32_t AlignmentParam = Alloca->getAlignInBytes();
- // For default align=0, set it to the real value 1, to avoid any
- // bit-manipulation problems below.
- AlignmentParam = std::max(AlignmentParam, 1u);
- assert(llvm::isPowerOf2_32(AlignmentParam));
- if (AlignmentParam > StackAlignment) {
- // Allocations aligned more than the stack require a frame pointer.
- RequiresFramePointer = true;
- AlignedAllocas.push_back(Alloca);
- } else
- FixedAllocas.push_back(Alloca);
+ if (AlignmentParam > StackAlignment)
+ HasLargeAlignment = true;
+ if (llvm::isa<Constant>(Alloca->getSizeInBytes()))
+ Alloca->setKnownFrameOffset();
+ else
+ HasDynamicAllocation = true;
}
}
- // Look for alloca instructions in other blocks
+ // Don't do the heavyweight sorting and layout for low optimization levels.
+ if (!SortAndCombine)
+ return;
+ // Any alloca outside the entry block is a dynamic allocation.
for (CfgNode *Node : Nodes) {
if (Node == EntryNode)
continue;
for (Inst &Instr : Node->getInsts()) {
if (llvm::isa<InstAlloca>(&Instr)) {
// Allocations outside the entry block require a frame pointer.
- RequiresFramePointer = true;
+ HasDynamicAllocation = true;
break;
}
}
- if (RequiresFramePointer)
+ if (HasDynamicAllocation)
break;
}
// Mark the target as requiring a frame pointer.
- if (RequiresFramePointer)
+ if (HasLargeAlignment || HasDynamicAllocation)
getTarget()->setHasFramePointer();
+ // Collect the Allocas into the two vectors.
+ // Allocas in the entry block that have constant size and alignment less
+ // than or equal to the function's stack alignment.
+ CfgVector<Inst *> FixedAllocas;
+ // Allocas in the entry block that have constant size and alignment greater
+ // than the function's stack alignment.
+ CfgVector<Inst *> AlignedAllocas;
+ // Maximum alignment used for the dynamic/aligned allocas.
+ uint32_t MaxAlignment = StackAlignment;
+ for (Inst &Instr : EntryNode->getInsts()) {
+ if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
+ if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
+ continue;
+ uint32_t AlignmentParam = Alloca->getAlignInBytes();
+ // For default align=0, set it to the real value 1, to avoid any
+ // bit-manipulation problems below.
+ AlignmentParam = std::max(AlignmentParam, 1u);
+ assert(llvm::isPowerOf2_32(AlignmentParam));
+ if (HasDynamicAllocation && AlignmentParam > StackAlignment) {
+ // If we have both dynamic allocations and large stack alignments,
+ // high-alignment allocations are pulled out with their own base.
+ AlignedAllocas.push_back(Alloca);
+ } else {
+ FixedAllocas.push_back(Alloca);
+ }
+ MaxAlignment = std::max(AlignmentParam, MaxAlignment);
+ }
+ }
// Add instructions to the head of the entry block in reverse order.
InstList &Insts = getEntryNode()->getInsts();
- // Fixed, large alignment alloca addresses do not have known offset.
- sortAllocas(AlignedAllocas, Insts, false);
- // Fixed, small alignment alloca addresses have known offset.
- sortAllocas(FixedAllocas, Insts, true);
+ if (HasDynamicAllocation && HasLargeAlignment) {
+ // We are using a frame pointer, but fixed large-alignment alloca addresses,
+ // do not have a known offset from either the stack or frame pointer.
+ // They grow up from a user pointer from an alloca.
+ sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
+ }
+ // Otherwise, fixed size allocas are always addressed relative to the stack
+ // unless there are dynamic allocas.
+ // TODO(sehr): re-enable frame pointer and decrementing addressing.
+ AllocaBaseVariableType BasePointerType =
+ (HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer);
+ sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
}
void Cfg::doAddressOpt() {
« no previous file with comments | « src/IceCfg.h ('k') | src/IceInstX8632.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698