Chromium Code Reviews| Index: src/IceCfg.cpp |
| diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp |
| index 5797fdcaf6375139dc1dc611e27792fdbba838cf..739af3fe9aa5f5efa7caf465ae8c34b40921d0db 100644 |
| --- a/src/IceCfg.cpp |
| +++ b/src/IceCfg.cpp |
| @@ -462,10 +462,151 @@ void Cfg::genFrame() { |
| getTarget()->addEpilog(Node); |
| } |
| -// This is a lightweight version of live-range-end calculation. Marks |
| -// the last use of only those variables whose definition and uses are |
| -// completely with a single block. It is a quick single pass and |
| -// doesn't need to iterate until convergence. |
| +// Use Tarjn's strongly connected components algorithm to identify outermost to |
|
jvoung (off chromium)
2015/09/01 18:56:18
Tarjan's
ascull
2015/09/03 19:52:37
Done.
|
| +// innermost loops. By deleting the head of the loop from the graph, inner |
| +// loops can be found. This assumes that the head node is not shared between |
| +// loops but instead all paths to the head come from 'continue' constructs. |
| +// |
| +// This only computes the loop nest depth within the function and does not take |
| +// into account whether the function was called from within a loop. |
| +void Cfg::computeLoopNestDepth() { |
|
Jim Stichnoth
2015/09/01 22:17:37
Add a TimerMarker here (and add a new marker type
ascull
2015/09/03 19:52:37
Done.
|
| + using index_t = uint32_t; |
|
Jim Stichnoth
2015/09/01 22:17:37
Type names should be capitalized per LLVM coding s
ascull
2015/09/03 19:52:37
Done.
|
| + constexpr index_t UndefinedIndex = 0; |
| + |
| + struct LoopNode { |
|
Jim Stichnoth
2015/09/01 22:17:37
I think if you are defining methods, you should ca
ascull
2015/09/03 19:52:37
Done.
|
| + LoopNode() = delete; |
| + LoopNode operator=(const LoopNode &) = delete; |
| + |
| + explicit LoopNode(CfgNode *BB) : BB(BB) {} |
| + LoopNode(const LoopNode &) = default; |
| + |
| + void reset() { |
| + Succ = BB->getOutEdges().begin(); |
| + Index = LowLink = UndefinedIndex; |
| + OnStack = false; |
| + } |
| + |
| + void visit(index_t VisitIndex) { Index = LowLink = VisitIndex; } |
| + bool isVisited() const { return Index != UndefinedIndex; } |
| + |
| + void setDeleted() { deleted = true; } |
| + bool isDeleted() const { return deleted; } |
| + |
| + CfgNode *BB; |
| + NodeList::const_iterator Succ; |
|
Jim Stichnoth
2015/09/01 22:17:37
Initialize members in the ctor or in the same way
ascull
2015/09/03 19:52:37
Done.
|
| + index_t Index; |
| + index_t LowLink; |
| + bool OnStack; |
| + bool deleted = false; |
|
jvoung (off chromium)
2015/09/01 18:56:17
capitalize deleted to follow code style
ascull
2015/09/03 19:52:37
Done.
|
| + }; |
| + |
| + // Create a collection of decorated nodes in the same order as Nodes which |
| + // means the node's index will be valid. |
| + std::vector<LoopNode, CfgLocalAllocator<LoopNode>> AllNodes; |
| + // This is used as a replacement for the call stack |
| + std::vector<LoopNode *, CfgLocalAllocator<LoopNode *>> WorkStack; |
| + // Track which loop a node belongs to |
| + std::vector<LoopNode *, CfgLocalAllocator<LoopNode *>> LoopStack; |
| + |
| + // Allocate memory ahead of time. This is why a vector is used instead of a |
| + // stack which doesn not support reserving (or bulk erasure used below). |
|
jvoung (off chromium)
2015/09/01 18:56:18
doesn -> doesn't
ascull
2015/09/03 19:52:37
Done.
|
| + AllNodes.reserve(Nodes.size()); |
| + WorkStack.reserve(Nodes.size()); |
| + LoopStack.reserve(Nodes.size()); |
| + |
| + // Load in initial data |
| + for (CfgNode *Node : Nodes) |
| + AllNodes.emplace_back(Node); |
| + |
| + size_t NumDeletedNodes = 0; |
|
Jim Stichnoth
2015/09/01 22:17:37
SizeT
or, if you object to that,
std::vector<Loop
ascull
2015/09/03 19:52:37
Done.
|
| + |
| + while (NumDeletedNodes < AllNodes.size()) { |
| + // Prepare to run Tarjan's |
| + index_t NextIndex = 1; |
|
jvoung (off chromium)
2015/09/01 18:56:18
nit: How about UndefinedIndex + 1, to ensure the i
ascull
2015/09/03 19:52:37
Done.
|
| + for (LoopNode &Node : AllNodes) |
| + Node.reset(); |
|
jvoung (off chromium)
2015/09/01 18:56:18
Wonder if you should add an Ice timer for this who
ascull
2015/09/03 19:52:37
Done.
|
| + |
| + assert(WorkStack.empty()); |
| + assert(LoopStack.empty()); |
| + |
| + for (LoopNode &Node : AllNodes) { |
| + if (Node.isDeleted() || Node.isVisited()) |
| + continue; |
| + |
| + WorkStack.push_back(&Node); |
| + |
| + continue_work_loop: |
| + while (!WorkStack.empty()) { |
| + LoopNode &WorkNode = *WorkStack.back(); |
| + WorkStack.pop_back(); |
| + |
| + if (!WorkNode.isVisited()) { |
| + WorkNode.visit(NextIndex++); |
| + LoopStack.push_back(&WorkNode); |
| + WorkNode.OnStack = true; |
| + } else { |
| + // Returning to a node after having recursed into Succ so continue |
| + // iterating through successors after using the Succ.LowLink value |
| + // that was computed in the recursion. |
| + LoopNode &Succ = AllNodes[(*WorkNode.Succ)->getIndex()]; |
| + WorkNode.LowLink = std::min(WorkNode.LowLink, Succ.LowLink); |
| + ++WorkNode.Succ; |
| + } |
| + |
| + // Visit the successors and recurse into unvisited nodes. The recursion |
| + // could cause the iteration to be suspended but it will resume as the |
| + // stack is unwound. |
| + auto SuccEnd = WorkNode.BB->getOutEdges().end(); |
| + for (; WorkNode.Succ != SuccEnd; ++WorkNode.Succ) { |
| + LoopNode &Succ = AllNodes[(*WorkNode.Succ)->getIndex()]; |
| + |
| + if (Succ.isDeleted()) |
| + continue; |
| + |
| + if (!Succ.isVisited()) { |
| + WorkStack.insert(WorkStack.end(), {&WorkNode, &Succ}); |
| + goto continue_work_loop; // continue outer loop |
|
jvoung (off chromium)
2015/09/01 18:56:18
You might be able to get away without a goto, if y
ascull
2015/09/03 19:52:37
I refactored the loop stuff out into it's own clas
|
| + } else if (Succ.OnStack) { |
| + WorkNode.LowLink = std::min(WorkNode.LowLink, Succ.Index); |
| + } |
| + } |
| + |
| + // Check for a loop being closed |
| + if (WorkNode.LowLink == WorkNode.Index) { |
| + // Single node loops mean no loop in a pure CFG. After lowering there |
| + // may be reentrancy but we ignore that here. |
|
jvoung (off chromium)
2015/09/01 18:56:18
What do you mean by reentrancy here? Branches from
ascull
2015/09/03 19:52:37
That is what I meant, although it isn't a useful c
|
| + if (LoopStack.back() == &WorkNode) { |
| + LoopStack.back()->OnStack = false; |
| + LoopStack.back()->setDeleted(); |
| + ++NumDeletedNodes; |
| + LoopStack.pop_back(); |
| + continue; |
| + } |
| + |
| + // Reaching here means a loop has been found! It consists of the |
| + // nodes on the top of the stack, down until the current node being |
| + // processed, WorkNode, is found. |
| + for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) { |
| + (*It)->OnStack = false; |
| + (*It)->BB->incrementLoopNestDepth(); |
| + // Remove the loop from the stack and delete the head node |
| + if (*It == &WorkNode) { |
| + (*It)->setDeleted(); |
| + ++NumDeletedNodes; |
| + LoopStack.erase(It.base() - 1, LoopStack.end()); |
| + break; |
| + } |
| + } |
| + } |
| + } |
| + } |
| + } |
| +} |
| + |
| +// This is a lightweight version of live-range-end calculation. Marks the last |
| +// use of only those variables whose definition and uses are completely with a |
| +// single block. It is a quick single pass and doesn't need to iterate until |
| +// convergence. |
| void Cfg::livenessLightweight() { |
| TimerMarker T(TimerStack::TT_livenessLightweight, this); |
| getVMetadata()->init(VMK_Uses); |
| @@ -606,12 +747,11 @@ bool Cfg::validateLiveness() const { |
| } |
| void Cfg::contractEmptyNodes() { |
| - // If we're decorating the asm output with register liveness info, |
| - // this information may become corrupted or incorrect after |
| - // contracting nodes that contain only redundant assignments. As |
| - // such, we disable this pass when DecorateAsm is specified. This |
| - // may make the resulting code look more branchy, but it should have |
| - // no effect on the register assignments. |
| + // If we're decorating the asm output with register liveness info, this |
| + // information may become corrupted or incorrect after contracting nodes that |
| + // contain only redundant assignments. As such, we disable this pass when |
| + // DecorateAsm is specified. This may make the resulting code look more |
| + // branchy, but it should have no effect on the register assignments. |
| if (Ctx->getFlags().getDecorateAsm()) |
| return; |
| for (CfgNode *Node : Nodes) { |