Chromium Code Reviews
| Index: src/IceCfgNode.cpp |
| diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp |
| index a1804d5cdf4a30f477820d25b80ef6d1a7b7bb9e..925500ed9c15c5b17dd547299b0fe7e52284dc64 100644 |
| --- a/src/IceCfgNode.cpp |
| +++ b/src/IceCfgNode.cpp |
| @@ -24,7 +24,7 @@ namespace Ice { |
| CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber, IceString Name) |
| : Func(Func), Number(LabelNumber), Name(Name), HasReturn(false), |
| - InstCountEstimate(0) {} |
| + NeedsPlacement(false), InstCountEstimate(0) {} |
| // Returns the name the node was created with. If no name was given, |
| // it synthesizes a (hopefully) unique name. |
| @@ -204,6 +204,269 @@ void CfgNode::deletePhis() { |
| I->setDeleted(); |
| } |
| +// Splits the edge from Pred to this node by creating a new node and |
| +// hooking up the in and out edges appropriately. (The EdgeIndex |
| +// parameter is only used to make the new node's name unique when |
| +// there are multiple edges between the same pair of nodes.) The new |
| +// node's instruction list is initialized to the empty list, with no |
| +// terminator instruction. If there are multiple edges from Pred to |
| +// this node, only one edge is split, and the particular choice of |
| +// edge is undefined. This could happen with a switch instruction, or |
| +// a conditional branch that weirdly has both branches to the same |
| +// place. TODO(stichnot,kschimpf): Figure out whether this is legal |
| +// in the LLVM IR or the PNaCl bitcode, and if so, we need to |
| +// establish a strong relationship among the ordering of Pred's |
| +// out-edge list, this node's in-edge list, and the Phi instruction's |
|
jvoung (off chromium)
2014/10/27 22:12:20
So if this happens, the phi lowering might be wron
Jim Stichnoth
2014/10/28 01:20:14
After thinking about this, my belief is that if th
|
| +// operand list. |
| +CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { |
| + CfgNode *NewNode = |
| + Func->makeNode("split_" + Pred->getName() + "_" + getName() + "_" + |
| + std::to_string(EdgeIndex)); |
| + // The new node is added to the end of the node list, and will later |
| + // need to be sorted into a reasonable topological order. |
| + NewNode->setNeedsPlacement(true); |
| + // Repoint Pred's out-edge. |
| + bool Found = false; |
| + for (auto I = Pred->OutEdges.begin(), E = Pred->OutEdges.end(); |
| + !Found && I != E; ++I) { |
| + if (*I == this) { |
| + *I = NewNode; |
| + NewNode->InEdges.push_back(Pred); |
| + Found = true; |
| + } |
| + } |
| + assert(Found); |
| + // Repoint this node's in-edge. |
| + Found = false; |
| + for (auto I = InEdges.begin(), E = InEdges.end(); !Found && I != E; ++I) { |
| + if (*I == Pred) { |
| + *I = NewNode; |
| + NewNode->OutEdges.push_back(this); |
| + Found = true; |
| + } |
| + } |
| + assert(Found); |
| + // Repoint a suitable branch instruction's target. |
| + Found = false; |
| + for (auto I = Pred->getInsts().rbegin(), E = Pred->getInsts().rend(); |
| + !Found && I != E; ++I) { |
| + if (!(*I)->isDeleted()) { |
| + Found = (*I)->repointEdge(this, NewNode); |
| + } |
| + } |
| + assert(Found); |
| + return NewNode; |
| +} |
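The loop above searches Pred's instructions from the end (where the terminator lives) and relies on repointEdge() to report whether it actually retargeted a successor. A minimal standalone sketch of that contract, assuming stub types (CfgNodeStub, CondBrStub) that merely stand in for the real node and branch classes and are not part of this CL:

#include <cassert>

// Stub stand-ins for the real CfgNode and conditional-branch classes; only
// the repointing contract assumed by splitIncomingEdge() is modeled here.
struct CfgNodeStub {};

struct CondBrStub {
  CfgNodeStub *TargetTrue;
  CfgNodeStub *TargetFalse;
  // Replace every successor that currently points at OldNode with NewNode,
  // and report whether any replacement happened so the caller's reverse
  // scan over the instruction list can stop at the first match.
  bool repointEdge(CfgNodeStub *OldNode, CfgNodeStub *NewNode) {
    bool Repointed = false;
    if (TargetTrue == OldNode) {
      TargetTrue = NewNode;
      Repointed = true;
    }
    if (TargetFalse == OldNode) {
      TargetFalse = NewNode;
      Repointed = true;
    }
    return Repointed;
  }
};

int main() {
  CfgNodeStub ThisNode, OtherNode, NewSplitNode;
  CondBrStub Br = {&OtherNode, &ThisNode};
  const bool Repointed = Br.repointEdge(&ThisNode, &NewSplitNode);
  assert(Repointed);
  assert(Br.TargetFalse == &NewSplitNode && Br.TargetTrue == &OtherNode);
  (void)Repointed;
  return 0;
}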
| + |
| +namespace { |
| + |
| +// Helper function used by advancedPhiLowering(). |
| +bool sameVarOrReg(const Variable *Var, const Operand *Opnd) { |
| + if (Var == Opnd) |
| + return true; |
| + if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { |
| + if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) |
| + return true; |
| + } |
| + return false; |
| +} |
| + |
| +} // end of anonymous namespace |
| + |
| +// This is the "advanced" version of Phi lowering for a basic block, in |
| +// contrast to the simple version that lowers through assignments |
| +// involving temporaries. |
| +// |
| +// All Phi instructions in a basic block are conceptually executed in |
| +// parallel. However, if we lower Phis early and commit to a |
| +// sequential ordering, we may end up creating unnecessary |
| +// interferences which lead to worse register allocation. Delaying |
| +// Phi scheduling until after register allocation can help unless |
|
jvoung (off chromium)
2014/10/27 22:12:21
"can helps" -> "can help"
Jim Stichnoth
2014/10/28 01:20:14
Done.
|
| +// there are no free registers for shuffling registers or stack slots |
| +// and spilling becomes necessary. |
| +// |
| +// The advanced Phi lowering starts by finding a topological sort of |
| +// the Phi instructions, where "A=B" comes before "B=C" due to the |
| +// anti-dependence on B. If a topological sort is not possible due to |
| +// a cycle, the cycle is broken by introducing a non-parallel |
| +// temporary. For example, a cycle arising from a permutation like |
| +// "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being equal, |
| +// prefer to schedule assignments with register-allocated Src operands |
| +// earlier, in case that register becomes free afterwards; and prefer |
|
jvoung (off chromium)
2014/10/27 22:12:21
", and" instead of "; and" ?
Jim Stichnoth
2014/10/28 01:20:14
Done.
|
| +// to schedule assignments with register-allocated Dest variables |
| +// later, to keep that register free for longer. |
| +// |
| +// Once the ordering is determined, the Cfg edge is split and the |
| +// assignment list is lowered by the target lowering layer. The |
| +// specific placement of the new node within the Cfg node list is |
| +// deferred until later, including after empty node contraction. |
| +void CfgNode::advancedPhiLowering() { |
| + if (getPhis().empty()) |
| + return; |
| + |
| + // Count the number of non-deleted Phi instructions. |
| + struct { |
| + InstPhi *Phi; |
| + Variable *Dest; |
| + Operand *Src; |
| + bool Processed; |
| + size_t NumPred; // number of entries whose Src is this Dest |
| + int32_t Weight; // preference for topological order |
| + } Desc[getPhis().size()]; |
| + |
| + size_t NumPhis = 0; |
| + for (InstPhi *Inst : getPhis()) { |
| + if (!Inst->isDeleted()) { |
| + Desc[NumPhis].Phi = Inst; |
| + Desc[NumPhis].Dest = Inst->getDest(); |
| + ++NumPhis; |
| + } |
| + } |
| + if (NumPhis == 0) |
| + return; |
| + |
| + SizeT InEdgeIndex = 0; |
| + for (CfgNode *Pred : InEdges) { |
| + CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); |
| + AssignList Assignments; |
| + |
| + // First pass computes Src and initializes NumPred. |
| + for (size_t I = 0; I < NumPhis; ++I) { |
| + Desc[I].Src = Desc[I].Phi->getOperandForTarget(Pred); |
| + Desc[I].Processed = false; |
| + Desc[I].NumPred = 0; |
| + } |
| + // Second pass computes NumPred by comparing every pair of Phi |
| + // instructions. |
| + for (size_t I = 0; I < NumPhis; ++I) { |
| + if (Desc[I].Processed) |
|
jvoung (off chromium)
2014/10/27 22:12:21
It wasn't clear to me why Processed would be true
Jim Stichnoth
2014/10/28 01:20:14
You're right - I think I over-conservatively added
Jim Stichnoth
2014/10/30 00:25:25
BTW, these checks are now back after moving the re
|
| + continue; |
| + const Variable *Dest = Desc[I].Dest; |
| + for (size_t J = 0; J < NumPhis; ++J) { |
| + if (Desc[J].Processed) |
| + continue; |
| + if (I != J) { |
| + // There shouldn't be two Phis with the same Dest variable |
| + // or register. |
| + assert(!sameVarOrReg(Dest, Desc[J].Dest)); |
| + } |
| + const Operand *Src = Desc[J].Src; |
| + if (sameVarOrReg(Dest, Src)) |
| + ++Desc[I].NumPred; |
| + } |
| + } |
| + |
| + // Another pass to compute initial Weight values. |
| + |
| + // Always pick NumPred=0 over NumPred>0. |
| + const int32_t WeightNoPreds = 4; |
| + // Prefer Src as a register because the register might free up. |
| + const int32_t WeightSrcIsReg = 2; |
| + // Prefer Dest not as a register because the register stays free |
| + // longer. |
| + const int32_t WeightDestNotReg = 1; |
| + |
| + for (size_t I = 0; I < NumPhis; ++I) { |
| + if (Desc[I].Processed) |
| + continue; |
| + int32_t Weight = 0; |
| + if (Desc[I].NumPred == 0) |
| + Weight += WeightNoPreds; |
| + if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) |
| + if (Var->hasReg()) |
| + Weight += WeightSrcIsReg; |
| + if (!Desc[I].Dest->hasReg()) |
| + Weight += WeightDestNotReg; |
| + Desc[I].Weight = Weight; |
| + } |
| + |
| + // Repeatedly choose and process the best candidate in the |
| + // topological sort, until no candidates remain. This |
| + // implementation is O(N^2) where N is the number of Phi |
| + // instructions, but with a small constant factor compared to a |
| + // likely implementation of O(N) topological sort. |
| + for (size_t Remaining = NumPhis; Remaining; --Remaining) { |
| + size_t BestIndex = 0; |
| + int32_t BestWeight = -1; |
| + // Find the best candidate. |
| + for (size_t I = 0; I < NumPhis; ++I) { |
| + if (Desc[I].Processed) |
| + continue; |
| + int32_t Weight = 0; |
| + Weight = Desc[I].Weight; |
| + if (Weight > BestWeight) { |
| + BestIndex = I; |
| + BestWeight = Weight; |
| + } |
| + } |
| + assert(BestWeight >= 0); |
| + assert(Desc[BestIndex].NumPred <= 1); |
| + Variable *Dest = Desc[BestIndex].Dest; |
| + Operand *Src = Desc[BestIndex].Src; |
| + // Break a cycle by introducing a temporary. Except don't |
| + // bother for a self-assignment, which looks like a self-cycle |
| + // with NumPred=1, since it can be left alone and will |
| + // eventually be removed as a redundant assignment. |
| + bool IsSelfAssignment = sameVarOrReg(Dest, Src); |
| + if (!IsSelfAssignment && Desc[BestIndex].NumPred) { |
| + bool Found = false; |
| + // If the target instruction "A=B" is part of a cycle, find |
| + // the "X=A" assignment in the cycle because it will have to |
| + // be rewritten as "X=tmp". |
| + for (size_t J = 0; !Found && J < NumPhis; ++J) { |
| + if (Desc[J].Processed) |
| + continue; |
| + Operand *OtherSrc = Desc[J].Src; |
| + if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { |
| + // Set Tmp's optional Name parameter to something like |
|
jvoung (off chromium)
2014/10/27 22:12:21
Did you want to do the name-setting here, or mark
Jim Stichnoth
2014/10/28 01:20:14
I meant it sort of as a weak TODO. But thinking a
|
| + // "__split" + std::to_string(Func->getNumVariables()) |
| + // for easier identification during debugging if desired. |
| + Variable *Tmp = Func->makeVariable(OtherSrc->getType()); |
| + Tmp->setNeedsStackSlot(); |
| + Assignments.push_back(InstAssign::create(Func, Tmp, OtherSrc)); |
| + Desc[J].Src = Tmp; |
| + Found = true; |
| + } |
| + } |
| + assert(Found); |
| + } |
| + // Now that a cycle (if any) has been broken, create the actual |
| + // assignment. Except if Dest and Src are syntactically the |
| + // same, don't bother adding the assignment, because in all |
| + // respects it would be redundant, and if Dest/Src are on the |
| + // stack, the target lowering may naively decide to lower it |
| + // using a temporary register. |
| + if (Dest != Src) |
| + Assignments.push_back(InstAssign::create(Func, Dest, Src)); |
| + // Update NumPred for all Phi assignments using this Phi's Src |
| + // as their Dest variable. Also update Weight if NumPred |
| + // dropped from 1 to 0. |
| + if (Variable *Var = llvm::dyn_cast<Variable>(Src)) { |
|
jvoung (off chromium)
2014/10/27 22:12:21
nit: could auto this too
Jim Stichnoth
2014/10/28 01:20:14
Done.
|
| + for (size_t I = 0; I < NumPhis; ++I) { |
| + if (Desc[I].Processed) |
| + continue; |
| + if (sameVarOrReg(Var, Desc[I].Dest)) { |
| + if (--Desc[I].NumPred == 0) |
| + Desc[I].Weight += WeightNoPreds; |
| + } |
| + } |
| + } |
| + Desc[BestIndex].Processed = true; |
| + } |
| + |
| + Func->getTarget()->lowerPhiAssignments(Split, Assignments); |
| + |
| + // Renumber the instructions to be monotonically increasing so |
| + // that addNode() doesn't assert when multi-definitions are added |
| + // out of order. |
| + Split->renumberInstructions(); |
| + Func->getVMetadata()->addNode(Split); |
| + } |
| + |
| + for (InstPhi *Inst : getPhis()) |
| + Inst->setDeleted(); |
| +} |
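A tiny standalone illustration of the cycle-breaking described above, independent of any Subzero types: the parallel copies A=B, B=C, C=A form a cycle, so the old value of A (still needed by C=A but clobbered by the first copy) is saved in a temporary, mirroring the "T=A;A=B;B=C;C=T" rewrite:

#include <cstdio>

int main() {
  int A = 1, B = 2, C = 3;
  // Desired parallel semantics: A=B, B=C, C=A, all reading the old values.
  int T = A; // save old A before the first copy clobbers it
  A = B;     // A receives the old B
  B = C;     // B receives the old C
  C = T;     // C receives the old A via the temporary
  std::printf("%d %d %d\n", A, B, C); // prints "2 3 1"
  return 0;
}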
| + |
| // Does address mode optimization. Pass each instruction to the |
| // TargetLowering object. If it returns a new instruction |
| // (representing the optimized address mode), then insert the new |
| @@ -240,7 +503,7 @@ void CfgNode::doNopInsertion() { |
| void CfgNode::genCode() { |
| TargetLowering *Target = Func->getTarget(); |
| LoweringContext &Context = Target->getContext(); |
| - // Lower only the regular instructions. Defer the Phi instructions. |
| + // Lower the regular instructions. |
| Context.init(this); |
| while (!Context.atEnd()) { |
| InstList::iterator Orig = Context.getCur(); |
| @@ -250,6 +513,8 @@ void CfgNode::genCode() { |
| // Ensure target lowering actually moved the cursor. |
| assert(Context.getCur() != Orig); |
| } |
| + // Do preliminary lowering of the Phi instructions. |
| + Target->prelowerPhis(); |
| } |
| void CfgNode::livenessLightweight() { |
| @@ -300,7 +565,6 @@ bool CfgNode::liveness(Liveness *Liveness) { |
| Liveness->getLiveOut(this) = Live; |
| // Process regular instructions in reverse order. |
| - // TODO(stichnot): Use llvm::make_range with LLVM 3.5. |
| for (auto I = Insts.rbegin(), E = Insts.rend(); I != E; ++I) { |
| if ((*I)->isDeleted()) |
| continue; |
| @@ -309,13 +573,15 @@ bool CfgNode::liveness(Liveness *Liveness) { |
| // Process phis in forward order so that we can override the |
| // instruction number to be that of the earliest phi instruction in |
| // the block. |
| + SizeT NumNonDeadPhis = 0; |
| InstNumberT FirstPhiNumber = Inst::NumberSentinel; |
| for (InstPhi *I : Phis) { |
| if (I->isDeleted()) |
| continue; |
| if (FirstPhiNumber == Inst::NumberSentinel) |
| FirstPhiNumber = I->getNumber(); |
| - I->liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd); |
| + if (I->liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd)) |
| + ++NumNonDeadPhis; |
| } |
| // When using the sparse representation, after traversing the |
| @@ -350,9 +616,12 @@ bool CfgNode::liveness(Liveness *Liveness) { |
| // Add in current LiveIn |
| Live |= LiveIn; |
| // Check result, set LiveIn=Live |
| - Changed = (Live != LiveIn); |
| - if (Changed) |
| + SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this); |
| + bool LiveInChanged = (Live != LiveIn); |
| + Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged); |
| + if (LiveInChanged) |
| LiveIn = Live; |
| + PrevNumNonDeadPhis = NumNonDeadPhis; |
| return Changed; |
| } |
| @@ -472,6 +741,40 @@ void CfgNode::livenessPostprocess(LivenessMode Mode, Liveness *Liveness) { |
| } |
| } |
| +// If this node contains only deleted instructions, and ends in an |
| +// unconditional branch, contract the node by repointing all its |
| +// in-edges to its successor. |
| +void CfgNode::contractIfEmpty() { |
| + if (InEdges.size() == 0) |
| + return; |
| + Inst *Branch = NULL; |
| + for (Inst *I : Insts) { |
| + if (!I->isDeleted() && !I->isUnconditionalBranch()) |
| + return; |
| + Branch = I; |
| + } |
| + Branch->setDeleted(); |
| + assert(OutEdges.size() == 1); |
| + // Repoint all this node's in-edges to this node's successor. |
| + for (CfgNode *Pred : InEdges) { |
| + for (auto I = Pred->OutEdges.begin(), E = Pred->OutEdges.end(); I != E; |
| + ++I) { |
| + if (*I == this) { |
| + *I = OutEdges[0]; |
| + OutEdges[0]->InEdges.push_back(Pred); |
| + } |
| + } |
| + for (Inst *I : Pred->getInsts()) { |
| + if (!I->isDeleted()) |
| + I->repointEdge(this, OutEdges[0]); |
|
jvoung (off chromium)
2014/10/27 22:12:20
Could break out of the loop here too, once the edg
Jim Stichnoth
2014/10/28 01:20:14
E.g., there could be a lowered Switch instruction
jvoung (off chromium)
2014/10/29 13:46:50
Ah okay... I was looking at the way splitIncomingE
|
| + } |
| + } |
| + InEdges.clear(); |
| + // Don't bother removing the single out-edge, which would also |
| + // require finding the corresponding in-edge in the successor and |
| + // removing it. |
| +} |
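A minimal standalone sketch of the contraction bookkeeping, using a stub Node with plain vectors in place of the real class (names here are illustrative only): an empty node E between P and its successor S disappears by repointing P's out-edge and recording P as a new predecessor of S; as in the code above, S's stale in-edge from E is left in place.

#include <cassert>
#include <vector>

struct Node {
  std::vector<Node *> InEdges;
  std::vector<Node *> OutEdges;
};

// Contract empty node E: every predecessor out-edge that pointed at E is
// repointed to E's single successor, which also gains that predecessor.
void contractEmptyNode(Node &E) {
  assert(E.OutEdges.size() == 1);
  Node *Succ = E.OutEdges[0];
  for (Node *Pred : E.InEdges) {
    for (Node *&Out : Pred->OutEdges) {
      if (Out == &E) {
        Out = Succ;
        Succ->InEdges.push_back(Pred);
      }
    }
  }
  E.InEdges.clear();
}

int main() {
  Node P, E, S;
  P.OutEdges = {&E};
  E.InEdges = {&P};
  E.OutEdges = {&S};
  S.InEdges = {&E};
  contractEmptyNode(E);
  assert(P.OutEdges[0] == &S);
  assert(S.InEdges.back() == &P);
  return 0;
}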
| + |
| void CfgNode::doBranchOpt(const CfgNode *NextNode) { |
| TargetLowering *Target = Func->getTarget(); |
| // Check every instruction for a branch optimization opportunity. |
| @@ -479,8 +782,11 @@ void CfgNode::doBranchOpt(const CfgNode *NextNode) { |
| // first opportunity, unless there is some target lowering where we |
| // have the possibility of multiple such optimizations per block |
| // (currently not the case for x86 lowering). |
| - for (Inst *I : Insts) |
| - Target->doBranchOpt(I, NextNode); |
| + for (Inst *I : Insts) { |
| + if (!I->isDeleted()) { |
| + Target->doBranchOpt(I, NextNode); |
| + } |
| + } |
| } |
| // ======================== Dump routines ======================== // |