Index: src/IceCfg.cpp |
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp |
index 23c363fa7791938a5a05a0edfc6adaf72167f48b..42ed2173b8a0dea75ad3dba286695fabb640d3ce 100644 |
--- a/src/IceCfg.cpp |
+++ b/src/IceCfg.cpp |
@@ -788,6 +788,362 @@ void Cfg::shortCircuitJumps() { |
Nodes = NewList; |
} |
+namespace { |
+ |
+/// VariableMap is a simple helper class for splitLocalVars() that keeps track |
+/// of the latest split version of each original Variable. |
+class VariableMap { |
+private: |
+ VariableMap() = delete; |
+ VariableMap(const VariableMap &) = delete; |
+ VariableMap &operator=(const VariableMap &) = delete; |
+ |
+public: |
+ explicit VariableMap(Cfg *Func) |
+ : Func(Func), NumVars(Func->getNumVariables()) {} |
+ /// Reset every mapping to null (unmapped); call at the start of a block. |
+ void reset() { Map.assign(NumVars, nullptr); } |
+ /// Get Var's current mapping (or Var itself if it has no mapping yet). |
+ Variable *get(Variable *Var) const { |
+ const SizeT VarNum = getVarNum(Var); |
+ Variable *MappedVar = Map[VarNum]; |
+ return MappedVar == nullptr ? Var : MappedVar; |
+ } |
+ /// Create a new Variable linked to Var's current mapping (same type and |
+ /// register class as Var), and set it as Var's latest mapping. |
+ Variable *makeLinked(Variable *Var) { |
+ Variable *NewVar = Func->makeVariable(Var->getType()); |
+ NewVar->setRegClass(Var->getRegClass()); |
+ NewVar->setLinkedTo(get(Var)); |
+ const SizeT VarNum = getVarNum(Var); |
+ Map[VarNum] = NewVar; |
+ return NewVar; |
+ } |
+ |
+private: |
+ Cfg *const Func; |
+ // NumVars is for the size of the Map array. It can be const because any new |
+ // Variables created during the splitting pass don't need to be mapped. |
+ const SizeT NumVars; |
+ CfgVector<Variable *> Map; |
+ /// Get Var's index, asserting it is in range and that Var mayHaveReg(). |
+ SizeT getVarNum(Variable *Var) const { |
+ const SizeT VarNum = Var->getIndex(); |
+ assert(VarNum < NumVars); |
+ assert(Var->mayHaveReg()); |
+ return VarNum; |
+ } |
+}; |
+ |
+/// A Variable is "normal" if it is a register allocation candidate but doesn't |
+/// already have a register. A null Variable is never "normal". |
+bool isNormal(const Variable *Var) { |
Eric Holk
2016/07/25 19:59:22
I'm not sure "normal" is the best name here, but I
Jim Stichnoth
2016/07/26 05:59:09
Done - using the shorter "allocable".
|
+ if (Var == nullptr) |
+ return false; |
+ return !Var->hasReg() && Var->mayHaveReg(); |
+} |
+ |
+/// A non-null Variable is "inf" if it has a register or is infinite-weight. |
+bool isInf(const Variable *Var) { |
+ if (Var == nullptr) |
+ return false; |
+ return Var->hasReg() || Var->mustHaveReg(); |
+} |
+ |
+} // end of anonymous namespace |
+ |
+/// Within each basic block, rewrite Variable references in terms of chained |
+/// copies of the original Variable. For example: |
+/// A = B + C |
+/// might be rewritten as: |
+/// B1 = B |
+/// C1 = C |
+/// A = B + C (sources keep their mapping from before this instruction) |
Eric Holk
2016/07/25 19:59:22
Should this be `A = B1 + C1`?
Jim Stichnoth
2016/07/26 05:59:09
No... see also the description in lines 1100-1101
|
+/// A1 = A |
+/// and then: |
+/// D = A + B |
+/// might be rewritten as: |
+/// A2 = A1 |
+/// B2 = B1 |
+/// D = A1 + B1 |
+/// D1 = D |
+/// |
+/// The purpose is to present the linear-scan register allocator with smaller |
+/// live ranges, to help mitigate its "all or nothing" allocation strategy, |
+/// while counting on its preference mechanism to keep the split versions in the |
+/// same register when possible. |
+/// |
+/// When creating new Variables, A2 is linked to A1 which is linked to A, and |
+/// similarly for the other Variable linked-to chains. Rewrites apply only to |
+/// Variables where mayHaveReg() is true. |
+/// |
+/// At code emission time, redundant linked-to stack assignments will be |
+/// recognized and elided. To illustrate using the above example, if A1 gets a |
+/// register but A and A2 are on the stack, the "A2=A1" store instruction is |
+/// redundant since A and A2 share the same stack slot and A1 originated from A. |
+/// |
+/// Simple assignment instructions are rewritten slightly differently, to take |
+/// maximal advantage of Variables known to have registers. |
+/// |
+/// In general, there may be several valid ways to rewrite an instruction: add |
+/// the new assignment instruction either before or after the original |
+/// instruction, and rewrite the original instruction with either the old or the |
+/// new variable mapping. We try to pick a strategy most likely to avoid |
+/// potential performance problems. For example, try to avoid storing to the |
+/// stack and then immediately reloading from the same location. One |
+/// consequence is that code might be generated that loads a register from a |
+/// stack location, followed almost immediately by another use of the same stack |
+/// location, despite its value already being available in a register as a |
+/// result of the first instruction. However, the performance impact here is |
+/// likely to be negligible, and a simple availability peephole optimization |
+/// could clean it up. |
+/// |
+/// This pass potentially adds a lot of new instructions and variables, and as |
+/// such there are compile-time performance concerns, particularly with liveness |
+/// analysis and register allocation. Note that for liveness analysis, the new |
+/// variables have single-block liveness, so they don't increase the size of the |
+/// liveness bit vectors that need to be merged across blocks. As a result, the |
+/// performance impact is likely to be linearly related to the number of new |
+/// instructions, without the quadratic nature that liveness analysis usually |
Eric Holk
2016/07/25 19:59:22
Does this mean that splitting live ranges totally
Jim Stichnoth
2016/07/26 05:59:09
I rephrased the "quadratic" part in the comment.
|
+/// has. |
+void Cfg::splitLocalVars() { |
+ if (!getFlags().getSplitLocalVars()) |
+ return; |
+ TimerMarker _(TimerStack::TT_splitLocalVars, this); |
+ VariableMap VarMap(this); |
+ for (CfgNode *Node : getNodes()) { |
+ // Clear the VarMap at the start of every block. |
+ VarMap.reset(); |
+ auto &Insts = Node->getInsts(); |
+ auto Iter = Insts.begin(); |
+ auto IterEnd = Insts.end(); |
+ // TODO(stichnot): Also create assignments/mappings for phi dest variables. |
+ InstList::iterator NextIter; |
+ const Inst *WaitingForLabel = nullptr; |
+ const Inst *WaitingForBranchTo = nullptr; |
+ for (; Iter != IterEnd; Iter = NextIter) { |
+ NextIter = Iter; |
+ ++NextIter; |
+ Inst *Instr = iteratorToInst(Iter); |
+ if (Instr->isDeleted()) |
+ continue; |
+ |
+ // Before doing any transformations, take care of the bookkeeping for |
+ // intra-block branching. |
+ // |
+ // This is tricky because the transformation for one instruction may |
+ // depend on a transformation for a previous instruction, but if that |
+ // previous instruction is not dynamically executed due to intra-block |
+ // control flow, it may lead to an inconsistent state and incorrect code. |
+ // |
+ // We want to handle some simple cases, and reject some others: |
+ // |
+ // 1. For something like a select instruction, we could have: |
+ // test cond |
+ // dest = src_false |
+ // branch conditionally to label |
+ // dest = src_true |
+ // label: |
+ // |
+ // Between the conditional branch and the label, we need to treat dest and |
+ // src variables specially, specifically not creating any new state. |
+ // |
+ // 2. Some 64-bit atomic instructions may be lowered to a loop: |
+ // label: |
+ // ... |
+ // branch conditionally to label |
+ // |
+ // No special treatment is needed, but it's worth tracking so that case #1 |
+ // above can also be handled. |
+ // |
+ // 3. Advanced switch lowering can create really complex intra-block |
+ // control flow, so when we recognize this, we should just stop splitting |
+ // for the remainder of the block (which isn't much since a switch |
+ // instruction is a terminator). |
+ // |
+ // 4. Other complex lowering, e.g. an i64 icmp on a 32-bit architecture, |
+ // can result in an if/then/else like structure with two labels. One |
+ // possibility would be to suspend splitting for the remainder of the |
+ // lowered instruction, and then resume for the remainder of the block, |
+ // but since we don't have high-level instruction markers, we might as |
+ // well just stop splitting for the remainder of the block. |
+ if (Instr->isLabel()) { |
+ // A Label instruction shouldn't have any operands, so it can be handled |
+ // right here and then move on. |
+ assert(Instr->getDest() == nullptr); |
+ assert(Instr->getSrcSize() == 0); |
+ if (Instr == WaitingForLabel) { |
+ // If we found the forward-branch-target Label instruction we're |
+ // waiting for, then clear the WaitingForLabel state. |
+ WaitingForLabel = nullptr; |
+ } else if (WaitingForLabel == nullptr && |
+ WaitingForBranchTo == nullptr) { |
+ // If we found a new Label instruction while the WaitingFor* state is |
+ // clear, then set things up for this being a backward branch target. |
+ WaitingForBranchTo = Instr; |
+ } else { |
+ // We see something we don't understand, so skip to the next block. |
+ break; |
+ } |
+ continue; // move to next instruction |
+ } |
+ if (const Inst *Label = Instr->isIntraBlockBranch()) { |
+ // An intra-block branch instruction shouldn't have any operands, so it |
+ // can be handled right here and then move on. |
+ assert(Instr->getDest() == nullptr); |
+ assert(Instr->getSrcSize() == 0); |
+ if (WaitingForBranchTo == Label && WaitingForLabel == nullptr) { |
+ WaitingForBranchTo = nullptr; |
+ } else if (WaitingForBranchTo == nullptr && |
+ (WaitingForLabel == nullptr || WaitingForLabel == Label)) { |
+ WaitingForLabel = Label; |
+ } else { |
+ // We see something we don't understand, so skip to the next block. |
+ break; |
+ } |
+ continue; // move to next instruction |
+ } |
+ |
+ // Intra-block bookkeeping is complete, now do the transformations. |
+ static constexpr char AnInstructionHasNoName[] = ""; |
Eric Holk
2016/07/25 19:59:22
:)
|
+ // We can limit the splitting to an arbitrary subset of the instructions, |
+ // and still expect correct code. As such, we can do instruction-subset |
+ // bisection to help debug any problems in this pass. |
+ if (!BuildDefs::minimal() && |
+ !getFlags().matchSplitInsts(AnInstructionHasNoName, |
Eric Holk
2016/07/25 19:59:22
This is a little unclear to me, but I think I get
Jim Stichnoth
2016/07/26 05:59:09
Right -- class RangeSpec allows matching by name o
|
+ Instr->getNumber())) |
+ continue; |
+ |
+ if (!llvm::isa<InstTarget>(Instr)) { |
+ // Ignore non-lowered instructions like FakeDef/FakeUse. |
+ continue; |
+ } |
+ const bool IsUnconditionallyExecuted = (WaitingForLabel == nullptr); |
+ Variable *Dest = Instr->getDest(); |
+ const bool DestIsInf = isInf(Dest); |
+ const bool DestIsNormal = isNormal(Dest); |
+ // Determine the transformation based on the kind of instruction, and |
+ // whether its Variables are infinite-weight. New instructions can be |
+ // inserted before the current instruction via Iter, or after the current |
+ // instruction via NextIter. |
+ if (Instr->isVarAssign()) { |
+ auto *SrcVar = llvm::cast<Variable>(Instr->getSrc(0)); |
+ const bool SrcIsInf = isInf(SrcVar); |
+ const bool SrcIsNormal = isNormal(SrcVar); |
+ if (DestIsInf && SrcIsInf) { |
+ // The instruction: |
+ // t:inf = u:inf |
+ // No transformation is needed. |
+ continue; |
+ } else if (DestIsInf && SrcIsNormal && |
+ Dest->getType() == Instr->getSrc(0)->getType()) { |
+ // The instruction: |
+ // t:inf = v |
+ // gets transformed to: |
+ // t:inf = v1 |
+ // v2 = t:inf |
+ // where: |
+ // v1 := map[v] |
+ // v2 := linkTo(v) |
+ // map[v] := v2 |
+ // |
+ // If both v2 and its linkedToStackRoot get a stack slot, then |
+ // "v2=t:inf" is recognized as a redundant assignment and elided. |
+ // |
+ // Note that if the dest and src types are different, then this is |
+ // actually a truncation operation, which would make "v2=t:inf" an |
+ // invalid instruction. In this case, the type test will make it fall |
+ // through to the general case below. |
+ Variable *OldMapped = VarMap.get(SrcVar); |
+ Instr->replaceSource(0, OldMapped); |
+ if (IsUnconditionallyExecuted) { |
+ // Only create new mapping state if the instruction is |
+ // unconditionally executed. |
+ Variable *NewMapped = VarMap.makeLinked(SrcVar); |
+ Inst *Mov = Target->createLoweredMove(NewMapped, Dest); |
+ Insts.insert(NextIter, Mov); |
+ } |
+ continue; |
+ } else if (DestIsNormal && SrcIsInf) { |
+ // The instruction: |
+ // v = t:inf |
+ // gets transformed to: |
+ // v = t:inf |
+ // v2 = t:inf |
+ // where: |
+ // v2 := linkTo(v) |
+ // map[v] := v2 |
+ // |
+ // If both v2 and v get a stack slot, then "v2=t:inf" is recognized as |
+ // a redundant assignment and elided. |
+ if (IsUnconditionallyExecuted) { |
+ // Only create new mapping state if the instruction is |
+ // unconditionally executed. |
+ Variable *NewMapped = VarMap.makeLinked(Dest); |
+ Inst *Mov = Target->createLoweredMove(NewMapped, Dest); |
+ Insts.insert(NextIter, Mov); |
+ } else { |
+ // For a conditionally executed instruction, add a redefinition of |
+ // the original Dest mapping, without creating a new linked |
+ // variable. |
+ Variable *OldMapped = VarMap.get(Dest); |
+ Inst *Mov = Target->createLoweredMove(OldMapped, Dest); |
+ Mov->setDestRedefined(); |
+ Insts.insert(NextIter, Mov); |
+ } |
+ continue; |
+ } |
+ } |
+ // The (non-variable-assignment) instruction: |
+ // ... = F(v) |
+ // where v is not infinite-weight, gets transformed to: |
+ // v2 = v1 |
+ // ... = F(v1) |
+ // where: |
+ // v1 := map[v] |
+ // v2 := linkTo(v) |
+ // map[v] := v2 |
+ // After that, if the "..." dest=u is not infinite-weight, append: |
+ // u2 = u |
+ // where: |
+ // u2 := linkTo(u) |
+ // map[u] := u2 |
+ for (SizeT i = 0; i < Instr->getSrcSize(); ++i) { |
+ // Iterate over the top-level src vars. Don't bother to dig into |
+ // e.g. MemOperands because their vars should all be infinite-weight. |
+ // (This assumption would need to change if the pass were done |
+ // pre-lowering.) |
+ if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(i))) { |
+ const bool SrcIsNormal = isNormal(SrcVar); |
+ if (SrcIsNormal) { |
+ Variable *OldMapped = VarMap.get(SrcVar); |
+ if (IsUnconditionallyExecuted) { |
+ Variable *NewMapped = VarMap.makeLinked(SrcVar); |
+ Inst *Mov = Target->createLoweredMove(NewMapped, OldMapped); |
+ Insts.insert(Iter, Mov); |
+ } |
+ Instr->replaceSource(i, OldMapped); |
+ } |
+ } |
+ } |
+ // Transformation of Dest is the same as the "v=t:inf" case above. |
+ if (DestIsNormal) { |
+ if (IsUnconditionallyExecuted) { |
+ Variable *NewMapped = VarMap.makeLinked(Dest); |
+ Inst *Mov = Target->createLoweredMove(NewMapped, Dest); |
+ Insts.insert(NextIter, Mov); |
+ } else { |
+ Variable *OldMapped = VarMap.get(Dest); |
+ Inst *Mov = Target->createLoweredMove(OldMapped, Dest); |
+ Mov->setDestRedefined(); |
+ Insts.insert(NextIter, Mov); |
+ } |
+ } |
+ } |
+ } |
+ dump("After splitting local variables"); |
+} |
+ |
void Cfg::doArgLowering() { |
TimerMarker T(TimerStack::TT_doArgLowering, this); |
getTarget()->lowerArguments(); |