Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 770 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 781 } | 781 } |
| 782 } | 782 } |
| 783 | 783 |
| 784 SizeT NodeIndex = 0; | 784 SizeT NodeIndex = 0; |
| 785 for (auto *Node : NewList) { | 785 for (auto *Node : NewList) { |
| 786 Node->resetIndex(NodeIndex++); | 786 Node->resetIndex(NodeIndex++); |
| 787 } | 787 } |
| 788 Nodes = NewList; | 788 Nodes = NewList; |
| 789 } | 789 } |
| 790 | 790 |
| 791 namespace { | |
| 792 | |
| 793 /// VariableMap is a simple helper class for splitLocalVars(), that keeps track | |
| 794 /// of the latest split version of the original Variables. | |
| 795 class VariableMap { | |
| 796 private: | |
| 797 VariableMap() = delete; | |
| 798 VariableMap(const VariableMap &) = delete; | |
| 799 VariableMap &operator=(const VariableMap &) = delete; | |
| 800 | |
| 801 public: | |
| 802 explicit VariableMap(Cfg *Func) | |
| 803 : Func(Func), NumVars(Func->getNumVariables()) {} | |
| 804 /// Reset the mappings at the start of a block. | |
| 805 void reset() { Map.assign(NumVars, nullptr); } | |
| 806 /// Get Var's current mapping (or Var itself if it has no mapping yet). | |
| 807 Variable *get(Variable *Var) const { | |
| 808 const SizeT VarNum = getVarNum(Var); | |
| 809 Variable *MappedVar = Map[VarNum]; | |
| 810 return MappedVar == nullptr ? Var : MappedVar; | |
| 811 } | |
| 812 /// Create a new linked Variable in the LinkedTo chain, and set it as Var's | |
| 813 /// latest mapping. | |
| 814 Variable *makeLinked(Variable *Var) { | |
| 815 Variable *NewVar = Func->makeVariable(Var->getType()); | |
| 816 NewVar->setRegClass(Var->getRegClass()); | |
| 817 NewVar->setLinkedTo(get(Var)); | |
| 818 const SizeT VarNum = getVarNum(Var); | |
| 819 Map[VarNum] = NewVar; | |
| 820 return NewVar; | |
| 821 } | |
| 822 | |
| 823 private: | |
| 824 Cfg *const Func; | |
| 825 // NumVars is for the size of the Map array. It can be const because any new | |
| 826 // Variables created during the splitting pass don't need to be mapped. | |
| 827 const SizeT NumVars; | |
| 828 CfgVector<Variable *> Map; | |
| 829 /// Get Var's VarNum, and do some validation. | |
| 830 SizeT getVarNum(Variable *Var) const { | |
| 831 const SizeT VarNum = Var->getIndex(); | |
| 832 assert(VarNum < NumVars); | |
| 833 assert(Var->mayHaveReg()); | |
| 834 return VarNum; | |
| 835 } | |
| 836 }; | |
| 837 | |
| 838 /// A Variable is "allocable" if it is a register allocation candidate but | |
| 839 /// doesn't already have a register. | |
| 840 bool isAllocable(const Variable *Var) { | |
| 841 if (Var == nullptr) | |
| 842 return false; | |
| 843 return !Var->hasReg() && Var->mayHaveReg(); | |
| 844 } | |
| 845 | |
| 846 /// A Variable is "inf" if it already has a register or is infinite-weight. | |
| 847 bool isInf(const Variable *Var) { | |
| 848 if (Var == nullptr) | |
| 849 return false; | |
| 850 return Var->hasReg() || Var->mustHaveReg(); | |
| 851 } | |
| 852 | |
| 853 } // end of anonymous namespace | |
| 854 | |
| 855 /// Within each basic block, rewrite Variable references in terms of chained | |
| 856 /// copies of the original Variable. For example: | |
| 857 /// A = B + C | |
| 858 /// might be rewritten as: | |
| 859 /// B1 = B | |
| 860 /// C1 = C | |
| 861 /// A = B + C | |
| 862 /// A1 = A | |
| 863 /// and then: | |
| 864 /// D = A + B | |
| 865 /// might be rewritten as: | |
| 866 /// A2 = A1 | |
| 867 /// B2 = B1 | |
| 868 /// D = A1 + B1 | |
| 869 /// D1 = D | |
| 870 /// | |
| 871 /// The purpose is to present the linear-scan register allocator with smaller | |
| 872 /// live ranges, to help mitigate its "all or nothing" allocation strategy, | |
| 873 /// while counting on its preference mechanism to keep the split versions in the | |
| 874 /// same register when possible. | |
| 875 /// | |
| 876 /// When creating new Variables, A2 is linked to A1 which is linked to A, and | |
| 877 /// similarly for the other Variable linked-to chains. Rewrites apply only to | |
| 878 /// Variables where mayHaveReg() is true. | |
| 879 /// | |
| 880 /// At code emission time, redundant linked-to stack assignments will be | |
| 881 /// recognized and elided. To illustrate using the above example, if A1 gets a | |
| 882 /// register but A and A2 are on the stack, the "A2=A1" store instruction is | |
| 883 /// redundant since A and A2 share the same stack slot and A1 originated from A. | |
| 884 /// | |
| 885 /// Simple assignment instructions are rewritten slightly differently, to take | |
| 886 /// maximal advantage of Variables known to have registers. | |
| 887 /// | |
| 888 /// In general, there may be several valid ways to rewrite an instruction: add | |
| 889 /// the new assignment instruction either before or after the original | |
| 890 /// instruction, and rewrite the original instruction with either the old or the | |
| 891 /// new variable mapping. We try to pick a strategy most likely to avoid | |
| 892 /// potential performance problems. For example, try to avoid storing to the | |
| 893 /// stack and then immediately reloading from the same location. One | |
| 894 /// consequence is that code might be generated that loads a register from a | |
| 895 /// stack location, followed almost immediately by another use of the same stack | |
| 896 /// location, despite its value already being available in a register as a | |
| 897 /// result of the first instruction. However, the performance impact here is | |
| 898 /// likely to be negligible, and a simple availability peephole optimization | |
| 899 /// could clean it up. | |
| 900 /// | |
| 901 /// This pass potentially adds a lot of new instructions and variables, and as | |
| 902 /// such there are compile-time performance concerns, particularly with liveness | |
| 903 /// analysis and register allocation. Note that for liveness analysis, the new | |
| 904 /// variables have single-block liveness, so they don't increase the size of the | |
| 905 /// liveness bit vectors that need to be merged across blocks. As a result, the | |
| 906 /// performance impact is likely to be linearly related to the number of new | |
| 907 /// instructions, rather than number of new variables times number of blocks | |
| 908 /// which would be the case if they were multi-block variables. | |
| 909 void Cfg::splitLocalVars() { | |
|
John
2016/07/26 18:59:39
this is really hard to follow. I know there are a
Jim Stichnoth
2016/07/28 23:37:03
Done.
| |
| 910 if (!getFlags().getSplitLocalVars()) | |
| 911 return; | |
| 912 TimerMarker _(TimerStack::TT_splitLocalVars, this); | |
| 913 VariableMap VarMap(this); | |
| 914 for (CfgNode *Node : getNodes()) { | |
| 915 // Clear the VarMap at the start of every block. | |
| 916 VarMap.reset(); | |
| 917 auto &Insts = Node->getInsts(); | |
| 918 auto Iter = Insts.begin(); | |
| 919 auto IterEnd = Insts.end(); | |
| 920 // TODO(stichnot): Also create assignments/mappings for phi dest variables. | |
| 921 InstList::iterator NextIter; | |
| 922 const Inst *WaitingForLabel = nullptr; | |
| 923 const Inst *WaitingForBranchTo = nullptr; | |
| 924 for (; Iter != IterEnd; Iter = NextIter) { | |
| 925 NextIter = Iter; | |
| 926 ++NextIter; | |
| 927 Inst *Instr = iteratorToInst(Iter); | |
| 928 if (Instr->isDeleted()) | |
| 929 continue; | |
| 930 | |
| 931 // Before doing any transformations, take care of the bookkeeping for | |
| 932 // intra-block branching. | |
| 933 // | |
| 934 // This is tricky because the transformation for one instruction may | |
| 935 // depend on a transformation for a previous instruction, but if that | |
| 936 // previous instruction is not dynamically executed due to intra-block | |
| 937 // control flow, it may lead to an inconsistent state and incorrect code. | |
| 938 // | |
| 939 // We want to handle some simple cases, and reject some others: | |
| 940 // | |
| 941 // 1. For something like a select instruction, we could have: | |
| 942 // test cond | |
| 943 // dest = src_false | |
| 944 // branch conditionally to label | |
| 945 // dest = src_true | |
| 946 // label: | |
| 947 // | |
| 948 // Between the conditional branch and the label, we need to treat dest and | |
| 949 // src variables specially, specifically not creating any new state. | |
| 950 // | |
| 951 // 2. Some 64-bit atomic instructions may be lowered to a loop: | |
| 952 // label: | |
| 953 // ... | |
| 954 // branch conditionally to label | |
| 955 // | |
| 956 // No special treatment is needed, but it's worth tracking so that case #1 | |
| 957 // above can also be handled. | |
| 958 // | |
| 959 // 3. Advanced switch lowering can create really complex intra-block | |
| 960 // control flow, so when we recognize this, we should just stop splitting | |
| 961 // for the remainder of the block (which isn't much since a switch | |
| 962 // instruction is a terminator). | |
| 963 // | |
| 964 // 4. Other complex lowering, e.g. an i64 icmp on a 32-bit architecture, | |
| 965 // can result in an if/then/else like structure with two labels. One | |
| 966 // possibility would be to suspend splitting for the remainder of the | |
| 967 // lowered instruction, and then resume for the remainder of the block, | |
| 968 // but since we don't have high-level instruction markers, we might as | |
| 969 // well just stop splitting for the remainder of the block. | |
| 970 if (Instr->isLabel()) { | |
| 971 // A Label instruction shouldn't have any operands, so it can be handled | |
| 972 // right here and then move on. | |
| 973 assert(Instr->getDest() == nullptr); | |
| 974 assert(Instr->getSrcSize() == 0); | |
| 975 if (Instr == WaitingForLabel) { | |
| 976 // If we found the forward-branch-target Label instruction we're | |
| 977 // waiting for, then clear the WaitingForLabel state. | |
| 978 WaitingForLabel = nullptr; | |
| 979 } else if (WaitingForLabel == nullptr && | |
| 980 WaitingForBranchTo == nullptr) { | |
| 981 // If we found a new Label instruction while the WaitingFor* state is | |
| 982 // clear, then set things up for this being a backward branch target. | |
| 983 WaitingForBranchTo = Instr; | |
| 984 } else { | |
| 985 // We see something we don't understand, so skip to the next block. | |
| 986 break; | |
| 987 } | |
| 988 continue; // move to next instruction | |
| 989 } | |
| 990 if (const Inst *Label = Instr->getIntraBlockBranchTarget()) { | |
| 991 // An intra-block branch instruction shouldn't have any operands, so it | |
| 992 // can be handled right here and then move on. | |
| 993 assert(Instr->getDest() == nullptr); | |
| 994 assert(Instr->getSrcSize() == 0); | |
| 995 if (WaitingForBranchTo == Label && WaitingForLabel == nullptr) { | |
| 996 WaitingForBranchTo = nullptr; | |
| 997 } else if (WaitingForBranchTo == nullptr && | |
| 998 (WaitingForLabel == nullptr || WaitingForLabel == Label)) { | |
| 999 WaitingForLabel = Label; | |
| 1000 } else { | |
| 1001 // We see something we don't understand, so skip to the next block. | |
| 1002 break; | |
| 1003 } | |
| 1004 continue; // move to next instruction | |
| 1005 } | |
| 1006 | |
| 1007 // Intra-block bookkeeping is complete, now do the transformations. | |
| 1008 static constexpr char AnInstructionHasNoName[] = ""; | |
| 1009 // We can limit the splitting to an arbitrary subset of the instructions, | |
| 1010 // and still expect correct code. As such, we can do instruction-subset | |
| 1011 // bisection to help debug any problems in this pass. | |
| 1012 if (!BuildDefs::minimal() && | |
| 1013 !getFlags().matchSplitInsts(AnInstructionHasNoName, | |
| 1014 Instr->getNumber())) | |
| 1015 continue; | |
| 1016 | |
| 1017 if (!llvm::isa<InstTarget>(Instr)) { | |
| 1018 // Ignore non-lowered instructions like FakeDef/FakeUse. | |
| 1019 continue; | |
| 1020 } | |
| 1021 const bool IsUnconditionallyExecuted = (WaitingForLabel == nullptr); | |
| 1022 Variable *Dest = Instr->getDest(); | |
| 1023 const bool DestIsInf = isInf(Dest); | |
| 1024 const bool DestIsAllocable = isAllocable(Dest); | |
| 1025 // Determine the transformation based on the kind of instruction, and | |
| 1026 // whether its Variables are infinite-weight. New instructions can be | |
| 1027 // inserted before the current instruction via Iter, or after the current | |
| 1028 // instruction via NextIter. | |
| 1029 if (Instr->isVarAssign()) { | |
| 1030 auto *SrcVar = llvm::cast<Variable>(Instr->getSrc(0)); | |
| 1031 const bool SrcIsInf = isInf(SrcVar); | |
| 1032 const bool SrcIsAllocable = isAllocable(SrcVar); | |
| 1033 if (DestIsInf && SrcIsInf) { | |
| 1034 // The instruction: | |
| 1035 // t:inf = u:inf | |
| 1036 // No transformation is needed. | |
| 1037 continue; | |
| 1038 } else if (DestIsInf && SrcIsAllocable && | |
| 1039 Dest->getType() == Instr->getSrc(0)->getType()) { | |
| 1040 // The instruction: | |
| 1041 // t:inf = v | |
| 1042 // gets transformed to: | |
| 1043 // t:inf = v1 | |
| 1044 // v2 = t:inf | |
| 1045 // where: | |
| 1046 // v1 := map[v] | |
| 1047 // v2 := linkTo(v) | |
| 1048 // map[v] := v2 | |
| 1049 // | |
| 1050 // If both v2 and its linkedToStackRoot get a stack slot, then | |
| 1051 // "v2=t:inf" is recognized as a redundant assignment and elided. | |
| 1052 // | |
| 1053 // Note that if the dest and src types are different, then this is | |
| 1054 // actually a truncation operation, which would make "v2=t:inf" an | |
| 1055 // invalid instruction. In this case, the type test will make it fall | |
| 1056 // through to the general case below. | |
| 1057 Variable *OldMapped = VarMap.get(SrcVar); | |
| 1058 Instr->replaceSource(0, OldMapped); | |
| 1059 if (IsUnconditionallyExecuted) { | |
| 1060 // Only create new mapping state if the instruction is | |
| 1061 // unconditionally executed. | |
| 1062 Variable *NewMapped = VarMap.makeLinked(SrcVar); | |
| 1063 Inst *Mov = Target->createLoweredMove(NewMapped, Dest); | |
| 1064 Insts.insert(NextIter, Mov); | |
| 1065 } | |
| 1066 continue; | |
| 1067 } else if (DestIsAllocable && SrcIsInf) { | |
| 1068 // The instruction: | |
| 1069 // v = t:inf | |
| 1070 // gets transformed to: | |
| 1071 // v = t:inf | |
| 1072 // v2 = t:inf | |
| 1073 // where: | |
| 1074 // v2 := linkTo(v) | |
| 1075 // map[v] := v2 | |
| 1076 // | |
| 1077 // If both v2 and v get a stack slot, then "v2=t:inf" is recognized as | |
| 1078 // a redundant assignment and elided. | |
| 1079 if (IsUnconditionallyExecuted) { | |
| 1080 // Only create new mapping state if the instruction is | |
| 1081 // unconditionally executed. | |
| 1082 Variable *NewMapped = VarMap.makeLinked(Dest); | |
| 1083 Inst *Mov = Target->createLoweredMove(NewMapped, SrcVar); | |
| 1084 Insts.insert(NextIter, Mov); | |
| 1085 } else { | |
| 1086 // For a conditionally executed instruction, add a redefinition of | |
| 1087 // the original Dest mapping, without creating a new linked | |
| 1088 // variable. | |
| 1089 Variable *OldMapped = VarMap.get(Dest); | |
| 1090 Inst *Mov = Target->createLoweredMove(OldMapped, SrcVar); | |
| 1091 Mov->setDestRedefined(); | |
| 1092 Insts.insert(NextIter, Mov); | |
| 1093 } | |
| 1094 continue; | |
| 1095 } | |
| 1096 } | |
| 1097 // The (non-variable-assignment) instruction: | |
| 1098 // ... = F(v) | |
| 1099 // where v is not infinite-weight, gets transformed to: | |
| 1100 // v2 = v1 | |
| 1101 // ... = F(v1) | |
| 1102 // where: | |
| 1103 // v1 := map[v] | |
| 1104 // v2 := linkTo(v) | |
| 1105 // map[v] := v2 | |
| 1106 // After that, if the "..." dest=u is not infinite-weight, append: | |
| 1107 // u2 = u | |
| 1108 // where: | |
| 1109 // u2 := linkTo(u) | |
| 1110 // map[u] := u2 | |
| 1111 for (SizeT i = 0; i < Instr->getSrcSize(); ++i) { | |
| 1112 // Iterate over the top-level src vars. Don't bother to dig into | |
| 1113 // e.g. MemOperands because their vars should all be infinite-weight. | |
| 1114 // (This assumption would need to change if the pass were done | |
| 1115 // pre-lowering.) | |
| 1116 if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(i))) { | |
| 1117 const bool SrcIsAllocable = isAllocable(SrcVar); | |
| 1118 if (SrcIsAllocable) { | |
| 1119 Variable *OldMapped = VarMap.get(SrcVar); | |
| 1120 if (IsUnconditionallyExecuted) { | |
| 1121 Variable *NewMapped = VarMap.makeLinked(SrcVar); | |
| 1122 Inst *Mov = Target->createLoweredMove(NewMapped, OldMapped); | |
| 1123 Insts.insert(Iter, Mov); | |
| 1124 } | |
| 1125 Instr->replaceSource(i, OldMapped); | |
| 1126 } | |
| 1127 } | |
| 1128 } | |
| 1129 // Transformation of Dest is the same as the "v=t:inf" case above. | |
| 1130 if (DestIsAllocable) { | |
| 1131 if (IsUnconditionallyExecuted) { | |
| 1132 Variable *NewMapped = VarMap.makeLinked(Dest); | |
| 1133 Inst *Mov = Target->createLoweredMove(NewMapped, Dest); | |
| 1134 Insts.insert(NextIter, Mov); | |
| 1135 } else { | |
| 1136 Variable *OldMapped = VarMap.get(Dest); | |
| 1137 Inst *Mov = Target->createLoweredMove(OldMapped, Dest); | |
| 1138 Mov->setDestRedefined(); | |
| 1139 Insts.insert(NextIter, Mov); | |
| 1140 } | |
| 1141 } | |
| 1142 } | |
| 1143 } | |
| 1144 dump("After splitting local variables"); | |
| 1145 } | |
| 1146 | |
| 791 void Cfg::doArgLowering() { | 1147 void Cfg::doArgLowering() { |
| 792 TimerMarker T(TimerStack::TT_doArgLowering, this); | 1148 TimerMarker T(TimerStack::TT_doArgLowering, this); |
| 793 getTarget()->lowerArguments(); | 1149 getTarget()->lowerArguments(); |
| 794 } | 1150 } |
| 795 | 1151 |
| 796 void Cfg::sortAndCombineAllocas(CfgVector<InstAlloca *> &Allocas, | 1152 void Cfg::sortAndCombineAllocas(CfgVector<InstAlloca *> &Allocas, |
| 797 uint32_t CombinedAlignment, InstList &Insts, | 1153 uint32_t CombinedAlignment, InstList &Insts, |
| 798 AllocaBaseVariableType BaseVariableType) { | 1154 AllocaBaseVariableType BaseVariableType) { |
| 799 if (Allocas.empty()) | 1155 if (Allocas.empty()) |
| 800 return; | 1156 return; |
| (...skipping 853 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1654 dump("After recomputing liveness for -decorate-asm"); | 2010 dump("After recomputing liveness for -decorate-asm"); |
| 1655 } | 2011 } |
| 1656 OstreamLocker L(Ctx); | 2012 OstreamLocker L(Ctx); |
| 1657 Ostream &Str = Ctx->getStrEmit(); | 2013 Ostream &Str = Ctx->getStrEmit(); |
| 1658 const Assembler *Asm = getAssembler<>(); | 2014 const Assembler *Asm = getAssembler<>(); |
| 1659 const bool NeedSandboxing = getFlags().getUseSandboxing(); | 2015 const bool NeedSandboxing = getFlags().getUseSandboxing(); |
| 1660 | 2016 |
| 1661 emitTextHeader(FunctionName, Ctx, Asm); | 2017 emitTextHeader(FunctionName, Ctx, Asm); |
| 1662 if (getFlags().getDecorateAsm()) { | 2018 if (getFlags().getDecorateAsm()) { |
| 1663 for (Variable *Var : getVariables()) { | 2019 for (Variable *Var : getVariables()) { |
| 1664 if (Var->hasStackOffset() && !Var->isRematerializable()) { | 2020 if (Var->hasKnownStackOffset() && !Var->isRematerializable()) { |
| 1665 Str << "\t" << Var->getSymbolicStackOffset() << " = " | 2021 Str << "\t" << Var->getSymbolicStackOffset() << " = " |
| 1666 << Var->getStackOffset() << "\n"; | 2022 << Var->getStackOffset() << "\n"; |
| 1667 } | 2023 } |
| 1668 } | 2024 } |
| 1669 } | 2025 } |
| 1670 for (CfgNode *Node : Nodes) { | 2026 for (CfgNode *Node : Nodes) { |
| 1671 if (NeedSandboxing && Node->needsAlignment()) { | 2027 if (NeedSandboxing && Node->needsAlignment()) { |
| 1672 Str << "\t" << Asm->getAlignDirective() << " " | 2028 Str << "\t" << Asm->getAlignDirective() << " " |
| 1673 << Asm->getBundleAlignLog2Bytes() << "\n"; | 2029 << Asm->getBundleAlignLog2Bytes() << "\n"; |
| 1674 } | 2030 } |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1767 } | 2123 } |
| 1768 } | 2124 } |
| 1769 // Print each basic block | 2125 // Print each basic block |
| 1770 for (CfgNode *Node : Nodes) | 2126 for (CfgNode *Node : Nodes) |
| 1771 Node->dump(this); | 2127 Node->dump(this); |
| 1772 if (isVerbose(IceV_Instructions)) | 2128 if (isVerbose(IceV_Instructions)) |
| 1773 Str << "}\n"; | 2129 Str << "}\n"; |
| 1774 } | 2130 } |
| 1775 | 2131 |
| 1776 } // end of namespace Ice | 2132 } // end of namespace Ice |
| OLD | NEW |