Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(602)

Side by Side Diff: src/IceCfg.cpp

Issue 2177033002: Subzero: Local variable splitting. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix bug with LinkedTo chains. Fix performance problem. Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceCfg.h ('k') | src/IceClFlags.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 770 matching lines...) Expand 10 before | Expand all | Expand 10 after
781 } 781 }
782 } 782 }
783 783
784 SizeT NodeIndex = 0; 784 SizeT NodeIndex = 0;
785 for (auto *Node : NewList) { 785 for (auto *Node : NewList) {
786 Node->resetIndex(NodeIndex++); 786 Node->resetIndex(NodeIndex++);
787 } 787 }
788 Nodes = NewList; 788 Nodes = NewList;
789 } 789 }
790 790
791 namespace {
792
793 /// VariableMap is a simple helper class for splitLocalVars(), that keeps track
794 /// of the latest split version of the original Variables. For each entry, the
795 /// Variable is tagged with the CfgNode that it is valid in, so that we don't
796 /// need to clear the entire Map[] vector for each block.
797 class VariableMap {
798 private:
799 VariableMap() = delete;
800 VariableMap(const VariableMap &) = delete;
801 VariableMap &operator=(const VariableMap &) = delete;
802
803 /// VarNodePair is basically std::pair<Variable*,CfgNode*> .
804 struct VarNodePair {
805 Variable *Var = nullptr;
806 const CfgNode *Node = nullptr;
807 VarNodePair() = default;
808
809 private:
810 VarNodePair(const VarNodePair &) = delete;
811 VarNodePair &operator=(const VarNodePair &) = delete;
812 };
813
814 public:
815 explicit VariableMap(Cfg *Func)
816 : Func(Func), NumVars(Func->getNumVariables()), Map(NumVars) {}
817 /// Reset the mappings at the start of a block.
818 void reset(const CfgNode *CurNode) { Node = CurNode; }
819 /// Get Var's current mapping (or Var itself if it has no mapping yet).
820 Variable *get(Variable *Var) const {
821 const SizeT VarNum = getVarNum(Var);
822 Variable *MappedVar = Map[VarNum].Var;
823 if (MappedVar == nullptr)
824 return Var;
825 if (Map[VarNum].Node != Node)
826 return Var;
827 return MappedVar;
828 }
829 /// Create a new linked Variable in the LinkedTo chain, and set it as Var's
830 /// latest mapping.
831 Variable *makeLinked(Variable *Var) {
832 Variable *NewVar = Func->makeVariable(Var->getType());
833 NewVar->setRegClass(Var->getRegClass());
834 NewVar->setLinkedTo(get(Var));
835 const SizeT VarNum = getVarNum(Var);
836 Map[VarNum].Var = NewVar;
837 Map[VarNum].Node = Node;
838 return NewVar;
839 }
840 /// Given Var that is LinkedTo some other variable, re-splice it into the
841 /// LinkedTo chain so that the chain is ordered by Variable::getIndex().
842 void spliceBlockLocalLinkedToChain(Variable *Var) {
843 Variable *LinkedTo = Var->getLinkedTo();
844 assert(LinkedTo != nullptr);
845 assert(Var->getIndex() > LinkedTo->getIndex());
846 const SizeT VarNum = getVarNum(LinkedTo);
847 Variable *Link = Map[VarNum].Var;
848 if (Link == nullptr || Map[VarNum].Node != Node)
849 return;
850 Variable *LinkParent = Link->getLinkedTo();
851 while (LinkParent != nullptr && LinkParent->getIndex() >= Var->getIndex()) {
852 Link = LinkParent;
853 LinkParent = Link->getLinkedTo();
854 }
855 Var->setLinkedTo(LinkParent);
856 Link->setLinkedTo(Var);
857 }
858
859 private:
860 Cfg *const Func;
861 // NumVars is for the size of the Map array. It can be const because any new
862 // Variables created during the splitting pass don't need to be mapped.
863 const SizeT NumVars;
864 CfgVector<VarNodePair> Map;
865 const CfgNode *Node = nullptr;
866 /// Get Var's VarNum, and do some validation.
867 SizeT getVarNum(Variable *Var) const {
868 const SizeT VarNum = Var->getIndex();
869 assert(VarNum < NumVars);
870 return VarNum;
871 }
872 };
873
874 /// A Variable is "allocable" if it is a register allocation candidate but
875 /// doesn't already have a register.
876 bool isAllocable(const Variable *Var) {
877 if (Var == nullptr)
878 return false;
879 return !Var->hasReg() && Var->mayHaveReg();
880 }
881
882 /// A Variable is "inf" if it already has a register or is infinite-weight.
883 bool isInf(const Variable *Var) {
884 if (Var == nullptr)
885 return false;
886 return Var->hasReg() || Var->mustHaveReg();
887 }
888
889 } // end of anonymous namespace
890
891 /// Within each basic block, rewrite Variable references in terms of chained
892 /// copies of the original Variable. For example:
893 /// A = B + C
894 /// might be rewritten as:
895 /// B1 = B
896 /// C1 = C
897 /// A = B + C
898 /// A1 = A
899 /// and then:
900 /// D = A + B
901 /// might be rewritten as:
902 /// A2 = A1
903 /// B2 = B1
904 /// D = A1 + B1
905 /// D1 = D
906 ///
907 /// The purpose is to present the linear-scan register allocator with smaller
908 /// live ranges, to help mitigate its "all or nothing" allocation strategy,
909 /// while counting on its preference mechanism to keep the split versions in the
910 /// same register when possible.
911 ///
912 /// When creating new Variables, A2 is linked to A1 which is linked to A, and
913 /// similar for the other Variable linked-to chains. Rewrites apply only to
914 /// Variables where mayHaveReg() is true.
915 ///
916 /// At code emission time, redundant linked-to stack assignments will be
917 /// recognized and elided. To illustrate using the above example, if A1 gets a
918 /// register but A and A2 are on the stack, the "A2=A1" store instruction is
919 /// redundant since A and A2 share the same stack slot and A1 originated from A.
920 ///
921 /// Simple assignment instructions are rewritten slightly differently, to take
922 /// maximal advantage of Variables known to have registers.
923 ///
924 /// In general, there may be several valid ways to rewrite an instruction: add
925 /// the new assignment instruction either before or after the original
926 /// instruction, and rewrite the original instruction with either the old or the
927 /// new variable mapping. We try to pick a strategy most likely to avoid
928 /// potential performance problems. For example, try to avoid storing to the
929 /// stack and then immediately reloading from the same location. One
930 /// consequence is that code might be generated that loads a register from a
931 /// stack location, followed almost immediately by another use of the same stack
932 /// location, despite its value already being available in a register as a
933 /// result of the first instruction. However, the performance impact here is
934 /// likely to be negligible, and a simple availability peephole optimization
935 /// could clean it up.
936 ///
937 /// This pass potentially adds a lot of new instructions and variables, and as
938 /// such there are compile-time performance concerns, particularly with liveness
939 /// analysis and register allocation. Note that for liveness analysis, the new
940 /// variables have single-block liveness, so they don't increase the size of the
941 /// liveness bit vectors that need to be merged across blocks. As a result, the
942 /// performance impact is likely to be linearly related to the number of new
943 /// instructions, rather than number of new variables times number of blocks
944 /// which would be the case if they were multi-block variables.
945 void Cfg::splitLocalVars() {
946 if (!getFlags().getSplitLocalVars())
947 return;
948 TimerMarker _(TimerStack::TT_splitLocalVars, this);
949 VariableMap VarMap(this);
950 // TODO(stichnot): Fix this mechanism for LinkedTo variables and stack slot
951 // assignment.
952 //
953 // To work around shortcomings with stack frame mapping, we want to arrange
954 // LinkedTo structure such that within one block, the LinkedTo structure
955 // leading to a root forms a list, not a tree. A LinkedTo root can have
956 // multiple children linking to it, but only one per block. Furthermore,
957 // because stack slot mapping processes variables in numerical order, the
958 // LinkedTo chain needs to be ordered such that when A->getLinkedTo()==B, then
959 // A->getIndex()>B->getIndex().
John 2016/08/01 14:17:31 add spaces around ' > ' (the gt operator)
Jim Stichnoth 2016/08/01 15:13:50 Done.
960 //
961 // To effect this, while processing a block we keep track of preexisting
962 // LinkedTo relationships via the LinkedToFixups vector, and at the end of the
963 // block we splice them in such that the block has a single chain for each
964 // root, ordered by getIndex() value.
965 CfgVector<Variable *> LinkedToFixups;
966 for (CfgNode *Node : getNodes()) {
967 // Clear the VarMap and LinkedToFixups at the start of every block.
968 VarMap.reset(Node);
969 LinkedToFixups.clear();
970 auto &Insts = Node->getInsts();
971 auto Iter = Insts.begin();
972 auto IterEnd = Insts.end();
973 // TODO(stichnot): Also create assignments/mappings for phi dest variables.
974 InstList::iterator NextIter;
975 const Inst *WaitingForLabel = nullptr;
976 const Inst *WaitingForBranchTo = nullptr;
977 for (; Iter != IterEnd; Iter = NextIter) {
978 NextIter = Iter;
979 ++NextIter;
980 Inst *Instr = iteratorToInst(Iter);
981 if (Instr->isDeleted())
982 continue;
983 Variable *Dest = Instr->getDest();
984
985 // Before doing any transformations, take care of the bookkeeping for
986 // intra-block branching.
987 //
988 // This is tricky because the transformation for one instruction may
989 // depend on a transformation for a previous instruction, but if that
990 // previous instruction is not dynamically executed due to intra-block
991 // control flow, it may lead to an inconsistent state and incorrect code.
992 //
993 // We want to handle some simple cases, and reject some others:
994 //
995 // 1. For something like a select instruction, we could have:
996 // test cond
997 // dest = src_false
998 // branch conditionally to label
999 // dest = src_true
1000 // label:
1001 //
1002 // Between the conditional branch and the label, we need to treat dest and
1003 // src variables specially, specifically not creating any new state.
1004 //
1005 // 2. Some 64-bit atomic instructions may be lowered to a loop:
1006 // label:
1007 // ...
1008 // branch conditionally to label
1009 //
1010 // No special treatment is needed, but it's worth tracking so that case #1
1011 // above can also be handled.
1012 //
1013 // 3. Advanced switch lowering can create really complex intra-block
1014 // control flow, so when we recognize this, we should just stop splitting
1015 // for the remainder of the block (which isn't much since a switch
1016 // instruction is a terminator).
1017 //
1018 // 4. Other complex lowering, e.g. an i64 icmp on a 32-bit architecture,
1019 // can result in an if/then/else like structure with two labels. One
1020 // possibility would be to suspect splitting for the remainder of the
1021 // lowered instruction, and then resume for the remainder of the block,
1022 // but since we don't have high-level instruction markers, we might as
1023 // well just stop splitting for the remainder of the block.
1024 if (Instr->isLabel()) {
1025 // A Label instruction shouldn't have any operands, so it can be handled
1026 // right here and then move on.
1027 assert(Dest == nullptr);
1028 assert(Instr->getSrcSize() == 0);
1029 if (Instr == WaitingForLabel) {
1030 // If we found the forward-branch-target Label instruction we're
1031 // waiting for, then clear the WaitingForLabel state.
1032 WaitingForLabel = nullptr;
1033 } else if (WaitingForLabel == nullptr &&
1034 WaitingForBranchTo == nullptr) {
1035 // If we found a new Label instruction while the WaitingFor* state is
1036 // clear, then set things up for this being a backward branch target.
1037 WaitingForBranchTo = Instr;
1038 } else {
1039 // We see something we don't understand, so skip to the next block.
1040 break;
1041 }
1042 continue; // move to next instruction
1043 }
1044 if (const Inst *Label = Instr->getIntraBlockBranchTarget()) {
1045 // An intra-block branch instruction shouldn't have any operands, so it
1046 // can be handled right here and then move on.
1047 assert(Dest == nullptr);
1048 assert(Instr->getSrcSize() == 0);
1049 if (WaitingForBranchTo == Label && WaitingForLabel == nullptr) {
1050 WaitingForBranchTo = nullptr;
1051 } else if (WaitingForBranchTo == nullptr &&
1052 (WaitingForLabel == nullptr || WaitingForLabel == Label)) {
1053 WaitingForLabel = Label;
1054 } else {
1055 // We see something we don't understand, so skip to the next block.
1056 break;
1057 }
1058 continue; // move to next instruction
1059 }
1060
1061 // Intra-block bookkeeping is complete, now do the transformations.
1062 static constexpr char AnInstructionHasNoName[] = "";
1063 // We can limit the splitting to an arbitrary subset of the instructions,
1064 // and still expect correct code. As such, we can do instruction-subset
1065 // bisection to help debug any problems in this pass.
1066 if (!BuildDefs::minimal() &&
1067 !getFlags().matchSplitInsts(AnInstructionHasNoName,
1068 Instr->getNumber()))
1069 continue;
1070
1071 if (!llvm::isa<InstTarget>(Instr)) {
1072 // Ignore non-lowered instructions like FakeDef/FakeUse.
1073 continue;
1074 }
1075 const bool IsUnconditionallyExecuted = (WaitingForLabel == nullptr);
1076 const bool DestIsInf = isInf(Dest);
1077 const bool DestIsAllocable = isAllocable(Dest);
1078 // Note any preexisting LinkedTo relationships that were created during
1079 // target lowering.
1080 if (Dest != nullptr && Dest->getLinkedTo() != nullptr) {
1081 LinkedToFixups.emplace_back(Dest);
John 2016/08/01 14:17:31 Any reasons why you can't spliceBlockLocalLinkedTo
Jim Stichnoth 2016/08/01 15:13:50 I no longer remember the exact scenario, but this
1082 }
1083 // Determine the transformation based on the kind of instruction, and
1084 // whether its Variables are infinite-weight. New instructions can be
1085 // inserted before the current instruction via Iter, or after the current
1086 // instruction via NextIter.
1087 if (Instr->isVarAssign()) {
1088 auto *SrcVar = llvm::cast<Variable>(Instr->getSrc(0));
1089 const bool SrcIsInf = isInf(SrcVar);
1090 const bool SrcIsAllocable = isAllocable(SrcVar);
1091 if (DestIsInf && SrcIsInf) {
1092 // The instruction:
1093 // t:inf = u:inf
1094 // No transformation is needed.
1095 continue;
1096 } else if (DestIsInf && SrcIsAllocable &&
1097 Dest->getType() == Instr->getSrc(0)->getType()) {
1098 // The instruction:
1099 // t:inf = v
1100 // gets transformed to:
1101 // t:inf = v1
1102 // v2 = t:inf
1103 // where:
1104 // v1 := map[v]
1105 // v2 := linkTo(v)
1106 // map[v] := v2
1107 //
1108 // If both v2 and its linkedToStackRoot get a stack slot, then
1109 // "v2=t:inf" is recognized as a redundant assignment and elided.
1110 //
1111 // Note that if the dest and src types are different, then this is
1112 // actually a truncation operation, which would make "v2=t:inf" an
1113 // invalid instruction. In this case, the type test will make it fall
1114 // through to the general case below.
1115 Variable *OldMapped = VarMap.get(SrcVar);
1116 Instr->replaceSource(0, OldMapped);
1117 if (IsUnconditionallyExecuted) {
1118 // Only create new mapping state if the instruction is
1119 // unconditionally executed.
1120 Variable *NewMapped = VarMap.makeLinked(SrcVar);
1121 Inst *Mov = Target->createLoweredMove(NewMapped, Dest);
1122 Insts.insert(NextIter, Mov);
1123 }
1124 continue;
1125 } else if (DestIsAllocable && SrcIsInf) {
1126 // The instruction:
1127 // v = t:inf
1128 // gets transformed to:
1129 // v = t:inf
1130 // v2 = t:inf
1131 // where:
1132 // v2 := linkTo(v)
1133 // map[v] := v2
1134 //
1135 // If both v2 and v get a stack slot, then "v2=t:inf" is recognized as
1136 // a redundant assignment and elided.
1137 if (IsUnconditionallyExecuted) {
1138 // Only create new mapping state if the instruction is
1139 // unconditionally executed.
1140 Variable *NewMapped = VarMap.makeLinked(Dest);
1141 Inst *Mov = Target->createLoweredMove(NewMapped, SrcVar);
1142 Insts.insert(NextIter, Mov);
1143 } else {
1144 // For a conditionally executed instruction, add a redefinition of
1145 // the original Dest mapping, without creating a new linked
1146 // variable.
1147 Variable *OldMapped = VarMap.get(Dest);
1148 Inst *Mov = Target->createLoweredMove(OldMapped, SrcVar);
1149 Mov->setDestRedefined();
1150 Insts.insert(NextIter, Mov);
1151 }
1152 continue;
1153 }
1154 }
1155 // The (non-variable-assignment) instruction:
1156 // ... = F(v)
1157 // where v is not infinite-weight, gets transformed to:
1158 // v2 = v1
1159 // ... = F(v1)
1160 // where:
1161 // v1 := map[v]
1162 // v2 := linkTo(v)
1163 // map[v] := v2
1164 // After that, if the "..." dest=u is not infinite-weight, append:
1165 // u2 = u
1166 // where:
1167 // u2 := linkTo(u)
1168 // map[u] := u2
1169 for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
1170 // Iterate over the top-level src vars. Don't bother to dig into
1171 // e.g. MemOperands because their vars should all be infinite-weight.
1172 // (This assumption would need to change if the pass were done
1173 // pre-lowering.)
1174 if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
1175 const bool SrcIsAllocable = isAllocable(SrcVar);
1176 if (SrcIsAllocable) {
1177 Variable *OldMapped = VarMap.get(SrcVar);
1178 if (IsUnconditionallyExecuted) {
1179 Variable *NewMapped = VarMap.makeLinked(SrcVar);
1180 Inst *Mov = Target->createLoweredMove(NewMapped, OldMapped);
1181 Insts.insert(Iter, Mov);
1182 }
1183 Instr->replaceSource(i, OldMapped);
1184 }
1185 }
1186 }
1187 // Transformation of Dest is the same as the "v=t:inf" case above.
1188 if (DestIsAllocable) {
1189 if (IsUnconditionallyExecuted) {
1190 Variable *NewMapped = VarMap.makeLinked(Dest);
1191 Inst *Mov = Target->createLoweredMove(NewMapped, Dest);
1192 Insts.insert(NextIter, Mov);
1193 } else {
1194 Variable *OldMapped = VarMap.get(Dest);
1195 Inst *Mov = Target->createLoweredMove(OldMapped, Dest);
1196 Mov->setDestRedefined();
1197 Insts.insert(NextIter, Mov);
1198 }
1199 }
1200 }
1201 // Splice in any preexisting links into the single chain.
1202 for (Variable *Var : LinkedToFixups) {
1203 VarMap.spliceBlockLocalLinkedToChain(Var);
1204 }
1205 }
1206 dump("After splitting local variables");
1207 }
1208
791 void Cfg::doArgLowering() { 1209 void Cfg::doArgLowering() {
792 TimerMarker T(TimerStack::TT_doArgLowering, this); 1210 TimerMarker T(TimerStack::TT_doArgLowering, this);
793 getTarget()->lowerArguments(); 1211 getTarget()->lowerArguments();
794 } 1212 }
795 1213
796 void Cfg::sortAndCombineAllocas(CfgVector<InstAlloca *> &Allocas, 1214 void Cfg::sortAndCombineAllocas(CfgVector<InstAlloca *> &Allocas,
797 uint32_t CombinedAlignment, InstList &Insts, 1215 uint32_t CombinedAlignment, InstList &Insts,
798 AllocaBaseVariableType BaseVariableType) { 1216 AllocaBaseVariableType BaseVariableType) {
799 if (Allocas.empty()) 1217 if (Allocas.empty())
800 return; 1218 return;
(...skipping 853 matching lines...) Expand 10 before | Expand all | Expand 10 after
1654 dump("After recomputing liveness for -decorate-asm"); 2072 dump("After recomputing liveness for -decorate-asm");
1655 } 2073 }
1656 OstreamLocker L(Ctx); 2074 OstreamLocker L(Ctx);
1657 Ostream &Str = Ctx->getStrEmit(); 2075 Ostream &Str = Ctx->getStrEmit();
1658 const Assembler *Asm = getAssembler<>(); 2076 const Assembler *Asm = getAssembler<>();
1659 const bool NeedSandboxing = getFlags().getUseSandboxing(); 2077 const bool NeedSandboxing = getFlags().getUseSandboxing();
1660 2078
1661 emitTextHeader(FunctionName, Ctx, Asm); 2079 emitTextHeader(FunctionName, Ctx, Asm);
1662 if (getFlags().getDecorateAsm()) { 2080 if (getFlags().getDecorateAsm()) {
1663 for (Variable *Var : getVariables()) { 2081 for (Variable *Var : getVariables()) {
1664 if (Var->hasStackOffset() && !Var->isRematerializable()) { 2082 if (Var->hasKnownStackOffset() && !Var->isRematerializable()) {
1665 Str << "\t" << Var->getSymbolicStackOffset() << " = " 2083 Str << "\t" << Var->getSymbolicStackOffset() << " = "
1666 << Var->getStackOffset() << "\n"; 2084 << Var->getStackOffset() << "\n";
1667 } 2085 }
1668 } 2086 }
1669 } 2087 }
1670 for (CfgNode *Node : Nodes) { 2088 for (CfgNode *Node : Nodes) {
1671 if (NeedSandboxing && Node->needsAlignment()) { 2089 if (NeedSandboxing && Node->needsAlignment()) {
1672 Str << "\t" << Asm->getAlignDirective() << " " 2090 Str << "\t" << Asm->getAlignDirective() << " "
1673 << Asm->getBundleAlignLog2Bytes() << "\n"; 2091 << Asm->getBundleAlignLog2Bytes() << "\n";
1674 } 2092 }
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
1767 } 2185 }
1768 } 2186 }
1769 // Print each basic block 2187 // Print each basic block
1770 for (CfgNode *Node : Nodes) 2188 for (CfgNode *Node : Nodes)
1771 Node->dump(this); 2189 Node->dump(this);
1772 if (isVerbose(IceV_Instructions)) 2190 if (isVerbose(IceV_Instructions))
1773 Str << "}\n"; 2191 Str << "}\n";
1774 } 2192 }
1775 2193
1776 } // end of namespace Ice 2194 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceCfg.h ('k') | src/IceClFlags.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698