OLD | NEW |
1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// | 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
11 /// This file implements the CfgNode class, including the complexities | 11 /// This file implements the CfgNode class, including the complexities of |
12 /// of instruction insertion and in-edge calculation. | 12 /// instruction insertion and in-edge calculation. |
13 /// | 13 /// |
14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
15 | 15 |
16 #include "IceCfgNode.h" | 16 #include "IceCfgNode.h" |
17 | 17 |
18 #include "IceAssembler.h" | 18 #include "IceAssembler.h" |
19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
20 #include "IceGlobalInits.h" | 20 #include "IceGlobalInits.h" |
21 #include "IceInst.h" | 21 #include "IceInst.h" |
22 #include "IceInstVarIter.h" | 22 #include "IceInstVarIter.h" |
23 #include "IceLiveness.h" | 23 #include "IceLiveness.h" |
24 #include "IceOperand.h" | 24 #include "IceOperand.h" |
25 #include "IceTargetLowering.h" | 25 #include "IceTargetLowering.h" |
26 | 26 |
27 namespace Ice { | 27 namespace Ice { |
28 | 28 |
29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber) | 29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber) |
30 : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {} | 30 : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {} |
31 | 31 |
32 // Returns the name the node was created with. If no name was given, | 32 // Returns the name the node was created with. If no name was given, it |
33 // it synthesizes a (hopefully) unique name. | 33 // synthesizes a (hopefully) unique name. |
34 IceString CfgNode::getName() const { | 34 IceString CfgNode::getName() const { |
35 if (NameIndex >= 0) | 35 if (NameIndex >= 0) |
36 return Func->getIdentifierName(NameIndex); | 36 return Func->getIdentifierName(NameIndex); |
37 return "__" + std::to_string(LabelNumber); | 37 return "__" + std::to_string(LabelNumber); |
38 } | 38 } |
39 | 39 |
40 // Adds an instruction to either the Phi list or the regular | 40 // Adds an instruction to either the Phi list or the regular instruction list. |
41 // instruction list. Validates that all Phis are added before all | 41 // Validates that all Phis are added before all regular instructions. |
42 // regular instructions. | |
43 void CfgNode::appendInst(Inst *Inst) { | 42 void CfgNode::appendInst(Inst *Inst) { |
44 ++InstCountEstimate; | 43 ++InstCountEstimate; |
45 if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) { | 44 if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) { |
46 if (!Insts.empty()) { | 45 if (!Insts.empty()) { |
47 Func->setError("Phi instruction added to the middle of a block"); | 46 Func->setError("Phi instruction added to the middle of a block"); |
48 return; | 47 return; |
49 } | 48 } |
50 Phis.push_back(Phi); | 49 Phis.push_back(Phi); |
51 } else { | 50 } else { |
52 Insts.push_back(Inst); | 51 Insts.push_back(Inst); |
53 } | 52 } |
54 } | 53 } |
55 | 54 |
56 // Renumbers the non-deleted instructions in the node. This needs to | 55 // Renumbers the non-deleted instructions in the node. This needs to be done in |
57 // be done in preparation for live range analysis. The instruction | 56 // preparation for live range analysis. The instruction numbers in a block must |
58 // numbers in a block must be monotonically increasing. The range of | 57 // be monotonically increasing. The range of instruction numbers in a block, |
59 // instruction numbers in a block, from lowest to highest, must not | 58 // from lowest to highest, must not overlap with the range of any other block. |
60 // overlap with the range of any other block. | |
61 void CfgNode::renumberInstructions() { | 59 void CfgNode::renumberInstructions() { |
62 InstNumberT FirstNumber = Func->getNextInstNumber(); | 60 InstNumberT FirstNumber = Func->getNextInstNumber(); |
63 for (Inst &I : Phis) | 61 for (Inst &I : Phis) |
64 I.renumber(Func); | 62 I.renumber(Func); |
65 for (Inst &I : Insts) | 63 for (Inst &I : Insts) |
66 I.renumber(Func); | 64 I.renumber(Func); |
67 InstCountEstimate = Func->getNextInstNumber() - FirstNumber; | 65 InstCountEstimate = Func->getNextInstNumber() - FirstNumber; |
68 } | 66 } |
69 | 67 |
70 // When a node is created, the OutEdges are immediately known, but the | 68 // When a node is created, the OutEdges are immediately known, but the InEdges |
71 // InEdges have to be built up incrementally. After the CFG has been | 69 // have to be built up incrementally. After the CFG has been constructed, the |
72 // constructed, the computePredecessors() pass finalizes it by | 70 // computePredecessors() pass finalizes it by creating the InEdges list. |
73 // creating the InEdges list. | |
74 void CfgNode::computePredecessors() { | 71 void CfgNode::computePredecessors() { |
75 for (CfgNode *Succ : OutEdges) | 72 for (CfgNode *Succ : OutEdges) |
76 Succ->InEdges.push_back(this); | 73 Succ->InEdges.push_back(this); |
77 } | 74 } |
78 | 75 |
79 void CfgNode::computeSuccessors() { | 76 void CfgNode::computeSuccessors() { |
80 OutEdges = Insts.rbegin()->getTerminatorEdges(); | 77 OutEdges = Insts.rbegin()->getTerminatorEdges(); |
81 } | 78 } |
82 | 79 |
83 // Validate each Phi instruction in the node with respect to control flow. For | 80 // Validate each Phi instruction in the node with respect to control flow. For |
84 // every phi argument, its label must appear in the predecessor list. For each | 81 // every phi argument, its label must appear in the predecessor list. For each |
85 // predecessor, there must be a phi argument with that label. We don't check | 82 // predecessor, there must be a phi argument with that label. We don't check |
86 // that phi arguments with the same label have the same value. | 83 // that phi arguments with the same label have the same value. |
87 void CfgNode::validatePhis() { | 84 void CfgNode::validatePhis() { |
88 for (Inst &Instr : Phis) { | 85 for (Inst &Instr : Phis) { |
89 auto *Phi = llvm::cast<InstPhi>(&Instr); | 86 auto *Phi = llvm::cast<InstPhi>(&Instr); |
90 // We do a simple O(N^2) algorithm to check for consistency. Even so, it | 87 // We do a simple O(N^2) algorithm to check for consistency. Even so, it |
91 // shows up as only about 0.2% of the total translation time. But if | 88 // shows up as only about 0.2% of the total translation time. But if |
92 // necessary, we could improve the complexity by using a hash table to count | 89 // necessary, we could improve the complexity by using a hash table to |
93 // how many times each node is referenced in the Phi instruction, and how | 90 // count how many times each node is referenced in the Phi instruction, and |
94 // many times each node is referenced in the incoming edge list, and compare | 91 // how many times each node is referenced in the incoming edge list, and |
95 // the two for equality. | 92 // compare the two for equality. |
96 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) { | 93 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) { |
97 CfgNode *Label = Phi->getLabel(i); | 94 CfgNode *Label = Phi->getLabel(i); |
98 bool Found = false; | 95 bool Found = false; |
99 for (CfgNode *InNode : getInEdges()) { | 96 for (CfgNode *InNode : getInEdges()) { |
100 if (InNode == Label) { | 97 if (InNode == Label) { |
101 Found = true; | 98 Found = true; |
102 break; | 99 break; |
103 } | 100 } |
104 } | 101 } |
105 if (!Found) | 102 if (!Found) |
106 llvm::report_fatal_error("Phi error: label for bad incoming edge"); | 103 llvm::report_fatal_error("Phi error: label for bad incoming edge"); |
107 } | 104 } |
108 for (CfgNode *InNode : getInEdges()) { | 105 for (CfgNode *InNode : getInEdges()) { |
109 bool Found = false; | 106 bool Found = false; |
110 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) { | 107 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) { |
111 CfgNode *Label = Phi->getLabel(i); | 108 CfgNode *Label = Phi->getLabel(i); |
112 if (InNode == Label) { | 109 if (InNode == Label) { |
113 Found = true; | 110 Found = true; |
114 break; | 111 break; |
115 } | 112 } |
116 } | 113 } |
117 if (!Found) | 114 if (!Found) |
118 llvm::report_fatal_error("Phi error: missing label for incoming edge"); | 115 llvm::report_fatal_error("Phi error: missing label for incoming edge"); |
119 } | 116 } |
120 } | 117 } |
121 } | 118 } |
122 | 119 |
123 // This does part 1 of Phi lowering, by creating a new dest variable | 120 // This does part 1 of Phi lowering, by creating a new dest variable for each |
124 // for each Phi instruction, replacing the Phi instruction's dest with | 121 // Phi instruction, replacing the Phi instruction's dest with that variable, |
125 // that variable, and adding an explicit assignment of the old dest to | 122 // and adding an explicit assignment of the old dest to the new dest. For |
126 // the new dest. For example, | 123 // example, |
127 // a=phi(...) | 124 // a=phi(...) |
128 // changes to | 125 // changes to |
129 // "a_phi=phi(...); a=a_phi". | 126 // "a_phi=phi(...); a=a_phi". |
130 // | 127 // |
131 // This is in preparation for part 2 which deletes the Phi | 128 // This is in preparation for part 2 which deletes the Phi instructions and |
132 // instructions and appends assignment instructions to predecessor | 129 // appends assignment instructions to predecessor blocks. Note that this |
133 // blocks. Note that this transformation preserves SSA form. | 130 // transformation preserves SSA form. |
134 void CfgNode::placePhiLoads() { | 131 void CfgNode::placePhiLoads() { |
135 for (Inst &I : Phis) { | 132 for (Inst &I : Phis) { |
136 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 133 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
137 Insts.insert(Insts.begin(), Phi->lower(Func)); | 134 Insts.insert(Insts.begin(), Phi->lower(Func)); |
138 } | 135 } |
139 } | 136 } |
140 | 137 |
141 // This does part 2 of Phi lowering. For each Phi instruction at each | 138 // This does part 2 of Phi lowering. For each Phi instruction at each out-edge, |
142 // out-edge, create a corresponding assignment instruction, and add | 139 // create a corresponding assignment instruction, and add all the assignments |
143 // all the assignments near the end of this block. They need to be | 140 // near the end of this block. They need to be added before any branch |
144 // added before any branch instruction, and also if the block ends | 141 // instruction, and also if the block ends with a compare instruction followed |
145 // with a compare instruction followed by a branch instruction that we | 142 // by a branch instruction that we may want to fuse, it's better to insert the |
146 // may want to fuse, it's better to insert the new assignments before | 143 // new assignments before the compare instruction. The |
147 // the compare instruction. The tryOptimizedCmpxchgCmpBr() method | 144 // tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions. |
148 // assumes this ordering of instructions. | |
149 // | 145 // |
150 // Note that this transformation takes the Phi dest variables out of | 146 // Note that this transformation takes the Phi dest variables out of SSA form, |
151 // SSA form, as there may be assignments to the dest variable in | 147 // as there may be assignments to the dest variable in multiple blocks. |
152 // multiple blocks. | |
153 void CfgNode::placePhiStores() { | 148 void CfgNode::placePhiStores() { |
154 // Find the insertion point. | 149 // Find the insertion point. |
155 InstList::iterator InsertionPoint = Insts.end(); | 150 InstList::iterator InsertionPoint = Insts.end(); |
156 // Every block must end in a terminator instruction, and therefore | 151 // Every block must end in a terminator instruction, and therefore must have |
157 // must have at least one instruction, so it's valid to decrement | 152 // at least one instruction, so it's valid to decrement InsertionPoint (but |
158 // InsertionPoint (but assert just in case). | 153 // assert just in case). |
159 assert(InsertionPoint != Insts.begin()); | 154 assert(InsertionPoint != Insts.begin()); |
160 --InsertionPoint; | 155 --InsertionPoint; |
161 // Confirm that InsertionPoint is a terminator instruction. Calling | 156 // Confirm that InsertionPoint is a terminator instruction. Calling |
162 // getTerminatorEdges() on a non-terminator instruction will cause | 157 // getTerminatorEdges() on a non-terminator instruction will cause an |
163 // an llvm_unreachable(). | 158 // llvm_unreachable(). |
164 (void)InsertionPoint->getTerminatorEdges(); | 159 (void)InsertionPoint->getTerminatorEdges(); |
165 // SafeInsertionPoint is always immediately before the terminator | 160 // SafeInsertionPoint is always immediately before the terminator |
166 // instruction. If the block ends in a compare and conditional | 161 // instruction. If the block ends in a compare and conditional branch, it's |
167 // branch, it's better to place the Phi store before the compare so | 162 // better to place the Phi store before the compare so as not to interfere |
168 // as not to interfere with compare/branch fusing. However, if the | 163 // with compare/branch fusing. However, if the compare instruction's dest |
169 // compare instruction's dest operand is the same as the new | 164 // operand is the same as the new assignment statement's source operand, this |
170 // assignment statement's source operand, this can't be done due to | 165 // can't be done due to data dependences, so we need to fall back to the |
171 // data dependences, so we need to fall back to the | 166 // SafeInsertionPoint. To illustrate: |
172 // SafeInsertionPoint. To illustrate: | |
173 // ; <label>:95 | 167 // ; <label>:95 |
174 // %97 = load i8* %96, align 1 | 168 // %97 = load i8* %96, align 1 |
175 // %98 = icmp ne i8 %97, 0 | 169 // %98 = icmp ne i8 %97, 0 |
176 // br i1 %98, label %99, label %2132 | 170 // br i1 %98, label %99, label %2132 |
177 // ; <label>:99 | 171 // ; <label>:99 |
178 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ] | 172 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ] |
179 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ] | 173 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ] |
180 // would be Phi-lowered as: | 174 // would be Phi-lowered as: |
181 // ; <label>:95 | 175 // ; <label>:95 |
182 // %97 = load i8* %96, align 1 | 176 // %97 = load i8* %96, align 1 |
183 // %100_phi = %97 ; can be at InsertionPoint | 177 // %100_phi = %97 ; can be at InsertionPoint |
184 // %98 = icmp ne i8 %97, 0 | 178 // %98 = icmp ne i8 %97, 0 |
185 // %101_phi = %98 ; must be at SafeInsertionPoint | 179 // %101_phi = %98 ; must be at SafeInsertionPoint |
186 // br i1 %98, label %99, label %2132 | 180 // br i1 %98, label %99, label %2132 |
187 // ; <label>:99 | 181 // ; <label>:99 |
188 // %100 = %100_phi | 182 // %100 = %100_phi |
189 // %101 = %101_phi | 183 // %101 = %101_phi |
190 // | 184 // |
191 // TODO(stichnot): It may be possible to bypass this whole | 185 // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint |
192 // SafeInsertionPoint mechanism. If a source basic block ends in a | 186 // mechanism. If a source basic block ends in a conditional branch: |
193 // conditional branch: | |
194 // labelSource: | 187 // labelSource: |
195 // ... | 188 // ... |
196 // br i1 %foo, label %labelTrue, label %labelFalse | 189 // br i1 %foo, label %labelTrue, label %labelFalse |
197 // and a branch target has a Phi involving the branch operand: | 190 // and a branch target has a Phi involving the branch operand: |
198 // labelTrue: | 191 // labelTrue: |
199 // %bar = phi i1 [ %foo, %labelSource ], ... | 192 // %bar = phi i1 [ %foo, %labelSource ], ... |
200 // then we actually know the constant i1 value of the Phi operand: | 193 // then we actually know the constant i1 value of the Phi operand: |
201 // labelTrue: | 194 // labelTrue: |
202 // %bar = phi i1 [ true, %labelSource ], ... | 195 // %bar = phi i1 [ true, %labelSource ], ... |
203 // It seems that this optimization should be done by clang or opt, | 196 // It seems that this optimization should be done by clang or opt, but we |
204 // but we could also do it here. | 197 // could also do it here. |
205 InstList::iterator SafeInsertionPoint = InsertionPoint; | 198 InstList::iterator SafeInsertionPoint = InsertionPoint; |
206 // Keep track of the dest variable of a compare instruction, so that | 199 // Keep track of the dest variable of a compare instruction, so that we |
207 // we insert the new instruction at the SafeInsertionPoint if the | 200 // insert the new instruction at the SafeInsertionPoint if the compare's dest |
208 // compare's dest matches the Phi-lowered assignment's source. | 201 // matches the Phi-lowered assignment's source. |
209 Variable *CmpInstDest = nullptr; | 202 Variable *CmpInstDest = nullptr; |
210 // If the current insertion point is at a conditional branch | 203 // If the current insertion point is at a conditional branch instruction, and |
211 // instruction, and the previous instruction is a compare | 204 // the previous instruction is a compare instruction, then we move the |
212 // instruction, then we move the insertion point before the compare | 205 // insertion point before the compare instruction so as not to interfere with |
213 // instruction so as not to interfere with compare/branch fusing. | 206 // compare/branch fusing. |
214 if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) { | 207 if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) { |
215 if (!Branch->isUnconditional()) { | 208 if (!Branch->isUnconditional()) { |
216 if (InsertionPoint != Insts.begin()) { | 209 if (InsertionPoint != Insts.begin()) { |
217 --InsertionPoint; | 210 --InsertionPoint; |
218 if (llvm::isa<InstIcmp>(InsertionPoint) || | 211 if (llvm::isa<InstIcmp>(InsertionPoint) || |
219 llvm::isa<InstFcmp>(InsertionPoint)) { | 212 llvm::isa<InstFcmp>(InsertionPoint)) { |
220 CmpInstDest = InsertionPoint->getDest(); | 213 CmpInstDest = InsertionPoint->getDest(); |
221 } else { | 214 } else { |
222 ++InsertionPoint; | 215 ++InsertionPoint; |
223 } | 216 } |
(...skipping 18 matching lines...) Expand all Loading... |
242 } | 235 } |
243 } | 236 } |
244 } | 237 } |
245 | 238 |
246 // Deletes the phi instructions after the loads and stores are placed. | 239 // Deletes the phi instructions after the loads and stores are placed. |
247 void CfgNode::deletePhis() { | 240 void CfgNode::deletePhis() { |
248 for (Inst &I : Phis) | 241 for (Inst &I : Phis) |
249 I.setDeleted(); | 242 I.setDeleted(); |
250 } | 243 } |
251 | 244 |
252 // Splits the edge from Pred to this node by creating a new node and | 245 // Splits the edge from Pred to this node by creating a new node and hooking up |
253 // hooking up the in and out edges appropriately. (The EdgeIndex | 246 // the in and out edges appropriately. (The EdgeIndex parameter is only used to |
254 // parameter is only used to make the new node's name unique when | 247 // make the new node's name unique when there are multiple edges between the |
255 // there are multiple edges between the same pair of nodes.) The new | 248 // same pair of nodes.) The new node's instruction list is initialized to the |
256 // node's instruction list is initialized to the empty list, with no | 249 // empty list, with no terminator instruction. There must not be multiple edges |
257 // terminator instruction. There must not be multiple edges from Pred | 250 // from Pred to this node so all Inst::getTerminatorEdges implementations must |
258 // to this node so all Inst::getTerminatorEdges implementations must | |
259 // not contain duplicates. | 251 // not contain duplicates. |
260 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { | 252 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { |
261 CfgNode *NewNode = Func->makeNode(); | 253 CfgNode *NewNode = Func->makeNode(); |
262 // Depth is the minimum as it works if both are the same, but if one is | 254 // Depth is the minimum as it works if both are the same, but if one is |
263 // outside the loop and the other is inside, the new node should be placed | 255 // outside the loop and the other is inside, the new node should be placed |
264 // outside and not be executed multiple times within the loop. | 256 // outside and not be executed multiple times within the loop. |
265 NewNode->setLoopNestDepth( | 257 NewNode->setLoopNestDepth( |
266 std::min(getLoopNestDepth(), Pred->getLoopNestDepth())); | 258 std::min(getLoopNestDepth(), Pred->getLoopNestDepth())); |
267 if (BuildDefs::dump()) | 259 if (BuildDefs::dump()) |
268 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + | 260 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + |
269 std::to_string(EdgeIndex)); | 261 std::to_string(EdgeIndex)); |
270 // The new node is added to the end of the node list, and will later | 262 // The new node is added to the end of the node list, and will later need to |
271 // need to be sorted into a reasonable topological order. | 263 // be sorted into a reasonable topological order. |
272 NewNode->setNeedsPlacement(true); | 264 NewNode->setNeedsPlacement(true); |
273 // Repoint Pred's out-edge. | 265 // Repoint Pred's out-edge. |
274 bool Found = false; | 266 bool Found = false; |
275 for (CfgNode *&I : Pred->OutEdges) { | 267 for (CfgNode *&I : Pred->OutEdges) { |
276 if (I == this) { | 268 if (I == this) { |
277 I = NewNode; | 269 I = NewNode; |
278 NewNode->InEdges.push_back(Pred); | 270 NewNode->InEdges.push_back(Pred); |
279 Found = true; | 271 Found = true; |
280 break; | 272 break; |
281 } | 273 } |
(...skipping 30 matching lines...) Expand all Loading... |
312 return true; | 304 return true; |
313 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { | 305 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { |
314 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) | 306 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) |
315 return true; | 307 return true; |
316 } | 308 } |
317 return false; | 309 return false; |
318 } | 310 } |
319 | 311 |
320 } // end of anonymous namespace | 312 } // end of anonymous namespace |
321 | 313 |
322 // This is the "advanced" version of Phi lowering for a basic block, in contrast to | 314 // This is the "advanced" version of Phi lowering for a basic block, in contrast |
323 // the simple version that lowers through assignments involving temporaries. | 315 // to the simple version that lowers through assignments involving temporaries. |
324 // | 316 // |
325 // All Phi instructions in a basic block are conceptually executed in parallel. | 317 // All Phi instructions in a basic block are conceptually executed in parallel. |
326 // However, if we lower Phis early and commit to a sequential ordering, we may | 318 // However, if we lower Phis early and commit to a sequential ordering, we may |
327 // end up creating unnecessary interferences which lead to worse register | 319 // end up creating unnecessary interferences which lead to worse register |
328 // allocation. Delaying Phi scheduling until after register allocation can help | 320 // allocation. Delaying Phi scheduling until after register allocation can help |
329 // unless there are no free registers for shuffling registers or stack slots and | 321 // unless there are no free registers for shuffling registers or stack slots |
330 // spilling becomes necessary. | 322 // and spilling becomes necessary. |
331 // | 323 // |
332 // The advanced Phi lowering starts by finding a topological sort of the Phi | 324 // The advanced Phi lowering starts by finding a topological sort of the Phi |
333 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on B. | 325 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on |
334 // Preexisting register assignments are considered in the topological sort. If | 326 // B. Preexisting register assignments are considered in the topological sort. |
335 // a topological sort is not possible due to a cycle, the cycle is broken by | 327 // If a topological sort is not possible due to a cycle, the cycle is broken by |
336 // introducing a non-parallel temporary. For example, a cycle arising from a | 328 // introducing a non-parallel temporary. For example, a cycle arising from a |
337 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being | 329 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being |
338 // equal, prefer to schedule assignments with register-allocated Src operands | 330 // equal, prefer to schedule assignments with register-allocated Src operands |
339 // earlier, in case that register becomes free afterwards, and prefer to | 331 // earlier, in case that register becomes free afterwards, and prefer to |
340 // schedule assignments with register-allocated Dest variables later, to keep | 332 // schedule assignments with register-allocated Dest variables later, to keep |
341 // that register free for longer. | 333 // that register free for longer. |
342 // | 334 // |
343 // Once the ordering is determined, the Cfg edge is split and the assignment | 335 // Once the ordering is determined, the Cfg edge is split and the assignment |
344 // list is lowered by the target lowering layer. Since the assignment lowering | 336 // list is lowered by the target lowering layer. Since the assignment lowering |
345 // may create new infinite-weight temporaries, a follow-on register allocation | 337 // may create new infinite-weight temporaries, a follow-on register allocation |
346 // pass will be needed. To prepare for this, liveness (including live range | 338 // pass will be needed. To prepare for this, liveness (including live range |
347 // calculation) of the split nodes needs to be calculated, and liveness of the | 339 // calculation) of the split nodes needs to be calculated, and liveness of the |
348 // original node needs to be updated to "undo" the effects of the phi | 340 // original node needs to be updated to "undo" the effects of the phi |
349 // assignments. | 341 // assignments. |
350 | 342 |
351 // The specific placement of the new node within the Cfg node list is deferred | 343 // The specific placement of the new node within the Cfg node list is deferred |
352 // until later, including after empty node contraction. | 344 // until later, including after empty node contraction. |
353 // | 345 // |
354 // After phi assignments are lowered across all blocks, another register | 346 // After phi assignments are lowered across all blocks, another register |
355 // allocation pass is run, focusing only on pre-colored and infinite-weight | 347 // allocation pass is run, focusing only on pre-colored and infinite-weight |
356 // variables, similar to Om1 register allocation (except without the need to | 348 // variables, similar to Om1 register allocation (except without the need to |
357 // specially compute these variables' live ranges, since they have already been | 349 // specially compute these variables' live ranges, since they have already been |
358 // precisely calculated). The register allocator in this mode needs the ability | 350 // precisely calculated). The register allocator in this mode needs the ability |
359 // to forcibly spill and reload registers in case none are naturally available. | 351 // to forcibly spill and reload registers in case none are naturally available. |
360 void CfgNode::advancedPhiLowering() { | 352 void CfgNode::advancedPhiLowering() { |
361 if (getPhis().empty()) | 353 if (getPhis().empty()) |
362 return; | 354 return; |
363 | 355 |
364 // Count the number of non-deleted Phi instructions. | 356 // Count the number of non-deleted Phi instructions. |
365 struct PhiDesc { | 357 struct PhiDesc { |
366 InstPhi *Phi; | 358 InstPhi *Phi; |
367 Variable *Dest; | 359 Variable *Dest; |
368 Operand *Src; | 360 Operand *Src; |
(...skipping 27 matching lines...) Expand all Loading... |
396 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); | 388 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); |
397 SizeT Remaining = NumPhis; | 389 SizeT Remaining = NumPhis; |
398 | 390 |
399 // First pass computes Src and initializes NumPred. | 391 // First pass computes Src and initializes NumPred. |
400 for (size_t I = 0; I < NumPhis; ++I) { | 392 for (size_t I = 0; I < NumPhis; ++I) { |
401 Variable *Dest = Desc[I].Dest; | 393 Variable *Dest = Desc[I].Dest; |
402 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); | 394 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); |
403 Desc[I].Src = Src; | 395 Desc[I].Src = Src; |
404 Desc[I].Processed = false; | 396 Desc[I].Processed = false; |
405 Desc[I].NumPred = 0; | 397 Desc[I].NumPred = 0; |
406 // Cherry-pick any trivial assignments, so that they don't | 398 // Cherry-pick any trivial assignments, so that they don't contribute to |
407 // contribute to the running complexity of the topological sort. | 399 // the running complexity of the topological sort. |
408 if (sameVarOrReg(Dest, Src)) { | 400 if (sameVarOrReg(Dest, Src)) { |
409 Desc[I].Processed = true; | 401 Desc[I].Processed = true; |
410 --Remaining; | 402 --Remaining; |
411 if (Dest != Src) | 403 if (Dest != Src) |
412 // If Dest and Src are syntactically the same, don't bother | 404 // If Dest and Src are syntactically the same, don't bother adding |
413 // adding the assignment, because in all respects it would | 405 // the assignment, because in all respects it would be redundant, and |
414 // be redundant, and if Dest/Src are on the stack, the | 406 // if Dest/Src are on the stack, the target lowering may naively |
415 // target lowering may naively decide to lower it using a | 407 // decide to lower it using a temporary register. |
416 // temporary register. | |
417 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 408 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
418 } | 409 } |
419 } | 410 } |
420 // Second pass computes NumPred by comparing every pair of Phi | 411 // Second pass computes NumPred by comparing every pair of Phi |
421 // instructions. | 412 // instructions. |
422 for (size_t I = 0; I < NumPhis; ++I) { | 413 for (size_t I = 0; I < NumPhis; ++I) { |
423 if (Desc[I].Processed) | 414 if (Desc[I].Processed) |
424 continue; | 415 continue; |
425 const Variable *Dest = Desc[I].Dest; | 416 const Variable *Dest = Desc[I].Dest; |
426 for (size_t J = 0; J < NumPhis; ++J) { | 417 for (size_t J = 0; J < NumPhis; ++J) { |
427 if (Desc[J].Processed) | 418 if (Desc[J].Processed) |
428 continue; | 419 continue; |
429 if (I != J) { | 420 if (I != J) { |
430 // There shouldn't be two Phis with the same Dest variable | 421 // There shouldn't be two Phis with the same Dest variable or |
431 // or register. | 422 // register. |
432 assert(!sameVarOrReg(Dest, Desc[J].Dest)); | 423 assert(!sameVarOrReg(Dest, Desc[J].Dest)); |
433 } | 424 } |
434 const Operand *Src = Desc[J].Src; | 425 const Operand *Src = Desc[J].Src; |
435 if (sameVarOrReg(Dest, Src)) | 426 if (sameVarOrReg(Dest, Src)) |
436 ++Desc[I].NumPred; | 427 ++Desc[I].NumPred; |
437 } | 428 } |
438 } | 429 } |
439 | 430 |
440 // Another pass to compute initial Weight values. | 431 // Another pass to compute initial Weight values. |
441 | 432 |
442 // Always pick NumPred=0 over NumPred>0. | 433 // Always pick NumPred=0 over NumPred>0. |
443 constexpr int32_t WeightNoPreds = 4; | 434 constexpr int32_t WeightNoPreds = 4; |
444 // Prefer Src as a register because the register might free up. | 435 // Prefer Src as a register because the register might free up. |
445 constexpr int32_t WeightSrcIsReg = 2; | 436 constexpr int32_t WeightSrcIsReg = 2; |
446 // Prefer Dest not as a register because the register stays free | 437 // Prefer Dest not as a register because the register stays free longer. |
447 // longer. | |
448 constexpr int32_t WeightDestNotReg = 1; | 438 constexpr int32_t WeightDestNotReg = 1; |
449 | 439 |
450 for (size_t I = 0; I < NumPhis; ++I) { | 440 for (size_t I = 0; I < NumPhis; ++I) { |
451 if (Desc[I].Processed) | 441 if (Desc[I].Processed) |
452 continue; | 442 continue; |
453 int32_t Weight = 0; | 443 int32_t Weight = 0; |
454 if (Desc[I].NumPred == 0) | 444 if (Desc[I].NumPred == 0) |
455 Weight += WeightNoPreds; | 445 Weight += WeightNoPreds; |
456 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) | 446 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) |
457 if (Var->hasReg()) | 447 if (Var->hasReg()) |
458 Weight += WeightSrcIsReg; | 448 Weight += WeightSrcIsReg; |
459 if (!Desc[I].Dest->hasReg()) | 449 if (!Desc[I].Dest->hasReg()) |
460 Weight += WeightDestNotReg; | 450 Weight += WeightDestNotReg; |
461 Desc[I].Weight = Weight; | 451 Desc[I].Weight = Weight; |
462 } | 452 } |
463 | 453 |
464 // Repeatedly choose and process the best candidate in the | 454 // Repeatedly choose and process the best candidate in the topological |
465 // topological sort, until no candidates remain. This | 455 // sort, until no candidates remain. This implementation is O(N^2) where N |
466 // implementation is O(N^2) where N is the number of Phi | 456 // is the number of Phi instructions, but with a small constant factor |
467 // instructions, but with a small constant factor compared to a | 457 // compared to a likely implementation of O(N) topological sort. |
468 // likely implementation of O(N) topological sort. | |
469 for (; Remaining; --Remaining) { | 458 for (; Remaining; --Remaining) { |
470 size_t BestIndex = 0; | 459 size_t BestIndex = 0; |
471 int32_t BestWeight = -1; | 460 int32_t BestWeight = -1; |
472 // Find the best candidate. | 461 // Find the best candidate. |
473 for (size_t I = 0; I < NumPhis; ++I) { | 462 for (size_t I = 0; I < NumPhis; ++I) { |
474 if (Desc[I].Processed) | 463 if (Desc[I].Processed) |
475 continue; | 464 continue; |
476 int32_t Weight = 0; | 465 int32_t Weight = 0; |
477 Weight = Desc[I].Weight; | 466 Weight = Desc[I].Weight; |
478 if (Weight > BestWeight) { | 467 if (Weight > BestWeight) { |
479 BestIndex = I; | 468 BestIndex = I; |
480 BestWeight = Weight; | 469 BestWeight = Weight; |
481 } | 470 } |
482 } | 471 } |
483 assert(BestWeight >= 0); | 472 assert(BestWeight >= 0); |
484 assert(Desc[BestIndex].NumPred <= 1); | 473 assert(Desc[BestIndex].NumPred <= 1); |
485 Variable *Dest = Desc[BestIndex].Dest; | 474 Variable *Dest = Desc[BestIndex].Dest; |
486 Operand *Src = Desc[BestIndex].Src; | 475 Operand *Src = Desc[BestIndex].Src; |
487 assert(!sameVarOrReg(Dest, Src)); | 476 assert(!sameVarOrReg(Dest, Src)); |
488 // Break a cycle by introducing a temporary. | 477 // Break a cycle by introducing a temporary. |
489 if (Desc[BestIndex].NumPred) { | 478 if (Desc[BestIndex].NumPred) { |
490 bool Found = false; | 479 bool Found = false; |
491 // If the target instruction "A=B" is part of a cycle, find | 480 // If the target instruction "A=B" is part of a cycle, find the "X=A" |
492 // the "X=A" assignment in the cycle because it will have to | 481 // assignment in the cycle because it will have to be rewritten as |
493 // be rewritten as "X=tmp". | 482 // "X=tmp". |
494 for (size_t J = 0; !Found && J < NumPhis; ++J) { | 483 for (size_t J = 0; !Found && J < NumPhis; ++J) { |
495 if (Desc[J].Processed) | 484 if (Desc[J].Processed) |
496 continue; | 485 continue; |
497 Operand *OtherSrc = Desc[J].Src; | 486 Operand *OtherSrc = Desc[J].Src; |
498 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { | 487 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { |
499 SizeT VarNum = Func->getNumVariables(); | 488 SizeT VarNum = Func->getNumVariables(); |
500 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); | 489 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); |
501 if (BuildDefs::dump()) | 490 if (BuildDefs::dump()) |
502 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); | 491 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); |
503 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); | 492 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); |
504 Desc[J].Src = Tmp; | 493 Desc[J].Src = Tmp; |
505 Found = true; | 494 Found = true; |
506 } | 495 } |
507 } | 496 } |
508 assert(Found); | 497 assert(Found); |
509 } | 498 } |
510 // Now that a cycle (if any) has been broken, create the actual | 499 // Now that a cycle (if any) has been broken, create the actual |
511 // assignment. | 500 // assignment. |
512 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 501 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
513 // Update NumPred for all Phi assignments using this Phi's Src | 502 // Update NumPred for all Phi assignments using this Phi's Src as their |
514 // as their Dest variable. Also update Weight if NumPred | 503 // Dest variable. Also update Weight if NumPred dropped from 1 to 0. |
515 // dropped from 1 to 0. | |
516 if (auto Var = llvm::dyn_cast<Variable>(Src)) { | 504 if (auto Var = llvm::dyn_cast<Variable>(Src)) { |
517 for (size_t I = 0; I < NumPhis; ++I) { | 505 for (size_t I = 0; I < NumPhis; ++I) { |
518 if (Desc[I].Processed) | 506 if (Desc[I].Processed) |
519 continue; | 507 continue; |
520 if (sameVarOrReg(Var, Desc[I].Dest)) { | 508 if (sameVarOrReg(Var, Desc[I].Dest)) { |
521 if (--Desc[I].NumPred == 0) | 509 if (--Desc[I].NumPred == 0) |
522 Desc[I].Weight += WeightNoPreds; | 510 Desc[I].Weight += WeightNoPreds; |
523 } | 511 } |
524 } | 512 } |
525 } | 513 } |
526 Desc[BestIndex].Processed = true; | 514 Desc[BestIndex].Processed = true; |
527 } | 515 } |
528 Split->appendInst(InstBr::create(Func, this)); | 516 Split->appendInst(InstBr::create(Func, this)); |
529 | 517 |
530 Split->genCode(); | 518 Split->genCode(); |
531 Func->getVMetadata()->addNode(Split); | 519 Func->getVMetadata()->addNode(Split); |
532 } | 520 } |
533 } | 521 } |
534 | 522 |
535 // Does address mode optimization. Pass each instruction to the | 523 // Does address mode optimization. Pass each instruction to the TargetLowering |
536 // TargetLowering object. If it returns a new instruction | 524 // object. If it returns a new instruction (representing the optimized address |
537 // (representing the optimized address mode), then insert the new | 525 // mode), then insert the new instruction and delete the old. |
538 // instruction and delete the old. | |
539 void CfgNode::doAddressOpt() { | 526 void CfgNode::doAddressOpt() { |
540 TargetLowering *Target = Func->getTarget(); | 527 TargetLowering *Target = Func->getTarget(); |
541 LoweringContext &Context = Target->getContext(); | 528 LoweringContext &Context = Target->getContext(); |
542 Context.init(this); | 529 Context.init(this); |
543 while (!Context.atEnd()) { | 530 while (!Context.atEnd()) { |
544 Target->doAddressOpt(); | 531 Target->doAddressOpt(); |
545 } | 532 } |
546 } | 533 } |
547 | 534 |
548 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) { | 535 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) { |
(...skipping 11 matching lines...) Expand all Loading... |
560 } | 547 } |
561 if (!PauseNopInsertion) | 548 if (!PauseNopInsertion) |
562 Target->doNopInsertion(RNG); | 549 Target->doNopInsertion(RNG); |
563 // Ensure Cur=Next, so that the nops are inserted before the current | 550 // Ensure Cur=Next, so that the nops are inserted before the current |
564 // instruction rather than after. | 551 // instruction rather than after. |
565 Context.advanceCur(); | 552 Context.advanceCur(); |
566 Context.advanceNext(); | 553 Context.advanceNext(); |
567 } | 554 } |
568 } | 555 } |
569 | 556 |
570 // Drives the target lowering. Passes the current instruction and the | 557 // Drives the target lowering. Passes the current instruction and the next |
571 // next non-deleted instruction for target lowering. | 558 // non-deleted instruction for target lowering. |
572 void CfgNode::genCode() { | 559 void CfgNode::genCode() { |
573 TargetLowering *Target = Func->getTarget(); | 560 TargetLowering *Target = Func->getTarget(); |
574 LoweringContext &Context = Target->getContext(); | 561 LoweringContext &Context = Target->getContext(); |
575 // Lower the regular instructions. | 562 // Lower the regular instructions. |
576 Context.init(this); | 563 Context.init(this); |
577 Target->initNodeForLowering(this); | 564 Target->initNodeForLowering(this); |
578 while (!Context.atEnd()) { | 565 while (!Context.atEnd()) { |
579 InstList::iterator Orig = Context.getCur(); | 566 InstList::iterator Orig = Context.getCur(); |
580 if (llvm::isa<InstRet>(*Orig)) | 567 if (llvm::isa<InstRet>(*Orig)) |
581 setHasReturn(); | 568 setHasReturn(); |
(...skipping 14 matching lines...) Expand all Loading... |
596 continue; | 583 continue; |
597 I.livenessLightweight(Func, Live); | 584 I.livenessLightweight(Func, Live); |
598 } | 585 } |
599 for (Inst &I : Phis) { | 586 for (Inst &I : Phis) { |
600 if (I.isDeleted()) | 587 if (I.isDeleted()) |
601 continue; | 588 continue; |
602 I.livenessLightweight(Func, Live); | 589 I.livenessLightweight(Func, Live); |
603 } | 590 } |
604 } | 591 } |
605 | 592 |
606 // Performs liveness analysis on the block. Returns true if the | 593 // Performs liveness analysis on the block. Returns true if the incoming |
607 // incoming liveness changed from before, false if it stayed the same. | 594 // liveness changed from before, false if it stayed the same. (If it changes, |
608 // (If it changes, the node's predecessors need to be processed | 595 // the node's predecessors need to be processed again.) |
609 // again.) | |
610 bool CfgNode::liveness(Liveness *Liveness) { | 596 bool CfgNode::liveness(Liveness *Liveness) { |
611 SizeT NumVars = Liveness->getNumVarsInNode(this); | 597 SizeT NumVars = Liveness->getNumVarsInNode(this); |
612 LivenessBV Live(NumVars); | 598 LivenessBV Live(NumVars); |
613 LiveBeginEndMap *LiveBegin = nullptr; | 599 LiveBeginEndMap *LiveBegin = nullptr; |
614 LiveBeginEndMap *LiveEnd = nullptr; | 600 LiveBeginEndMap *LiveEnd = nullptr; |
615 // Mark the beginning and ending of each variable's live range | 601 // Mark the beginning and ending of each variable's live range with the |
616 // with the sentinel instruction number 0. | 602 // sentinel instruction number 0. |
617 if (Liveness->getMode() == Liveness_Intervals) { | 603 if (Liveness->getMode() == Liveness_Intervals) { |
618 LiveBegin = Liveness->getLiveBegin(this); | 604 LiveBegin = Liveness->getLiveBegin(this); |
619 LiveEnd = Liveness->getLiveEnd(this); | 605 LiveEnd = Liveness->getLiveEnd(this); |
620 LiveBegin->clear(); | 606 LiveBegin->clear(); |
621 LiveEnd->clear(); | 607 LiveEnd->clear(); |
622 // Guess that the number of live ranges beginning is roughly the | 608 // Guess that the number of live ranges beginning is roughly the number of |
623 // number of instructions, and same for live ranges ending. | 609 // instructions, and same for live ranges ending. |
624 LiveBegin->reserve(getInstCountEstimate()); | 610 LiveBegin->reserve(getInstCountEstimate()); |
625 LiveEnd->reserve(getInstCountEstimate()); | 611 LiveEnd->reserve(getInstCountEstimate()); |
626 } | 612 } |
627 // Initialize Live to be the union of all successors' LiveIn. | 613 // Initialize Live to be the union of all successors' LiveIn. |
628 for (CfgNode *Succ : OutEdges) { | 614 for (CfgNode *Succ : OutEdges) { |
629 Live |= Liveness->getLiveIn(Succ); | 615 Live |= Liveness->getLiveIn(Succ); |
630 // Mark corresponding argument of phis in successor as live. | 616 // Mark corresponding argument of phis in successor as live. |
631 for (Inst &I : Succ->Phis) { | 617 for (Inst &I : Succ->Phis) { |
632 if (I.isDeleted()) | 618 if (I.isDeleted()) |
633 continue; | 619 continue; |
634 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 620 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
635 Phi->livenessPhiOperand(Live, this, Liveness); | 621 Phi->livenessPhiOperand(Live, this, Liveness); |
636 } | 622 } |
637 } | 623 } |
638 Liveness->getLiveOut(this) = Live; | 624 Liveness->getLiveOut(this) = Live; |
639 | 625 |
640 // Process regular instructions in reverse order. | 626 // Process regular instructions in reverse order. |
641 for (Inst &I : reverse_range(Insts)) { | 627 for (Inst &I : reverse_range(Insts)) { |
642 if (I.isDeleted()) | 628 if (I.isDeleted()) |
643 continue; | 629 continue; |
644 I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd); | 630 I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd); |
645 } | 631 } |
646 // Process phis in forward order so that we can override the | 632 // Process phis in forward order so that we can override the instruction |
647 // instruction number to be that of the earliest phi instruction in | 633 // number to be that of the earliest phi instruction in the block. |
648 // the block. | |
649 SizeT NumNonDeadPhis = 0; | 634 SizeT NumNonDeadPhis = 0; |
650 InstNumberT FirstPhiNumber = Inst::NumberSentinel; | 635 InstNumberT FirstPhiNumber = Inst::NumberSentinel; |
651 for (Inst &I : Phis) { | 636 for (Inst &I : Phis) { |
652 if (I.isDeleted()) | 637 if (I.isDeleted()) |
653 continue; | 638 continue; |
654 if (FirstPhiNumber == Inst::NumberSentinel) | 639 if (FirstPhiNumber == Inst::NumberSentinel) |
655 FirstPhiNumber = I.getNumber(); | 640 FirstPhiNumber = I.getNumber(); |
656 if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd)) | 641 if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd)) |
657 ++NumNonDeadPhis; | 642 ++NumNonDeadPhis; |
658 } | 643 } |
659 | 644 |
660 // When using the sparse representation, after traversing the | 645 // When using the sparse representation, after traversing the instructions in |
661 // instructions in the block, the Live bitvector should only contain | 646 // the block, the Live bitvector should only contain set bits for global |
662 // set bits for global variables upon block entry. We validate this | 647 // variables upon block entry. We validate this by shrinking the Live vector |
663 // by shrinking the Live vector and then testing it against the | 648 // and then testing it against the pre-shrunk version. (The shrinking is |
664 // pre-shrunk version. (The shrinking is required, but the | 649 // required, but the validation is not.) |
665 // validation is not.) | |
666 LivenessBV LiveOrig = Live; | 650 LivenessBV LiveOrig = Live; |
667 Live.resize(Liveness->getNumGlobalVars()); | 651 Live.resize(Liveness->getNumGlobalVars()); |
668 if (Live != LiveOrig) { | 652 if (Live != LiveOrig) { |
669 if (BuildDefs::dump()) { | 653 if (BuildDefs::dump()) { |
670 // This is a fatal liveness consistency error. Print some | 654 // This is a fatal liveness consistency error. Print some diagnostics and |
671 // diagnostics and abort. | 655 // abort. |
672 Ostream &Str = Func->getContext()->getStrDump(); | 656 Ostream &Str = Func->getContext()->getStrDump(); |
673 Func->resetCurrentNode(); | 657 Func->resetCurrentNode(); |
674 Str << "LiveOrig-Live ="; | 658 Str << "LiveOrig-Live ="; |
675 for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) { | 659 for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) { |
676 if (LiveOrig.test(i)) { | 660 if (LiveOrig.test(i)) { |
677 Str << " "; | 661 Str << " "; |
678 Liveness->getVariable(i, this)->dump(Func); | 662 Liveness->getVariable(i, this)->dump(Func); |
679 } | 663 } |
680 } | 664 } |
681 Str << "\n"; | 665 Str << "\n"; |
682 } | 666 } |
683 llvm::report_fatal_error("Fatal inconsistency in liveness analysis"); | 667 llvm::report_fatal_error("Fatal inconsistency in liveness analysis"); |
684 } | 668 } |
685 | 669 |
686 bool Changed = false; | 670 bool Changed = false; |
687 LivenessBV &LiveIn = Liveness->getLiveIn(this); | 671 LivenessBV &LiveIn = Liveness->getLiveIn(this); |
688 // Add in current LiveIn | 672 // Add in current LiveIn |
689 Live |= LiveIn; | 673 Live |= LiveIn; |
690 // Check result, set LiveIn=Live | 674 // Check result, set LiveIn=Live |
691 SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this); | 675 SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this); |
692 bool LiveInChanged = (Live != LiveIn); | 676 bool LiveInChanged = (Live != LiveIn); |
693 Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged); | 677 Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged); |
694 if (LiveInChanged) | 678 if (LiveInChanged) |
695 LiveIn = Live; | 679 LiveIn = Live; |
696 PrevNumNonDeadPhis = NumNonDeadPhis; | 680 PrevNumNonDeadPhis = NumNonDeadPhis; |
697 return Changed; | 681 return Changed; |
698 } | 682 } |
699 | 683 |
700 // Once basic liveness is complete, compute actual live ranges. It is | 684 // Once basic liveness is complete, compute actual live ranges. It is assumed |
701 // assumed that within a single basic block, a live range begins at | 685 // that within a single basic block, a live range begins at most once and ends |
702 // most once and ends at most once. This is certainly true for pure | 686 // at most once. This is certainly true for pure SSA form. It is also true once |
703 // SSA form. It is also true once phis are lowered, since each | 687 // phis are lowered, since each assignment to the phi-based temporary is in a |
704 // assignment to the phi-based temporary is in a different basic | 688 // different basic block, and there is a single read that ends the live in the |
705 // block, and there is a single read that ends the live in the basic | 689 // basic block that contained the actual phi instruction. |
706 // block that contained the actual phi instruction. | |
707 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum, | 690 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum, |
708 InstNumberT LastInstNum) { | 691 InstNumberT LastInstNum) { |
709 TimerMarker T1(TimerStack::TT_liveRange, Func); | 692 TimerMarker T1(TimerStack::TT_liveRange, Func); |
710 | 693 |
711 SizeT NumVars = Liveness->getNumVarsInNode(this); | 694 SizeT NumVars = Liveness->getNumVarsInNode(this); |
712 LivenessBV &LiveIn = Liveness->getLiveIn(this); | 695 LivenessBV &LiveIn = Liveness->getLiveIn(this); |
713 LivenessBV &LiveOut = Liveness->getLiveOut(this); | 696 LivenessBV &LiveOut = Liveness->getLiveOut(this); |
714 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this); | 697 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this); |
715 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this); | 698 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this); |
716 std::sort(MapBegin.begin(), MapBegin.end()); | 699 std::sort(MapBegin.begin(), MapBegin.end()); |
(...skipping 12 matching lines...) Expand all Loading... |
729 LivenessBV LiveInAndOut = LiveIn; | 712 LivenessBV LiveInAndOut = LiveIn; |
730 LiveInAndOut &= LiveOut; | 713 LiveInAndOut &= LiveOut; |
731 | 714 |
732 // Iterate in parallel across the sorted MapBegin[] and MapEnd[]. | 715 // Iterate in parallel across the sorted MapBegin[] and MapEnd[]. |
733 auto IBB = MapBegin.begin(), IEB = MapEnd.begin(); | 716 auto IBB = MapBegin.begin(), IEB = MapEnd.begin(); |
734 auto IBE = MapBegin.end(), IEE = MapEnd.end(); | 717 auto IBE = MapBegin.end(), IEE = MapEnd.end(); |
735 while (IBB != IBE || IEB != IEE) { | 718 while (IBB != IBE || IEB != IEE) { |
736 SizeT i1 = IBB == IBE ? NumVars : IBB->first; | 719 SizeT i1 = IBB == IBE ? NumVars : IBB->first; |
737 SizeT i2 = IEB == IEE ? NumVars : IEB->first; | 720 SizeT i2 = IEB == IEE ? NumVars : IEB->first; |
738 SizeT i = std::min(i1, i2); | 721 SizeT i = std::min(i1, i2); |
739 // i1 is the Variable number of the next MapBegin entry, and i2 is | 722 // i1 is the Variable number of the next MapBegin entry, and i2 is the |
740 // the Variable number of the next MapEnd entry. If i1==i2, then | 723 // Variable number of the next MapEnd entry. If i1==i2, then the Variable's |
741 // the Variable's live range begins and ends in this block. If | 724 // live range begins and ends in this block. If i1<i2, then i1's live range |
742 // i1<i2, then i1's live range begins at instruction IBB->second | 725 // begins at instruction IBB->second and extends through the end of the |
743 // and extends through the end of the block. If i1>i2, then i2's | 726 // block. If i1>i2, then i2's live range begins at the first instruction of |
744 // live range begins at the first instruction of the block and | 727 // the block and ends at IEB->second. In any case, we choose the lesser of |
745 // ends at IEB->second. In any case, we choose the lesser of i1 | 728 // i1 and i2 and proceed accordingly. |
746 // and i2 and proceed accordingly. | |
747 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum; | 729 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum; |
748 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1; | 730 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1; |
749 | 731 |
750 Variable *Var = Liveness->getVariable(i, this); | 732 Variable *Var = Liveness->getVariable(i, this); |
751 if (LB > LE) { | 733 if (LB > LE) { |
752 Var->addLiveRange(FirstInstNum, LE); | 734 Var->addLiveRange(FirstInstNum, LE); |
753 Var->addLiveRange(LB, LastInstNum + 1); | 735 Var->addLiveRange(LB, LastInstNum + 1); |
754 // Assert that Var is a global variable by checking that its | 736 // Assert that Var is a global variable by checking that its liveness |
755 // liveness index is less than the number of globals. This | 737 // index is less than the number of globals. This ensures that the |
756 // ensures that the LiveInAndOut[] access is valid. | 738 // LiveInAndOut[] access is valid. |
757 assert(i < Liveness->getNumGlobalVars()); | 739 assert(i < Liveness->getNumGlobalVars()); |
758 LiveInAndOut[i] = false; | 740 LiveInAndOut[i] = false; |
759 } else { | 741 } else { |
760 Var->addLiveRange(LB, LE); | 742 Var->addLiveRange(LB, LE); |
761 } | 743 } |
762 if (i == i1) | 744 if (i == i1) |
763 ++IBB; | 745 ++IBB; |
764 if (i == i2) | 746 if (i == i2) |
765 ++IEB; | 747 ++IEB; |
766 } | 748 } |
767 // Process the variables that are live across the entire block. | 749 // Process the variables that are live across the entire block. |
768 for (int i = LiveInAndOut.find_first(); i != -1; | 750 for (int i = LiveInAndOut.find_first(); i != -1; |
769 i = LiveInAndOut.find_next(i)) { | 751 i = LiveInAndOut.find_next(i)) { |
770 Variable *Var = Liveness->getVariable(i, this); | 752 Variable *Var = Liveness->getVariable(i, this); |
771 if (Liveness->getRangeMask(Var->getIndex())) | 753 if (Liveness->getRangeMask(Var->getIndex())) |
772 Var->addLiveRange(FirstInstNum, LastInstNum + 1); | 754 Var->addLiveRange(FirstInstNum, LastInstNum + 1); |
773 } | 755 } |
774 } | 756 } |
775 | 757 |
776 // If this node contains only deleted instructions, and ends in an | 758 // If this node contains only deleted instructions, and ends in an |
777 // unconditional branch, contract the node by repointing all its | 759 // unconditional branch, contract the node by repointing all its in-edges to |
778 // in-edges to its successor. | 760 // its successor. |
779 void CfgNode::contractIfEmpty() { | 761 void CfgNode::contractIfEmpty() { |
780 if (InEdges.empty()) | 762 if (InEdges.empty()) |
781 return; | 763 return; |
782 Inst *Branch = nullptr; | 764 Inst *Branch = nullptr; |
783 for (Inst &I : Insts) { | 765 for (Inst &I : Insts) { |
784 if (I.isDeleted()) | 766 if (I.isDeleted()) |
785 continue; | 767 continue; |
786 if (I.isUnconditionalBranch()) | 768 if (I.isUnconditionalBranch()) |
787 Branch = &I; | 769 Branch = &I; |
788 else if (!I.isRedundantAssign()) | 770 else if (!I.isRedundantAssign()) |
789 return; | 771 return; |
790 } | 772 } |
791 assert(OutEdges.size() == 1); | 773 assert(OutEdges.size() == 1); |
792 // Don't try to delete a self-loop. | 774 // Don't try to delete a self-loop. |
793 if (OutEdges[0] == this) | 775 if (OutEdges[0] == this) |
794 return; | 776 return; |
795 | 777 |
796 Branch->setDeleted(); | 778 Branch->setDeleted(); |
797 CfgNode *Successor = OutEdges.front(); | 779 CfgNode *Successor = OutEdges.front(); |
798 // Repoint all this node's in-edges to this node's successor, unless | 780 // Repoint all this node's in-edges to this node's successor, unless this |
799 // this node's successor is actually itself (in which case the | 781 // node's successor is actually itself (in which case the statement |
800 // statement "OutEdges.front()->InEdges.push_back(Pred)" could | 782 // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator |
801 // invalidate the iterator over this->InEdges). | 783 // over this->InEdges). |
802 if (Successor != this) { | 784 if (Successor != this) { |
803 for (CfgNode *Pred : InEdges) { | 785 for (CfgNode *Pred : InEdges) { |
804 for (CfgNode *&I : Pred->OutEdges) { | 786 for (CfgNode *&I : Pred->OutEdges) { |
805 if (I == this) { | 787 if (I == this) { |
806 I = Successor; | 788 I = Successor; |
807 Successor->InEdges.push_back(Pred); | 789 Successor->InEdges.push_back(Pred); |
808 } | 790 } |
809 } | 791 } |
810 for (Inst &I : Pred->getInsts()) { | 792 for (Inst &I : Pred->getInsts()) { |
811 if (!I.isDeleted()) | 793 if (!I.isDeleted()) |
812 I.repointEdges(this, Successor); | 794 I.repointEdges(this, Successor); |
813 } | 795 } |
814 } | 796 } |
815 | 797 |
816 // Remove the in-edge to the successor to allow node reordering to make | 798 // Remove the in-edge to the successor to allow node reordering to make |
817 // better decisions. For example it's more helpful to place a node after | 799 // better decisions. For example it's more helpful to place a node after a |
818 // a reachable predecessor than an unreachable one (like the one we just | 800 // reachable predecessor than an unreachable one (like the one we just |
819 // contracted). | 801 // contracted). |
820 Successor->InEdges.erase( | 802 Successor->InEdges.erase( |
821 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this)); | 803 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this)); |
822 } | 804 } |
823 InEdges.clear(); | 805 InEdges.clear(); |
824 } | 806 } |
825 | 807 |
826 void CfgNode::doBranchOpt(const CfgNode *NextNode) { | 808 void CfgNode::doBranchOpt(const CfgNode *NextNode) { |
827 TargetLowering *Target = Func->getTarget(); | 809 TargetLowering *Target = Func->getTarget(); |
828 // Find the first opportunity for branch optimization (which will be the last | 810 // Find the first opportunity for branch optimization (which will be the last |
829 // instruction in the block) and stop. This is sufficient unless there is some | 811 // instruction in the block) and stop. This is sufficient unless there is |
830 // target lowering where we have the possibility of multiple optimizations per | 812 // some target lowering where we have the possibility of multiple |
831 // block. Take care with switch lowering as there are multiple unconditional | 813 // optimizations per block. Take care with switch lowering as there are |
832 // branches and only the last can be deleted. | 814 // multiple unconditional branches and only the last can be deleted. |
833 for (Inst &I : reverse_range(Insts)) { | 815 for (Inst &I : reverse_range(Insts)) { |
834 if (!I.isDeleted()) { | 816 if (!I.isDeleted()) { |
835 Target->doBranchOpt(&I, NextNode); | 817 Target->doBranchOpt(&I, NextNode); |
836 return; | 818 return; |
837 } | 819 } |
838 } | 820 } |
839 } | 821 } |
840 | 822 |
841 // ======================== Dump routines ======================== // | 823 // ======================== Dump routines ======================== // |
842 | 824 |
(...skipping 19 matching lines...) Expand all Loading... |
862 for (SizeT i = 0; i < Live->size(); ++i) { | 844 for (SizeT i = 0; i < Live->size(); ++i) { |
863 if ((*Live)[i]) { | 845 if ((*Live)[i]) { |
864 Variable *Var = Liveness->getVariable(i, Node); | 846 Variable *Var = Liveness->getVariable(i, Node); |
865 if (Var->hasReg()) { | 847 if (Var->hasReg()) { |
866 if (IsLiveIn) | 848 if (IsLiveIn) |
867 ++LiveRegCount[Var->getRegNum()]; | 849 ++LiveRegCount[Var->getRegNum()]; |
868 LiveRegs.push_back(Var); | 850 LiveRegs.push_back(Var); |
869 } | 851 } |
870 } | 852 } |
871 } | 853 } |
872 // Sort the variables by regnum so they are always printed in a | 854 // Sort the variables by regnum so they are always printed in a familiar |
873 // familiar order. | 855 // order. |
874 std::sort(LiveRegs.begin(), LiveRegs.end(), | 856 std::sort(LiveRegs.begin(), LiveRegs.end(), |
875 [](const Variable *V1, const Variable *V2) { | 857 [](const Variable *V1, const Variable *V2) { |
876 return V1->getRegNum() < V2->getRegNum(); | 858 return V1->getRegNum() < V2->getRegNum(); |
877 }); | 859 }); |
878 bool First = true; | 860 bool First = true; |
879 for (Variable *Var : LiveRegs) { | 861 for (Variable *Var : LiveRegs) { |
880 if (!First) | 862 if (!First) |
881 Str << ","; | 863 Str << ","; |
882 First = false; | 864 First = false; |
883 Var->emit(Func); | 865 Var->emit(Func); |
884 } | 866 } |
885 } | 867 } |
886 Str << "\n"; | 868 Str << "\n"; |
887 } | 869 } |
888 | 870 |
889 void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr, | 871 void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr, |
890 std::vector<SizeT> &LiveRegCount) { | 872 std::vector<SizeT> &LiveRegCount) { |
891 if (!BuildDefs::dump()) | 873 if (!BuildDefs::dump()) |
892 return; | 874 return; |
893 bool First = true; | 875 bool First = true; |
894 Variable *Dest = Instr->getDest(); | 876 Variable *Dest = Instr->getDest(); |
895 // Normally we increment the live count for the dest register. But | 877 // Normally we increment the live count for the dest register. But we |
896 // we shouldn't if the instruction's IsDestNonKillable flag is set, | 878 // shouldn't if the instruction's IsDestNonKillable flag is set, because this |
897 // because this means that the target lowering created this | 879 // means that the target lowering created this instruction as a non-SSA |
898 // instruction as a non-SSA assignment; i.e., a different, previous | 880 // assignment; i.e., a different, previous instruction started the dest |
899 // instruction started the dest variable's live range. | 881 // variable's live range. |
900 if (!Instr->isDestNonKillable() && Dest && Dest->hasReg()) | 882 if (!Instr->isDestNonKillable() && Dest && Dest->hasReg()) |
901 ++LiveRegCount[Dest->getRegNum()]; | 883 ++LiveRegCount[Dest->getRegNum()]; |
902 FOREACH_VAR_IN_INST(Var, *Instr) { | 884 FOREACH_VAR_IN_INST(Var, *Instr) { |
903 bool ShouldReport = Instr->isLastUse(Var); | 885 bool ShouldReport = Instr->isLastUse(Var); |
904 if (ShouldReport && Var->hasReg()) { | 886 if (ShouldReport && Var->hasReg()) { |
905 // Don't report end of live range until the live count reaches 0. | 887 // Don't report end of live range until the live count reaches 0. |
906 SizeT NewCount = --LiveRegCount[Var->getRegNum()]; | 888 SizeT NewCount = --LiveRegCount[Var->getRegNum()]; |
907 if (NewCount) | 889 if (NewCount) |
908 ShouldReport = false; | 890 ShouldReport = false; |
909 } | 891 } |
910 if (ShouldReport) { | 892 if (ShouldReport) { |
911 if (First) | 893 if (First) |
912 Str << " \t# END="; | 894 Str << " \t# END="; |
913 else | 895 else |
914 Str << ","; | 896 Str << ","; |
915 Var->emit(Func); | 897 Var->emit(Func); |
916 First = false; | 898 First = false; |
917 } | 899 } |
918 } | 900 } |
919 } | 901 } |
920 | 902 |
921 void updateStats(Cfg *Func, const Inst *I) { | 903 void updateStats(Cfg *Func, const Inst *I) { |
922 if (!BuildDefs::dump()) | 904 if (!BuildDefs::dump()) |
923 return; | 905 return; |
924 // Update emitted instruction count, plus fill/spill count for | 906 // Update emitted instruction count, plus fill/spill count for Variable |
925 // Variable operands without a physical register. | 907 // operands without a physical register. |
926 if (uint32_t Count = I->getEmitInstCount()) { | 908 if (uint32_t Count = I->getEmitInstCount()) { |
927 Func->getContext()->statsUpdateEmitted(Count); | 909 Func->getContext()->statsUpdateEmitted(Count); |
928 if (Variable *Dest = I->getDest()) { | 910 if (Variable *Dest = I->getDest()) { |
929 if (!Dest->hasReg()) | 911 if (!Dest->hasReg()) |
930 Func->getContext()->statsUpdateFills(); | 912 Func->getContext()->statsUpdateFills(); |
931 } | 913 } |
932 for (SizeT S = 0; S < I->getSrcSize(); ++S) { | 914 for (SizeT S = 0; S < I->getSrcSize(); ++S) { |
933 if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) { | 915 if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) { |
934 if (!Src->hasReg()) | 916 if (!Src->hasReg()) |
935 Func->getContext()->statsUpdateSpills(); | 917 Func->getContext()->statsUpdateSpills(); |
936 } | 918 } |
937 } | 919 } |
938 } | 920 } |
939 } | 921 } |
940 | 922 |
941 } // end of anonymous namespace | 923 } // end of anonymous namespace |
942 | 924 |
943 void CfgNode::emit(Cfg *Func) const { | 925 void CfgNode::emit(Cfg *Func) const { |
944 if (!BuildDefs::dump()) | 926 if (!BuildDefs::dump()) |
945 return; | 927 return; |
946 Func->setCurrentNode(this); | 928 Func->setCurrentNode(this); |
947 Ostream &Str = Func->getContext()->getStrEmit(); | 929 Ostream &Str = Func->getContext()->getStrEmit(); |
948 Liveness *Liveness = Func->getLiveness(); | 930 Liveness *Liveness = Func->getLiveness(); |
949 bool DecorateAsm = | 931 bool DecorateAsm = |
950 Liveness && Func->getContext()->getFlags().getDecorateAsm(); | 932 Liveness && Func->getContext()->getFlags().getDecorateAsm(); |
951 Str << getAsmName() << ":\n"; | 933 Str << getAsmName() << ":\n"; |
952 // LiveRegCount keeps track of the number of currently live | 934 // LiveRegCount keeps track of the number of currently live variables that |
953 // variables that each register is assigned to. Normally that would | 935 // each register is assigned to. Normally that would be only 0 or 1, but the |
954 // be only 0 or 1, but the register allocator's AllowOverlap | 936 // register allocator's AllowOverlap inference allows it to be greater than 1 |
955 // inference allows it to be greater than 1 for short periods. | 937 // for short periods. |
956 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); | 938 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); |
957 if (DecorateAsm) { | 939 if (DecorateAsm) { |
958 constexpr bool IsLiveIn = true; | 940 constexpr bool IsLiveIn = true; |
959 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); | 941 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); |
960 } | 942 } |
961 | 943 |
962 for (const Inst &I : Phis) { | 944 for (const Inst &I : Phis) { |
963 if (I.isDeleted()) | 945 if (I.isDeleted()) |
964 continue; | 946 continue; |
965 // Emitting a Phi instruction should cause an error. | 947 // Emitting a Phi instruction should cause an error. |
966 I.emit(Func); | 948 I.emit(Func); |
967 } | 949 } |
968 for (const Inst &I : Insts) { | 950 for (const Inst &I : Insts) { |
969 if (I.isDeleted()) | 951 if (I.isDeleted()) |
970 continue; | 952 continue; |
971 if (I.isRedundantAssign()) { | 953 if (I.isRedundantAssign()) { |
972 // Usually, redundant assignments end the live range of the src | 954 // Usually, redundant assignments end the live range of the src variable |
973 // variable and begin the live range of the dest variable, with | 955 // and begin the live range of the dest variable, with no net effect on |
974 // no net effect on the liveness of their register. However, if | 956 // the liveness of their register. However, if the register allocator |
975 // the register allocator infers the AllowOverlap condition, | 957 // infers the AllowOverlap condition, then this may be a redundant |
976 // then this may be a redundant assignment that does not end the | 958 // assignment that does not end the src variable's live range, in which |
977 // src variable's live range, in which case the active variable | 959 // case the active variable count for that register needs to be bumped. |
978 // count for that register needs to be bumped. That normally | 960 // That normally would have happened as part of emitLiveRangesEnded(), |
979 // would have happened as part of emitLiveRangesEnded(), but | 961 // but that isn't called for redundant assignments. |
980 // that isn't called for redundant assignments. | |
981 Variable *Dest = I.getDest(); | 962 Variable *Dest = I.getDest(); |
982 if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) | 963 if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) |
983 ++LiveRegCount[Dest->getRegNum()]; | 964 ++LiveRegCount[Dest->getRegNum()]; |
984 continue; | 965 continue; |
985 } | 966 } |
986 I.emit(Func); | 967 I.emit(Func); |
987 if (DecorateAsm) | 968 if (DecorateAsm) |
988 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); | 969 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); |
989 Str << "\n"; | 970 Str << "\n"; |
990 updateStats(Func, &I); | 971 updateStats(Func, &I); |
(...skipping 12 matching lines...) Expand all Loading... |
1003 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; | 984 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; |
1004 | 985 |
1005 public: | 986 public: |
1006 BundleEmitHelper(Assembler *Asm, TargetLowering *Target, | 987 BundleEmitHelper(Assembler *Asm, TargetLowering *Target, |
1007 const InstList &Insts) | 988 const InstList &Insts) |
1008 : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End), | 989 : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End), |
1009 BundleSize(1 << Asm->getBundleAlignLog2Bytes()), | 990 BundleSize(1 << Asm->getBundleAlignLog2Bytes()), |
1010 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {} | 991 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {} |
1011 // Check whether we're currently within a bundle_lock region. | 992 // Check whether we're currently within a bundle_lock region. |
1012 bool isInBundleLockRegion() const { return BundleLockStart != End; } | 993 bool isInBundleLockRegion() const { return BundleLockStart != End; } |
1013 // Check whether the current bundle_lock region has the align_to_end | 994 // Check whether the current bundle_lock region has the align_to_end option. |
1014 // option. | |
1015 bool isAlignToEnd() const { | 995 bool isAlignToEnd() const { |
1016 assert(isInBundleLockRegion()); | 996 assert(isInBundleLockRegion()); |
1017 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() == | 997 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() == |
1018 InstBundleLock::Opt_AlignToEnd; | 998 InstBundleLock::Opt_AlignToEnd; |
1019 } | 999 } |
1020 // Check whether the entire bundle_lock region falls within the same | 1000 // Check whether the entire bundle_lock region falls within the same bundle. |
1021 // bundle. | |
1022 bool isSameBundle() const { | 1001 bool isSameBundle() const { |
1023 assert(isInBundleLockRegion()); | 1002 assert(isInBundleLockRegion()); |
1024 return SizeSnapshotPre == SizeSnapshotPost || | 1003 return SizeSnapshotPre == SizeSnapshotPost || |
1025 (SizeSnapshotPre & BundleMaskHi) == | 1004 (SizeSnapshotPre & BundleMaskHi) == |
1026 ((SizeSnapshotPost - 1) & BundleMaskHi); | 1005 ((SizeSnapshotPost - 1) & BundleMaskHi); |
1027 } | 1006 } |
1028 // Get the bundle alignment of the first instruction of the | 1007 // Get the bundle alignment of the first instruction of the bundle_lock |
1029 // bundle_lock region. | 1008 // region. |
1030 intptr_t getPreAlignment() const { | 1009 intptr_t getPreAlignment() const { |
1031 assert(isInBundleLockRegion()); | 1010 assert(isInBundleLockRegion()); |
1032 return SizeSnapshotPre & BundleMaskLo; | 1011 return SizeSnapshotPre & BundleMaskLo; |
1033 } | 1012 } |
1034 // Get the bundle alignment of the first instruction past the | 1013 // Get the bundle alignment of the first instruction past the bundle_lock |
1035 // bundle_lock region. | 1014 // region. |
1036 intptr_t getPostAlignment() const { | 1015 intptr_t getPostAlignment() const { |
1037 assert(isInBundleLockRegion()); | 1016 assert(isInBundleLockRegion()); |
1038 return SizeSnapshotPost & BundleMaskLo; | 1017 return SizeSnapshotPost & BundleMaskLo; |
1039 } | 1018 } |
1040 // Get the iterator pointing to the bundle_lock instruction, e.g. to | 1019 // Get the iterator pointing to the bundle_lock instruction, e.g. to roll |
1041 // roll back the instruction iteration to that point. | 1020 // back the instruction iteration to that point. |
1042 InstList::const_iterator getBundleLockStart() const { | 1021 InstList::const_iterator getBundleLockStart() const { |
1043 assert(isInBundleLockRegion()); | 1022 assert(isInBundleLockRegion()); |
1044 return BundleLockStart; | 1023 return BundleLockStart; |
1045 } | 1024 } |
1046 // Set up bookkeeping when the bundle_lock instruction is first | 1025 // Set up bookkeeping when the bundle_lock instruction is first processed. |
1047 // processed. | |
1048 void enterBundleLock(InstList::const_iterator I) { | 1026 void enterBundleLock(InstList::const_iterator I) { |
1049 assert(!isInBundleLockRegion()); | 1027 assert(!isInBundleLockRegion()); |
1050 BundleLockStart = I; | 1028 BundleLockStart = I; |
1051 SizeSnapshotPre = Asm->getBufferSize(); | 1029 SizeSnapshotPre = Asm->getBufferSize(); |
1052 Asm->setPreliminary(true); | 1030 Asm->setPreliminary(true); |
1053 Target->snapshotEmitState(); | 1031 Target->snapshotEmitState(); |
1054 assert(isInBundleLockRegion()); | 1032 assert(isInBundleLockRegion()); |
1055 } | 1033 } |
1056 // Update bookkeeping when the bundle_unlock instruction is | 1034 // Update bookkeeping when the bundle_unlock instruction is processed. |
1057 // processed. | |
1058 void enterBundleUnlock() { | 1035 void enterBundleUnlock() { |
1059 assert(isInBundleLockRegion()); | 1036 assert(isInBundleLockRegion()); |
1060 SizeSnapshotPost = Asm->getBufferSize(); | 1037 SizeSnapshotPost = Asm->getBufferSize(); |
1061 } | 1038 } |
1062 // Update bookkeeping when we are completely finished with the | 1039 // Update bookkeeping when we are completely finished with the bundle_lock |
1063 // bundle_lock region. | 1040 // region. |
1064 void leaveBundleLockRegion() { BundleLockStart = End; } | 1041 void leaveBundleLockRegion() { BundleLockStart = End; } |
1065 // Check whether the instruction sequence fits within the current | 1042 // Check whether the instruction sequence fits within the current bundle, and |
1066 // bundle, and if not, add nop padding to the end of the current | 1043 // if not, add nop padding to the end of the current bundle. |
1067 // bundle. | |
1068 void padToNextBundle() { | 1044 void padToNextBundle() { |
1069 assert(isInBundleLockRegion()); | 1045 assert(isInBundleLockRegion()); |
1070 if (!isSameBundle()) { | 1046 if (!isSameBundle()) { |
1071 intptr_t PadToNextBundle = BundleSize - getPreAlignment(); | 1047 intptr_t PadToNextBundle = BundleSize - getPreAlignment(); |
1072 Asm->padWithNop(PadToNextBundle); | 1048 Asm->padWithNop(PadToNextBundle); |
1073 SizeSnapshotPre += PadToNextBundle; | 1049 SizeSnapshotPre += PadToNextBundle; |
1074 SizeSnapshotPost += PadToNextBundle; | 1050 SizeSnapshotPost += PadToNextBundle; |
1075 assert((Asm->getBufferSize() & BundleMaskLo) == 0); | 1051 assert((Asm->getBufferSize() & BundleMaskLo) == 0); |
1076 assert(Asm->getBufferSize() == SizeSnapshotPre); | 1052 assert(Asm->getBufferSize() == SizeSnapshotPre); |
1077 } | 1053 } |
1078 } | 1054 } |
1079 // If align_to_end is specified, add padding such that the | 1055 // If align_to_end is specified, add padding such that the instruction |
1080 // instruction sequences ends precisely at a bundle boundary. | 1056 // sequences ends precisely at a bundle boundary. |
1081 void padForAlignToEnd() { | 1057 void padForAlignToEnd() { |
1082 assert(isInBundleLockRegion()); | 1058 assert(isInBundleLockRegion()); |
1083 if (isAlignToEnd()) { | 1059 if (isAlignToEnd()) { |
1084 if (intptr_t Offset = getPostAlignment()) { | 1060 if (intptr_t Offset = getPostAlignment()) { |
1085 Asm->padWithNop(BundleSize - Offset); | 1061 Asm->padWithNop(BundleSize - Offset); |
1086 SizeSnapshotPre = Asm->getBufferSize(); | 1062 SizeSnapshotPre = Asm->getBufferSize(); |
1087 } | 1063 } |
1088 } | 1064 } |
1089 } | 1065 } |
1090 // Update bookkeeping when rolling back for the second pass. | 1066 // Update bookkeeping when rolling back for the second pass. |
1091 void rollback() { | 1067 void rollback() { |
1092 assert(isInBundleLockRegion()); | 1068 assert(isInBundleLockRegion()); |
1093 Asm->setBufferSize(SizeSnapshotPre); | 1069 Asm->setBufferSize(SizeSnapshotPre); |
1094 Asm->setPreliminary(false); | 1070 Asm->setPreliminary(false); |
1095 Target->rollbackEmitState(); | 1071 Target->rollbackEmitState(); |
1096 } | 1072 } |
1097 | 1073 |
1098 private: | 1074 private: |
1099 Assembler *const Asm; | 1075 Assembler *const Asm; |
1100 TargetLowering *const Target; | 1076 TargetLowering *const Target; |
1101 // End is a sentinel value such that BundleLockStart==End implies | 1077 // End is a sentinel value such that BundleLockStart==End implies that we are |
1102 // that we are not in a bundle_lock region. | 1078 // not in a bundle_lock region. |
1103 const InstList::const_iterator End; | 1079 const InstList::const_iterator End; |
1104 InstList::const_iterator BundleLockStart; | 1080 InstList::const_iterator BundleLockStart; |
1105 const intptr_t BundleSize; | 1081 const intptr_t BundleSize; |
1106 // Masking with BundleMaskLo identifies an address's bundle offset. | 1082 // Masking with BundleMaskLo identifies an address's bundle offset. |
1107 const intptr_t BundleMaskLo; | 1083 const intptr_t BundleMaskLo; |
1108 // Masking with BundleMaskHi identifies an address's bundle. | 1084 // Masking with BundleMaskHi identifies an address's bundle. |
1109 const intptr_t BundleMaskHi; | 1085 const intptr_t BundleMaskHi; |
1110 intptr_t SizeSnapshotPre = 0; | 1086 intptr_t SizeSnapshotPre = 0; |
1111 intptr_t SizeSnapshotPost = 0; | 1087 intptr_t SizeSnapshotPost = 0; |
1112 }; | 1088 }; |
1113 | 1089 |
1114 } // end of anonymous namespace | 1090 } // end of anonymous namespace |
1115 | 1091 |
1116 void CfgNode::emitIAS(Cfg *Func) const { | 1092 void CfgNode::emitIAS(Cfg *Func) const { |
1117 Func->setCurrentNode(this); | 1093 Func->setCurrentNode(this); |
1118 Assembler *Asm = Func->getAssembler<>(); | 1094 Assembler *Asm = Func->getAssembler<>(); |
1119 // TODO(stichnot): When sandboxing, defer binding the node label | 1095 // TODO(stichnot): When sandboxing, defer binding the node label until just |
1120 // until just before the first instruction is emitted, to reduce the | 1096 // before the first instruction is emitted, to reduce the chance that a |
1121 // chance that a padding nop is a branch target. | 1097 // padding nop is a branch target. |
1122 Asm->bindCfgNodeLabel(getIndex()); | 1098 Asm->bindCfgNodeLabel(getIndex()); |
1123 for (const Inst &I : Phis) { | 1099 for (const Inst &I : Phis) { |
1124 if (I.isDeleted()) | 1100 if (I.isDeleted()) |
1125 continue; | 1101 continue; |
1126 // Emitting a Phi instruction should cause an error. | 1102 // Emitting a Phi instruction should cause an error. |
1127 I.emitIAS(Func); | 1103 I.emitIAS(Func); |
1128 } | 1104 } |
1129 | 1105 |
1130 // Do the simple emission if not sandboxed. | 1106 // Do the simple emission if not sandboxed. |
1131 if (!Func->getContext()->getFlags().getUseSandboxing()) { | 1107 if (!Func->getContext()->getFlags().getUseSandboxing()) { |
1132 for (const Inst &I : Insts) { | 1108 for (const Inst &I : Insts) { |
1133 if (!I.isDeleted() && !I.isRedundantAssign()) { | 1109 if (!I.isDeleted() && !I.isRedundantAssign()) { |
1134 I.emitIAS(Func); | 1110 I.emitIAS(Func); |
1135 updateStats(Func, &I); | 1111 updateStats(Func, &I); |
1136 } | 1112 } |
1137 } | 1113 } |
1138 return; | 1114 return; |
1139 } | 1115 } |
1140 | 1116 |
1141 // The remainder of the function handles emission with sandboxing. | 1117 // The remainder of the function handles emission with sandboxing. There are |
1142 // There are explicit bundle_lock regions delimited by bundle_lock | 1118 // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock |
1143 // and bundle_unlock instructions. All other instructions are | 1119 // instructions. All other instructions are treated as an implicit |
1144 // treated as an implicit one-instruction bundle_lock region. | 1120 // one-instruction bundle_lock region. Emission is done twice for each |
1145 // Emission is done twice for each bundle_lock region. The first | 1121 // bundle_lock region. The first pass is a preliminary pass, after which we |
1146 // pass is a preliminary pass, after which we can figure out what | 1122 // can figure out what nop padding is needed, then roll back, and make the |
1147 // nop padding is needed, then roll back, and make the final pass. | 1123 // final pass. |
1148 // | 1124 // |
1149 // Ideally, the first pass would be speculative and the second pass | 1125 // Ideally, the first pass would be speculative and the second pass would |
1150 // would only be done if nop padding were needed, but the structure | 1126 // only be done if nop padding were needed, but the structure of the |
1151 // of the integrated assembler makes it hard to roll back the state | 1127 // integrated assembler makes it hard to roll back the state of label |
1152 // of label bindings, label links, and relocation fixups. Instead, | 1128 // bindings, label links, and relocation fixups. Instead, the first pass just |
1153 // the first pass just disables all mutation of that state. | 1129 // disables all mutation of that state. |
1154 | 1130 |
1155 BundleEmitHelper Helper(Asm, Func->getTarget(), Insts); | 1131 BundleEmitHelper Helper(Asm, Func->getTarget(), Insts); |
1156 InstList::const_iterator End = Insts.end(); | 1132 InstList::const_iterator End = Insts.end(); |
1157 // Retrying indicates that we had to roll back to the bundle_lock | 1133 // Retrying indicates that we had to roll back to the bundle_lock instruction |
1158 // instruction to apply padding before the bundle_lock sequence. | 1134 // to apply padding before the bundle_lock sequence. |
1159 bool Retrying = false; | 1135 bool Retrying = false; |
1160 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) { | 1136 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) { |
1161 if (I->isDeleted() || I->isRedundantAssign()) | 1137 if (I->isDeleted() || I->isRedundantAssign()) |
1162 continue; | 1138 continue; |
1163 | 1139 |
1164 if (llvm::isa<InstBundleLock>(I)) { | 1140 if (llvm::isa<InstBundleLock>(I)) { |
1165 // Set up the initial bundle_lock state. This should not happen | 1141 // Set up the initial bundle_lock state. This should not happen while |
1166 // while retrying, because the retry rolls back to the | 1142 // retrying, because the retry rolls back to the instruction following |
1167 // instruction following the bundle_lock instruction. | 1143 // the bundle_lock instruction. |
1168 assert(!Retrying); | 1144 assert(!Retrying); |
1169 Helper.enterBundleLock(I); | 1145 Helper.enterBundleLock(I); |
1170 continue; | 1146 continue; |
1171 } | 1147 } |
1172 | 1148 |
1173 if (llvm::isa<InstBundleUnlock>(I)) { | 1149 if (llvm::isa<InstBundleUnlock>(I)) { |
1174 Helper.enterBundleUnlock(); | 1150 Helper.enterBundleUnlock(); |
1175 if (Retrying) { | 1151 if (Retrying) { |
1176 // Make sure all instructions are in the same bundle. | 1152 // Make sure all instructions are in the same bundle. |
1177 assert(Helper.isSameBundle()); | 1153 assert(Helper.isSameBundle()); |
1178 // If align_to_end is specified, make sure the next | 1154 // If align_to_end is specified, make sure the next instruction begins |
1179 // instruction begins the bundle. | 1155 // the bundle. |
1180 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0); | 1156 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0); |
1181 Helper.leaveBundleLockRegion(); | 1157 Helper.leaveBundleLockRegion(); |
1182 Retrying = false; | 1158 Retrying = false; |
1183 } else { | 1159 } else { |
1184 // This is the first pass, so roll back for the retry pass. | 1160 // This is the first pass, so roll back for the retry pass. |
1185 Helper.rollback(); | 1161 Helper.rollback(); |
1186 // Pad to the next bundle if the instruction sequence crossed | 1162 // Pad to the next bundle if the instruction sequence crossed a bundle |
1187 // a bundle boundary. | 1163 // boundary. |
1188 Helper.padToNextBundle(); | 1164 Helper.padToNextBundle(); |
1189 // Insert additional padding to make AlignToEnd work. | 1165 // Insert additional padding to make AlignToEnd work. |
1190 Helper.padForAlignToEnd(); | 1166 Helper.padForAlignToEnd(); |
1191 // Prepare for the retry pass after padding is done. | 1167 // Prepare for the retry pass after padding is done. |
1192 Retrying = true; | 1168 Retrying = true; |
1193 I = Helper.getBundleLockStart(); | 1169 I = Helper.getBundleLockStart(); |
1194 } | 1170 } |
1195 continue; | 1171 continue; |
1196 } | 1172 } |
1197 | 1173 |
(...skipping 10 matching lines...) Expand all Loading... |
1208 I->emitIAS(Func); | 1184 I->emitIAS(Func); |
1209 Helper.enterBundleUnlock(); | 1185 Helper.enterBundleUnlock(); |
1210 Helper.rollback(); | 1186 Helper.rollback(); |
1211 Helper.padToNextBundle(); | 1187 Helper.padToNextBundle(); |
1212 I->emitIAS(Func); | 1188 I->emitIAS(Func); |
1213 updateStats(Func, I); | 1189 updateStats(Func, I); |
1214 Helper.leaveBundleLockRegion(); | 1190 Helper.leaveBundleLockRegion(); |
1215 } | 1191 } |
1216 } | 1192 } |
1217 | 1193 |
1218 // Don't allow bundle locking across basic blocks, to keep the | 1194 // Don't allow bundle locking across basic blocks, to keep the backtracking |
1219 // backtracking mechanism simple. | 1195 // mechanism simple. |
1220 assert(!Helper.isInBundleLockRegion()); | 1196 assert(!Helper.isInBundleLockRegion()); |
1221 assert(!Retrying); | 1197 assert(!Retrying); |
1222 } | 1198 } |
1223 | 1199 |
1224 void CfgNode::dump(Cfg *Func) const { | 1200 void CfgNode::dump(Cfg *Func) const { |
1225 if (!BuildDefs::dump()) | 1201 if (!BuildDefs::dump()) |
1226 return; | 1202 return; |
1227 Func->setCurrentNode(this); | 1203 Func->setCurrentNode(this); |
1228 Ostream &Str = Func->getContext()->getStrDump(); | 1204 Ostream &Str = Func->getContext()->getStrDump(); |
1229 Liveness *Liveness = Func->getLiveness(); | 1205 Liveness *Liveness = Func->getLiveness(); |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1327 InstIntrinsicCall *Inst = InstIntrinsicCall::create( | 1303 InstIntrinsicCall *Inst = InstIntrinsicCall::create( |
1328 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); | 1304 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); |
1329 Inst->addArg(AtomicRMWOp); | 1305 Inst->addArg(AtomicRMWOp); |
1330 Inst->addArg(Counter); | 1306 Inst->addArg(Counter); |
1331 Inst->addArg(One); | 1307 Inst->addArg(One); |
1332 Inst->addArg(OrderAcquireRelease); | 1308 Inst->addArg(OrderAcquireRelease); |
1333 Insts.push_front(Inst); | 1309 Insts.push_front(Inst); |
1334 } | 1310 } |
1335 | 1311 |
1336 } // end of namespace Ice | 1312 } // end of namespace Ice |
OLD | NEW |