OLD | NEW |
1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// | 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
11 /// This file implements the CfgNode class, including the complexities | 11 /// This file implements the CfgNode class, including the complexities of |
12 /// of instruction insertion and in-edge calculation. | 12 /// instruction insertion and in-edge calculation. |
13 /// | 13 /// |
14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
15 | 15 |
16 #include "IceCfgNode.h" | 16 #include "IceCfgNode.h" |
17 | 17 |
18 #include "IceAssembler.h" | 18 #include "IceAssembler.h" |
19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
20 #include "IceGlobalInits.h" | 20 #include "IceGlobalInits.h" |
21 #include "IceInst.h" | 21 #include "IceInst.h" |
22 #include "IceInstVarIter.h" | 22 #include "IceInstVarIter.h" |
23 #include "IceLiveness.h" | 23 #include "IceLiveness.h" |
24 #include "IceOperand.h" | 24 #include "IceOperand.h" |
25 #include "IceTargetLowering.h" | 25 #include "IceTargetLowering.h" |
26 | 26 |
27 namespace Ice { | 27 namespace Ice { |
28 | 28 |
29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber) | 29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber) |
30 : Func(Func), Number(LabelNumber) {} | 30 : Func(Func), Number(LabelNumber) {} |
31 | 31 |
32 // Returns the name the node was created with. If no name was given, | 32 // Returns the name the node was created with. If no name was given, it |
33 // it synthesizes a (hopefully) unique name. | 33 // synthesizes a (hopefully) unique name. |
34 IceString CfgNode::getName() const { | 34 IceString CfgNode::getName() const { |
35 if (NameIndex >= 0) | 35 if (NameIndex >= 0) |
36 return Func->getIdentifierName(NameIndex); | 36 return Func->getIdentifierName(NameIndex); |
37 return "__" + std::to_string(getIndex()); | 37 return "__" + std::to_string(getIndex()); |
38 } | 38 } |
39 | 39 |
40 // Adds an instruction to either the Phi list or the regular | 40 // Adds an instruction to either the Phi list or the regular instruction list. |
41 // instruction list. Validates that all Phis are added before all | 41 // Validates that all Phis are added before all regular instructions. |
42 // regular instructions. | |
43 void CfgNode::appendInst(Inst *Inst) { | 42 void CfgNode::appendInst(Inst *Inst) { |
44 ++InstCountEstimate; | 43 ++InstCountEstimate; |
45 if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) { | 44 if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) { |
46 if (!Insts.empty()) { | 45 if (!Insts.empty()) { |
47 Func->setError("Phi instruction added to the middle of a block"); | 46 Func->setError("Phi instruction added to the middle of a block"); |
48 return; | 47 return; |
49 } | 48 } |
50 Phis.push_back(Phi); | 49 Phis.push_back(Phi); |
51 } else { | 50 } else { |
52 Insts.push_back(Inst); | 51 Insts.push_back(Inst); |
53 } | 52 } |
54 } | 53 } |
55 | 54 |
56 // Renumbers the non-deleted instructions in the node. This needs to | 55 // Renumbers the non-deleted instructions in the node. This needs to be done in |
57 // be done in preparation for live range analysis. The instruction | 56 // preparation for live range analysis. The instruction numbers in a block must |
58 // numbers in a block must be monotonically increasing. The range of | 57 // be monotonically increasing. The range of instruction numbers in a block, |
59 // instruction numbers in a block, from lowest to highest, must not | 58 // from lowest to highest, must not overlap with the range of any other block. |
60 // overlap with the range of any other block. | |
61 void CfgNode::renumberInstructions() { | 59 void CfgNode::renumberInstructions() { |
62 InstNumberT FirstNumber = Func->getNextInstNumber(); | 60 InstNumberT FirstNumber = Func->getNextInstNumber(); |
63 for (Inst &I : Phis) | 61 for (Inst &I : Phis) |
64 I.renumber(Func); | 62 I.renumber(Func); |
65 for (Inst &I : Insts) | 63 for (Inst &I : Insts) |
66 I.renumber(Func); | 64 I.renumber(Func); |
67 InstCountEstimate = Func->getNextInstNumber() - FirstNumber; | 65 InstCountEstimate = Func->getNextInstNumber() - FirstNumber; |
68 } | 66 } |
69 | 67 |
70 // When a node is created, the OutEdges are immediately known, but the | 68 // When a node is created, the OutEdges are immediately known, but the InEdges |
71 // InEdges have to be built up incrementally. After the CFG has been | 69 // have to be built up incrementally. After the CFG has been constructed, the |
72 // constructed, the computePredecessors() pass finalizes it by | 70 // computePredecessors() pass finalizes it by creating the InEdges list. |
73 // creating the InEdges list. | |
74 void CfgNode::computePredecessors() { | 71 void CfgNode::computePredecessors() { |
75 for (CfgNode *Succ : OutEdges) | 72 for (CfgNode *Succ : OutEdges) |
76 Succ->InEdges.push_back(this); | 73 Succ->InEdges.push_back(this); |
77 } | 74 } |
78 | 75 |
79 void CfgNode::computeSuccessors() { | 76 void CfgNode::computeSuccessors() { |
80 OutEdges = Insts.rbegin()->getTerminatorEdges(); | 77 OutEdges = Insts.rbegin()->getTerminatorEdges(); |
81 } | 78 } |
82 | 79 |
83 // This does part 1 of Phi lowering, by creating a new dest variable | 80 // This does part 1 of Phi lowering, by creating a new dest variable for each |
84 // for each Phi instruction, replacing the Phi instruction's dest with | 81 // Phi instruction, replacing the Phi instruction's dest with that variable, |
85 // that variable, and adding an explicit assignment of the old dest to | 82 // and adding an explicit assignment of the old dest to the new dest. For |
86 // the new dest. For example, | 83 // example, |
87 // a=phi(...) | 84 // a=phi(...) |
88 // changes to | 85 // changes to |
89 // "a_phi=phi(...); a=a_phi". | 86 // "a_phi=phi(...); a=a_phi". |
90 // | 87 // |
91 // This is in preparation for part 2 which deletes the Phi | 88 // This is in preparation for part 2 which deletes the Phi instructions and |
92 // instructions and appends assignment instructions to predecessor | 89 // appends assignment instructions to predecessor blocks. Note that this |
93 // blocks. Note that this transformation preserves SSA form. | 90 // transformation preserves SSA form. |
94 void CfgNode::placePhiLoads() { | 91 void CfgNode::placePhiLoads() { |
95 for (Inst &I : Phis) { | 92 for (Inst &I : Phis) { |
96 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 93 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
97 Insts.insert(Insts.begin(), Phi->lower(Func)); | 94 Insts.insert(Insts.begin(), Phi->lower(Func)); |
98 } | 95 } |
99 } | 96 } |
100 | 97 |
101 // This does part 2 of Phi lowering. For each Phi instruction at each | 98 // This does part 2 of Phi lowering. For each Phi instruction at each out-edge, |
102 // out-edge, create a corresponding assignment instruction, and add | 99 // create a corresponding assignment instruction, and add all the assignments |
103 // all the assignments near the end of this block. They need to be | 100 // near the end of this block. They need to be added before any branch |
104 // added before any branch instruction, and also if the block ends | 101 // instruction, and also if the block ends with a compare instruction followed |
105 // with a compare instruction followed by a branch instruction that we | 102 // by a branch instruction that we may want to fuse, it's better to insert the |
106 // may want to fuse, it's better to insert the new assignments before | 103 // new assignments before the compare instruction. The |
107 // the compare instruction. The tryOptimizedCmpxchgCmpBr() method | 104 // tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions. |
108 // assumes this ordering of instructions. | |
109 // | 105 // |
110 // Note that this transformation takes the Phi dest variables out of | 106 // Note that this transformation takes the Phi dest variables out of SSA form, |
111 // SSA form, as there may be assignments to the dest variable in | 107 // as there may be assignments to the dest variable in multiple blocks. |
112 // multiple blocks. | |
113 void CfgNode::placePhiStores() { | 108 void CfgNode::placePhiStores() { |
114 // Find the insertion point. | 109 // Find the insertion point. |
115 InstList::iterator InsertionPoint = Insts.end(); | 110 InstList::iterator InsertionPoint = Insts.end(); |
116 // Every block must end in a terminator instruction, and therefore | 111 // Every block must end in a terminator instruction, and therefore must have |
117 // must have at least one instruction, so it's valid to decrement | 112 // at least one instruction, so it's valid to decrement InsertionPoint (but |
118 // InsertionPoint (but assert just in case). | 113 // assert just in case). |
119 assert(InsertionPoint != Insts.begin()); | 114 assert(InsertionPoint != Insts.begin()); |
120 --InsertionPoint; | 115 --InsertionPoint; |
121 // Confirm that InsertionPoint is a terminator instruction. Calling | 116 // Confirm that InsertionPoint is a terminator instruction. Calling |
122 // getTerminatorEdges() on a non-terminator instruction will cause | 117 // getTerminatorEdges() on a non-terminator instruction will cause an |
123 // an llvm_unreachable(). | 118 // llvm_unreachable(). |
124 (void)InsertionPoint->getTerminatorEdges(); | 119 (void)InsertionPoint->getTerminatorEdges(); |
125 // SafeInsertionPoint is always immediately before the terminator | 120 // SafeInsertionPoint is always immediately before the terminator |
126 // instruction. If the block ends in a compare and conditional | 121 // instruction. If the block ends in a compare and conditional branch, it's |
127 // branch, it's better to place the Phi store before the compare so | 122 // better to place the Phi store before the compare so as not to interfere |
128 // as not to interfere with compare/branch fusing. However, if the | 123 // with compare/branch fusing. However, if the compare instruction's dest |
129 // compare instruction's dest operand is the same as the new | 124 // operand is the same as the new assignment statement's source operand, this |
130 // assignment statement's source operand, this can't be done due to | 125 // can't be done due to data dependences, so we need to fall back to the |
131 // data dependences, so we need to fall back to the | 126 // SafeInsertionPoint. To illustrate: |
132 // SafeInsertionPoint. To illustrate: | |
133 // ; <label>:95 | 127 // ; <label>:95 |
134 // %97 = load i8* %96, align 1 | 128 // %97 = load i8* %96, align 1 |
135 // %98 = icmp ne i8 %97, 0 | 129 // %98 = icmp ne i8 %97, 0 |
136 // br i1 %98, label %99, label %2132 | 130 // br i1 %98, label %99, label %2132 |
137 // ; <label>:99 | 131 // ; <label>:99 |
138 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ] | 132 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ] |
139 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ] | 133 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ] |
140 // would be Phi-lowered as: | 134 // would be Phi-lowered as: |
141 // ; <label>:95 | 135 // ; <label>:95 |
142 // %97 = load i8* %96, align 1 | 136 // %97 = load i8* %96, align 1 |
143 // %100_phi = %97 ; can be at InsertionPoint | 137 // %100_phi = %97 ; can be at InsertionPoint |
144 // %98 = icmp ne i8 %97, 0 | 138 // %98 = icmp ne i8 %97, 0 |
145 // %101_phi = %98 ; must be at SafeInsertionPoint | 139 // %101_phi = %98 ; must be at SafeInsertionPoint |
146 // br i1 %98, label %99, label %2132 | 140 // br i1 %98, label %99, label %2132 |
147 // ; <label>:99 | 141 // ; <label>:99 |
148 // %100 = %100_phi | 142 // %100 = %100_phi |
149 // %101 = %101_phi | 143 // %101 = %101_phi |
150 // | 144 // |
151 // TODO(stichnot): It may be possible to bypass this whole | 145 // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint |
152 // SafeInsertionPoint mechanism. If a source basic block ends in a | 146 // mechanism. If a source basic block ends in a conditional branch: |
153 // conditional branch: | |
154 // labelSource: | 147 // labelSource: |
155 // ... | 148 // ... |
156 // br i1 %foo, label %labelTrue, label %labelFalse | 149 // br i1 %foo, label %labelTrue, label %labelFalse |
157 // and a branch target has a Phi involving the branch operand: | 150 // and a branch target has a Phi involving the branch operand: |
158 // labelTrue: | 151 // labelTrue: |
159 // %bar = phi i1 [ %foo, %labelSource ], ... | 152 // %bar = phi i1 [ %foo, %labelSource ], ... |
160 // then we actually know the constant i1 value of the Phi operand: | 153 // then we actually know the constant i1 value of the Phi operand: |
161 // labelTrue: | 154 // labelTrue: |
162 // %bar = phi i1 [ true, %labelSource ], ... | 155 // %bar = phi i1 [ true, %labelSource ], ... |
163 // It seems that this optimization should be done by clang or opt, | 156 // It seems that this optimization should be done by clang or opt, but we |
164 // but we could also do it here. | 157 // could also do it here. |
165 InstList::iterator SafeInsertionPoint = InsertionPoint; | 158 InstList::iterator SafeInsertionPoint = InsertionPoint; |
166 // Keep track of the dest variable of a compare instruction, so that | 159 // Keep track of the dest variable of a compare instruction, so that we |
167 // we insert the new instruction at the SafeInsertionPoint if the | 160 // insert the new instruction at the SafeInsertionPoint if the compare's dest |
168 // compare's dest matches the Phi-lowered assignment's source. | 161 // matches the Phi-lowered assignment's source. |
169 Variable *CmpInstDest = nullptr; | 162 Variable *CmpInstDest = nullptr; |
170 // If the current insertion point is at a conditional branch | 163 // If the current insertion point is at a conditional branch instruction, and |
171 // instruction, and the previous instruction is a compare | 164 // the previous instruction is a compare instruction, then we move the |
172 // instruction, then we move the insertion point before the compare | 165 // insertion point before the compare instruction so as not to interfere with |
173 // instruction so as not to interfere with compare/branch fusing. | 166 // compare/branch fusing. |
174 if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) { | 167 if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) { |
175 if (!Branch->isUnconditional()) { | 168 if (!Branch->isUnconditional()) { |
176 if (InsertionPoint != Insts.begin()) { | 169 if (InsertionPoint != Insts.begin()) { |
177 --InsertionPoint; | 170 --InsertionPoint; |
178 if (llvm::isa<InstIcmp>(InsertionPoint) || | 171 if (llvm::isa<InstIcmp>(InsertionPoint) || |
179 llvm::isa<InstFcmp>(InsertionPoint)) { | 172 llvm::isa<InstFcmp>(InsertionPoint)) { |
180 CmpInstDest = InsertionPoint->getDest(); | 173 CmpInstDest = InsertionPoint->getDest(); |
181 } else { | 174 } else { |
182 ++InsertionPoint; | 175 ++InsertionPoint; |
183 } | 176 } |
(...skipping 18 matching lines...) Expand all Loading... |
202 } | 195 } |
203 } | 196 } |
204 } | 197 } |
205 | 198 |
206 // Deletes the phi instructions after the loads and stores are placed. | 199 // Deletes the phi instructions after the loads and stores are placed. |
207 void CfgNode::deletePhis() { | 200 void CfgNode::deletePhis() { |
208 for (Inst &I : Phis) | 201 for (Inst &I : Phis) |
209 I.setDeleted(); | 202 I.setDeleted(); |
210 } | 203 } |
211 | 204 |
212 // Splits the edge from Pred to this node by creating a new node and | 205 // Splits the edge from Pred to this node by creating a new node and hooking up |
213 // hooking up the in and out edges appropriately. (The EdgeIndex | 206 // the in and out edges appropriately. (The EdgeIndex parameter is only used to |
214 // parameter is only used to make the new node's name unique when | 207 // make the new node's name unique when there are multiple edges between the |
215 // there are multiple edges between the same pair of nodes.) The new | 208 // same pair of nodes.) The new node's instruction list is initialized to the |
216 // node's instruction list is initialized to the empty list, with no | 209 // empty list, with no terminator instruction. There must not be multiple edges |
217 // terminator instruction. There must not be multiple edges from Pred | 210 // from Pred to this node so all Inst::getTerminatorEdges implementations must |
218 // to this node so all Inst::getTerminatorEdges implementations must | |
219 // not contain duplicates. | 211 // not contain duplicates. |
220 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { | 212 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { |
221 CfgNode *NewNode = Func->makeNode(); | 213 CfgNode *NewNode = Func->makeNode(); |
222 // Depth is the minimum as it works if both are the same, but if one is | 214 // Depth is the minimum as it works if both are the same, but if one is |
223 // outside the loop and the other is inside, the new node should be placed | 215 // outside the loop and the other is inside, the new node should be placed |
224 // outside and not be executed multiple times within the loop. | 216 // outside and not be executed multiple times within the loop. |
225 NewNode->setLoopNestDepth( | 217 NewNode->setLoopNestDepth( |
226 std::min(getLoopNestDepth(), Pred->getLoopNestDepth())); | 218 std::min(getLoopNestDepth(), Pred->getLoopNestDepth())); |
227 if (BuildDefs::dump()) | 219 if (BuildDefs::dump()) |
228 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + | 220 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + |
229 std::to_string(EdgeIndex)); | 221 std::to_string(EdgeIndex)); |
230 // The new node is added to the end of the node list, and will later | 222 // The new node is added to the end of the node list, and will later need to |
231 // need to be sorted into a reasonable topological order. | 223 // be sorted into a reasonable topological order. |
232 NewNode->setNeedsPlacement(true); | 224 NewNode->setNeedsPlacement(true); |
233 // Repoint Pred's out-edge. | 225 // Repoint Pred's out-edge. |
234 bool Found = false; | 226 bool Found = false; |
235 for (CfgNode *&I : Pred->OutEdges) { | 227 for (CfgNode *&I : Pred->OutEdges) { |
236 if (I == this) { | 228 if (I == this) { |
237 I = NewNode; | 229 I = NewNode; |
238 NewNode->InEdges.push_back(Pred); | 230 NewNode->InEdges.push_back(Pred); |
239 Found = true; | 231 Found = true; |
240 break; | 232 break; |
241 } | 233 } |
(...skipping 30 matching lines...) Expand all Loading... |
272 return true; | 264 return true; |
273 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { | 265 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { |
274 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) | 266 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) |
275 return true; | 267 return true; |
276 } | 268 } |
277 return false; | 269 return false; |
278 } | 270 } |
279 | 271 |
280 } // end of anonymous namespace | 272 } // end of anonymous namespace |
281 | 273 |
282 // This the "advanced" version of Phi lowering for a basic block, in contrast to | 274 // This the "advanced" version of Phi lowering for a basic block, in contrast |
283 // the simple version that lowers through assignments involving temporaries. | 275 // to the simple version that lowers through assignments involving temporaries. |
284 // | 276 // |
285 // All Phi instructions in a basic block are conceptually executed in parallel. | 277 // All Phi instructions in a basic block are conceptually executed in parallel. |
286 // However, if we lower Phis early and commit to a sequential ordering, we may | 278 // However, if we lower Phis early and commit to a sequential ordering, we may |
287 // end up creating unnecessary interferences which lead to worse register | 279 // end up creating unnecessary interferences which lead to worse register |
288 // allocation. Delaying Phi scheduling until after register allocation can help | 280 // allocation. Delaying Phi scheduling until after register allocation can help |
289 // unless there are no free registers for shuffling registers or stack slots and | 281 // unless there are no free registers for shuffling registers or stack slots |
290 // spilling becomes necessary. | 282 // and spilling becomes necessary. |
291 // | 283 // |
292 // The advanced Phi lowering starts by finding a topological sort of the Phi | 284 // The advanced Phi lowering starts by finding a topological sort of the Phi |
293 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on B. | 285 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on |
294 // Preexisting register assignments are considered in the topological sort. If | 286 // B. Preexisting register assignments are considered in the topological sort. |
295 // a topological sort is not possible due to a cycle, the cycle is broken by | 287 // If a topological sort is not possible due to a cycle, the cycle is broken by |
296 // introducing a non-parallel temporary. For example, a cycle arising from a | 288 // introducing a non-parallel temporary. For example, a cycle arising from a |
297 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being | 289 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being |
298 // equal, prefer to schedule assignments with register-allocated Src operands | 290 // equal, prefer to schedule assignments with register-allocated Src operands |
299 // earlier, in case that register becomes free afterwards, and prefer to | 291 // earlier, in case that register becomes free afterwards, and prefer to |
300 // schedule assignments with register-allocated Dest variables later, to keep | 292 // schedule assignments with register-allocated Dest variables later, to keep |
301 // that register free for longer. | 293 // that register free for longer. |
302 // | 294 // |
303 // Once the ordering is determined, the Cfg edge is split and the assignment | 295 // Once the ordering is determined, the Cfg edge is split and the assignment |
304 // list is lowered by the target lowering layer. Since the assignment lowering | 296 // list is lowered by the target lowering layer. Since the assignment lowering |
305 // may create new infinite-weight temporaries, a follow-on register allocation | 297 // may create new infinite-weight temporaries, a follow-on register allocation |
306 // pass will be needed. To prepare for this, liveness (including live range | 298 // pass will be needed. To prepare for this, liveness (including live range |
307 // calculation) of the split nodes needs to be calculated, and liveness of the | 299 // calculation) of the split nodes needs to be calculated, and liveness of the |
308 // original node need to be updated to "undo" the effects of the phi | 300 // original node need to be updated to "undo" the effects of the phi |
309 // assignments. | 301 // assignments. |
310 | 302 |
311 // The specific placement of the new node within the Cfg node list is deferred | 303 // The specific placement of the new node within the Cfg node list is deferred |
312 // until later, including after empty node contraction. | 304 // until later, including after empty node contraction. |
313 // | 305 // |
314 // After phi assignments are lowered across all blocks, another register | 306 // After phi assignments are lowered across all blocks, another register |
315 // allocation pass is run, focusing only on pre-colored and infinite-weight | 307 // allocation pass is run, focusing only on pre-colored and infinite-weight |
316 // variables, similar to Om1 register allocation (except without the need to | 308 // variables, similar to Om1 register allocation (except without the need to |
317 // specially compute these variables' live ranges, since they have already been | 309 // specially compute these variables' live ranges, since they have already been |
318 // precisely calculated). The register allocator in this mode needs the ability | 310 // precisely calculated). The register allocator in this mode needs the ability |
319 // to forcibly spill and reload registers in case none are naturally available. | 311 // to forcibly spill and reload registers in case none are naturally available. |
320 void CfgNode::advancedPhiLowering() { | 312 void CfgNode::advancedPhiLowering() { |
321 if (getPhis().empty()) | 313 if (getPhis().empty()) |
322 return; | 314 return; |
323 | 315 |
324 // Count the number of non-deleted Phi instructions. | 316 // Count the number of non-deleted Phi instructions. |
325 struct PhiDesc { | 317 struct PhiDesc { |
326 InstPhi *Phi; | 318 InstPhi *Phi; |
327 Variable *Dest; | 319 Variable *Dest; |
328 Operand *Src; | 320 Operand *Src; |
(...skipping 27 matching lines...) Expand all Loading... |
356 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); | 348 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); |
357 SizeT Remaining = NumPhis; | 349 SizeT Remaining = NumPhis; |
358 | 350 |
359 // First pass computes Src and initializes NumPred. | 351 // First pass computes Src and initializes NumPred. |
360 for (size_t I = 0; I < NumPhis; ++I) { | 352 for (size_t I = 0; I < NumPhis; ++I) { |
361 Variable *Dest = Desc[I].Dest; | 353 Variable *Dest = Desc[I].Dest; |
362 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); | 354 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); |
363 Desc[I].Src = Src; | 355 Desc[I].Src = Src; |
364 Desc[I].Processed = false; | 356 Desc[I].Processed = false; |
365 Desc[I].NumPred = 0; | 357 Desc[I].NumPred = 0; |
366 // Cherry-pick any trivial assignments, so that they don't | 358 // Cherry-pick any trivial assignments, so that they don't contribute to |
367 // contribute to the running complexity of the topological sort. | 359 // the running complexity of the topological sort. |
368 if (sameVarOrReg(Dest, Src)) { | 360 if (sameVarOrReg(Dest, Src)) { |
369 Desc[I].Processed = true; | 361 Desc[I].Processed = true; |
370 --Remaining; | 362 --Remaining; |
371 if (Dest != Src) | 363 if (Dest != Src) |
372 // If Dest and Src are syntactically the same, don't bother | 364 // If Dest and Src are syntactically the same, don't bother adding |
373 // adding the assignment, because in all respects it would | 365 // the assignment, because in all respects it would be redundant, and |
374 // be redundant, and if Dest/Src are on the stack, the | 366 // if Dest/Src are on the stack, the target lowering may naively |
375 // target lowering may naively decide to lower it using a | 367 // decide to lower it using a temporary register. |
376 // temporary register. | |
377 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 368 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
378 } | 369 } |
379 } | 370 } |
380 // Second pass computes NumPred by comparing every pair of Phi | 371 // Second pass computes NumPred by comparing every pair of Phi |
381 // instructions. | 372 // instructions. |
382 for (size_t I = 0; I < NumPhis; ++I) { | 373 for (size_t I = 0; I < NumPhis; ++I) { |
383 if (Desc[I].Processed) | 374 if (Desc[I].Processed) |
384 continue; | 375 continue; |
385 const Variable *Dest = Desc[I].Dest; | 376 const Variable *Dest = Desc[I].Dest; |
386 for (size_t J = 0; J < NumPhis; ++J) { | 377 for (size_t J = 0; J < NumPhis; ++J) { |
387 if (Desc[J].Processed) | 378 if (Desc[J].Processed) |
388 continue; | 379 continue; |
389 if (I != J) { | 380 if (I != J) { |
390 // There shouldn't be two Phis with the same Dest variable | 381 // There shouldn't be two Phis with the same Dest variable or |
391 // or register. | 382 // register. |
392 assert(!sameVarOrReg(Dest, Desc[J].Dest)); | 383 assert(!sameVarOrReg(Dest, Desc[J].Dest)); |
393 } | 384 } |
394 const Operand *Src = Desc[J].Src; | 385 const Operand *Src = Desc[J].Src; |
395 if (sameVarOrReg(Dest, Src)) | 386 if (sameVarOrReg(Dest, Src)) |
396 ++Desc[I].NumPred; | 387 ++Desc[I].NumPred; |
397 } | 388 } |
398 } | 389 } |
399 | 390 |
400 // Another pass to compute initial Weight values. | 391 // Another pass to compute initial Weight values. |
401 | 392 |
402 // Always pick NumPred=0 over NumPred>0. | 393 // Always pick NumPred=0 over NumPred>0. |
403 constexpr int32_t WeightNoPreds = 4; | 394 constexpr int32_t WeightNoPreds = 4; |
404 // Prefer Src as a register because the register might free up. | 395 // Prefer Src as a register because the register might free up. |
405 constexpr int32_t WeightSrcIsReg = 2; | 396 constexpr int32_t WeightSrcIsReg = 2; |
406 // Prefer Dest not as a register because the register stays free | 397 // Prefer Dest not as a register because the register stays free longer. |
407 // longer. | |
408 constexpr int32_t WeightDestNotReg = 1; | 398 constexpr int32_t WeightDestNotReg = 1; |
409 | 399 |
410 for (size_t I = 0; I < NumPhis; ++I) { | 400 for (size_t I = 0; I < NumPhis; ++I) { |
411 if (Desc[I].Processed) | 401 if (Desc[I].Processed) |
412 continue; | 402 continue; |
413 int32_t Weight = 0; | 403 int32_t Weight = 0; |
414 if (Desc[I].NumPred == 0) | 404 if (Desc[I].NumPred == 0) |
415 Weight += WeightNoPreds; | 405 Weight += WeightNoPreds; |
416 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) | 406 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) |
417 if (Var->hasReg()) | 407 if (Var->hasReg()) |
418 Weight += WeightSrcIsReg; | 408 Weight += WeightSrcIsReg; |
419 if (!Desc[I].Dest->hasReg()) | 409 if (!Desc[I].Dest->hasReg()) |
420 Weight += WeightDestNotReg; | 410 Weight += WeightDestNotReg; |
421 Desc[I].Weight = Weight; | 411 Desc[I].Weight = Weight; |
422 } | 412 } |
423 | 413 |
424 // Repeatedly choose and process the best candidate in the | 414 // Repeatedly choose and process the best candidate in the topological |
425 // topological sort, until no candidates remain. This | 415 // sort, until no candidates remain. This implementation is O(N^2) where N |
426 // implementation is O(N^2) where N is the number of Phi | 416 // is the number of Phi instructions, but with a small constant factor |
427 // instructions, but with a small constant factor compared to a | 417 // compared to a likely implementation of O(N) topological sort. |
428 // likely implementation of O(N) topological sort. | |
429 for (; Remaining; --Remaining) { | 418 for (; Remaining; --Remaining) { |
430 size_t BestIndex = 0; | 419 size_t BestIndex = 0; |
431 int32_t BestWeight = -1; | 420 int32_t BestWeight = -1; |
432 // Find the best candidate. | 421 // Find the best candidate. |
433 for (size_t I = 0; I < NumPhis; ++I) { | 422 for (size_t I = 0; I < NumPhis; ++I) { |
434 if (Desc[I].Processed) | 423 if (Desc[I].Processed) |
435 continue; | 424 continue; |
436 int32_t Weight = 0; | 425 int32_t Weight = 0; |
437 Weight = Desc[I].Weight; | 426 Weight = Desc[I].Weight; |
438 if (Weight > BestWeight) { | 427 if (Weight > BestWeight) { |
439 BestIndex = I; | 428 BestIndex = I; |
440 BestWeight = Weight; | 429 BestWeight = Weight; |
441 } | 430 } |
442 } | 431 } |
443 assert(BestWeight >= 0); | 432 assert(BestWeight >= 0); |
444 assert(Desc[BestIndex].NumPred <= 1); | 433 assert(Desc[BestIndex].NumPred <= 1); |
445 Variable *Dest = Desc[BestIndex].Dest; | 434 Variable *Dest = Desc[BestIndex].Dest; |
446 Operand *Src = Desc[BestIndex].Src; | 435 Operand *Src = Desc[BestIndex].Src; |
447 assert(!sameVarOrReg(Dest, Src)); | 436 assert(!sameVarOrReg(Dest, Src)); |
448 // Break a cycle by introducing a temporary. | 437 // Break a cycle by introducing a temporary. |
449 if (Desc[BestIndex].NumPred) { | 438 if (Desc[BestIndex].NumPred) { |
450 bool Found = false; | 439 bool Found = false; |
451 // If the target instruction "A=B" is part of a cycle, find | 440 // If the target instruction "A=B" is part of a cycle, find the "X=A" |
452 // the "X=A" assignment in the cycle because it will have to | 441 // assignment in the cycle because it will have to be rewritten as |
453 // be rewritten as "X=tmp". | 442 // "X=tmp". |
454 for (size_t J = 0; !Found && J < NumPhis; ++J) { | 443 for (size_t J = 0; !Found && J < NumPhis; ++J) { |
455 if (Desc[J].Processed) | 444 if (Desc[J].Processed) |
456 continue; | 445 continue; |
457 Operand *OtherSrc = Desc[J].Src; | 446 Operand *OtherSrc = Desc[J].Src; |
458 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { | 447 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { |
459 SizeT VarNum = Func->getNumVariables(); | 448 SizeT VarNum = Func->getNumVariables(); |
460 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); | 449 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); |
461 if (BuildDefs::dump()) | 450 if (BuildDefs::dump()) |
462 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); | 451 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); |
463 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); | 452 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); |
464 Desc[J].Src = Tmp; | 453 Desc[J].Src = Tmp; |
465 Found = true; | 454 Found = true; |
466 } | 455 } |
467 } | 456 } |
468 assert(Found); | 457 assert(Found); |
469 } | 458 } |
470 // Now that a cycle (if any) has been broken, create the actual | 459 // Now that a cycle (if any) has been broken, create the actual |
471 // assignment. | 460 // assignment. |
472 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 461 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
473 // Update NumPred for all Phi assignments using this Phi's Src | 462 // Update NumPred for all Phi assignments using this Phi's Src as their |
474 // as their Dest variable. Also update Weight if NumPred | 463 // Dest variable. Also update Weight if NumPred dropped from 1 to 0. |
475 // dropped from 1 to 0. | |
476 if (auto Var = llvm::dyn_cast<Variable>(Src)) { | 464 if (auto Var = llvm::dyn_cast<Variable>(Src)) { |
477 for (size_t I = 0; I < NumPhis; ++I) { | 465 for (size_t I = 0; I < NumPhis; ++I) { |
478 if (Desc[I].Processed) | 466 if (Desc[I].Processed) |
479 continue; | 467 continue; |
480 if (sameVarOrReg(Var, Desc[I].Dest)) { | 468 if (sameVarOrReg(Var, Desc[I].Dest)) { |
481 if (--Desc[I].NumPred == 0) | 469 if (--Desc[I].NumPred == 0) |
482 Desc[I].Weight += WeightNoPreds; | 470 Desc[I].Weight += WeightNoPreds; |
483 } | 471 } |
484 } | 472 } |
485 } | 473 } |
486 Desc[BestIndex].Processed = true; | 474 Desc[BestIndex].Processed = true; |
487 } | 475 } |
488 Split->appendInst(InstBr::create(Func, this)); | 476 Split->appendInst(InstBr::create(Func, this)); |
489 | 477 |
490 Split->genCode(); | 478 Split->genCode(); |
491 Func->getVMetadata()->addNode(Split); | 479 Func->getVMetadata()->addNode(Split); |
492 } | 480 } |
493 } | 481 } |
494 | 482 |
495 // Does address mode optimization. Pass each instruction to the | 483 // Does address mode optimization. Pass each instruction to the TargetLowering |
496 // TargetLowering object. If it returns a new instruction | 484 // object. If it returns a new instruction (representing the optimized address |
497 // (representing the optimized address mode), then insert the new | 485 // mode), then insert the new instruction and delete the old. |
498 // instruction and delete the old. | |
499 void CfgNode::doAddressOpt() { | 486 void CfgNode::doAddressOpt() { |
500 TargetLowering *Target = Func->getTarget(); | 487 TargetLowering *Target = Func->getTarget(); |
501 LoweringContext &Context = Target->getContext(); | 488 LoweringContext &Context = Target->getContext(); |
502 Context.init(this); | 489 Context.init(this); |
503 while (!Context.atEnd()) { | 490 while (!Context.atEnd()) { |
504 Target->doAddressOpt(); | 491 Target->doAddressOpt(); |
505 } | 492 } |
506 } | 493 } |
507 | 494 |
508 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) { | 495 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) { |
(...skipping 11 matching lines...) Expand all Loading... |
520 } | 507 } |
521 if (!PauseNopInsertion) | 508 if (!PauseNopInsertion) |
522 Target->doNopInsertion(RNG); | 509 Target->doNopInsertion(RNG); |
523 // Ensure Cur=Next, so that the nops are inserted before the current | 510 // Ensure Cur=Next, so that the nops are inserted before the current |
524 // instruction rather than after. | 511 // instruction rather than after. |
525 Context.advanceCur(); | 512 Context.advanceCur(); |
526 Context.advanceNext(); | 513 Context.advanceNext(); |
527 } | 514 } |
528 } | 515 } |
529 | 516 |
530 // Drives the target lowering. Passes the current instruction and the | 517 // Drives the target lowering. Passes the current instruction and the next |
531 // next non-deleted instruction for target lowering. | 518 // non-deleted instruction for target lowering. |
532 void CfgNode::genCode() { | 519 void CfgNode::genCode() { |
533 TargetLowering *Target = Func->getTarget(); | 520 TargetLowering *Target = Func->getTarget(); |
534 LoweringContext &Context = Target->getContext(); | 521 LoweringContext &Context = Target->getContext(); |
535 // Lower the regular instructions. | 522 // Lower the regular instructions. |
536 Context.init(this); | 523 Context.init(this); |
537 Target->initNodeForLowering(this); | 524 Target->initNodeForLowering(this); |
538 while (!Context.atEnd()) { | 525 while (!Context.atEnd()) { |
539 InstList::iterator Orig = Context.getCur(); | 526 InstList::iterator Orig = Context.getCur(); |
540 if (llvm::isa<InstRet>(*Orig)) | 527 if (llvm::isa<InstRet>(*Orig)) |
541 setHasReturn(); | 528 setHasReturn(); |
(...skipping 14 matching lines...) Expand all Loading... |
556 continue; | 543 continue; |
557 I.livenessLightweight(Func, Live); | 544 I.livenessLightweight(Func, Live); |
558 } | 545 } |
559 for (Inst &I : Phis) { | 546 for (Inst &I : Phis) { |
560 if (I.isDeleted()) | 547 if (I.isDeleted()) |
561 continue; | 548 continue; |
562 I.livenessLightweight(Func, Live); | 549 I.livenessLightweight(Func, Live); |
563 } | 550 } |
564 } | 551 } |
565 | 552 |
// Performs one backward-dataflow pass of liveness analysis on the block.
// Returns true if the incoming liveness (LiveIn) or the non-dead phi count
// changed from before, false if both stayed the same. (If it changes, the
// node's predecessors need to be processed again.)
bool CfgNode::liveness(Liveness *Liveness) {
  SizeT NumVars = Liveness->getNumVarsInNode(this);
  LivenessBV Live(NumVars);
  LiveBeginEndMap *LiveBegin = nullptr;
  LiveBeginEndMap *LiveEnd = nullptr;
  // Mark the beginning and ending of each variable's live range with the
  // sentinel instruction number 0.  LiveBegin/LiveEnd are only collected in
  // Liveness_Intervals mode; otherwise they stay nullptr and the per-inst
  // liveness calls below skip range recording.
  if (Liveness->getMode() == Liveness_Intervals) {
    LiveBegin = Liveness->getLiveBegin(this);
    LiveEnd = Liveness->getLiveEnd(this);
    LiveBegin->clear();
    LiveEnd->clear();
    // Guess that the number of live ranges beginning is roughly the number of
    // instructions, and same for live ranges ending.
    LiveBegin->reserve(getInstCountEstimate());
    LiveEnd->reserve(getInstCountEstimate());
  }
  // Initialize Live to be the union of all successors' LiveIn.
  for (CfgNode *Succ : OutEdges) {
    Live |= Liveness->getLiveIn(Succ);
    // Mark corresponding argument of phis in successor as live.
    for (Inst &I : Succ->Phis) {
      if (I.isDeleted())
        continue;
      auto Phi = llvm::dyn_cast<InstPhi>(&I);
      Phi->livenessPhiOperand(Live, this, Liveness);
    }
  }
  // Record the block's live-out set before walking backward through the body.
  Liveness->getLiveOut(this) = Live;

  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
  }
  // Process phis in forward order so that we can override the instruction
  // number to be that of the earliest phi instruction in the block.
  SizeT NumNonDeadPhis = 0;
  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    if (FirstPhiNumber == Inst::NumberSentinel)
      FirstPhiNumber = I.getNumber();
    // Inst::liveness() returns whether the phi is live (non-dead).
    if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
      ++NumNonDeadPhis;
  }

  // When using the sparse representation, after traversing the instructions in
  // the block, the Live bitvector should only contain set bits for global
  // variables upon block entry. We validate this by shrinking the Live vector
  // and then testing it against the pre-shrunk version. (The shrinking is
  // required, but the validation is not.)
  LivenessBV LiveOrig = Live;
  Live.resize(Liveness->getNumGlobalVars());
  if (Live != LiveOrig) {
    if (BuildDefs::dump()) {
      // This is a fatal liveness consistency error. Print some diagnostics and
      // abort.
      Ostream &Str = Func->getContext()->getStrDump();
      Func->resetCurrentNode();
      Str << "LiveOrig-Live =";
      // Dump every local (non-global) variable that is unexpectedly still
      // live at block entry.
      for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) {
        if (LiveOrig.test(i)) {
          Str << " ";
          Liveness->getVariable(i, this)->dump(Func);
        }
      }
      Str << "\n";
    }
    llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
  }

  bool Changed = false;
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  // Add in current LiveIn
  Live |= LiveIn;
  // Check result, set LiveIn=Live
  SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
  bool LiveInChanged = (Live != LiveIn);
  Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
  if (LiveInChanged)
    LiveIn = Live;
  PrevNumNonDeadPhis = NumNonDeadPhis;
  return Changed;
}
659 | 643 |
660 // Once basic liveness is complete, compute actual live ranges. It is | 644 // Once basic liveness is complete, compute actual live ranges. It is assumed |
661 // assumed that within a single basic block, a live range begins at | 645 // that within a single basic block, a live range begins at most once and ends |
662 // most once and ends at most once. This is certainly true for pure | 646 // at most once. This is certainly true for pure SSA form. It is also true once |
663 // SSA form. It is also true once phis are lowered, since each | 647 // phis are lowered, since each assignment to the phi-based temporary is in a |
664 // assignment to the phi-based temporary is in a different basic | 648 // different basic block, and there is a single read that ends the live in the |
665 // block, and there is a single read that ends the live in the basic | 649 // basic block that contained the actual phi instruction. |
666 // block that contained the actual phi instruction. | |
667 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum, | 650 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum, |
668 InstNumberT LastInstNum) { | 651 InstNumberT LastInstNum) { |
669 TimerMarker T1(TimerStack::TT_liveRange, Func); | 652 TimerMarker T1(TimerStack::TT_liveRange, Func); |
670 | 653 |
671 SizeT NumVars = Liveness->getNumVarsInNode(this); | 654 SizeT NumVars = Liveness->getNumVarsInNode(this); |
672 LivenessBV &LiveIn = Liveness->getLiveIn(this); | 655 LivenessBV &LiveIn = Liveness->getLiveIn(this); |
673 LivenessBV &LiveOut = Liveness->getLiveOut(this); | 656 LivenessBV &LiveOut = Liveness->getLiveOut(this); |
674 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this); | 657 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this); |
675 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this); | 658 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this); |
676 std::sort(MapBegin.begin(), MapBegin.end()); | 659 std::sort(MapBegin.begin(), MapBegin.end()); |
(...skipping 12 matching lines...) Expand all Loading... |
689 LivenessBV LiveInAndOut = LiveIn; | 672 LivenessBV LiveInAndOut = LiveIn; |
690 LiveInAndOut &= LiveOut; | 673 LiveInAndOut &= LiveOut; |
691 | 674 |
692 // Iterate in parallel across the sorted MapBegin[] and MapEnd[]. | 675 // Iterate in parallel across the sorted MapBegin[] and MapEnd[]. |
693 auto IBB = MapBegin.begin(), IEB = MapEnd.begin(); | 676 auto IBB = MapBegin.begin(), IEB = MapEnd.begin(); |
694 auto IBE = MapBegin.end(), IEE = MapEnd.end(); | 677 auto IBE = MapBegin.end(), IEE = MapEnd.end(); |
695 while (IBB != IBE || IEB != IEE) { | 678 while (IBB != IBE || IEB != IEE) { |
696 SizeT i1 = IBB == IBE ? NumVars : IBB->first; | 679 SizeT i1 = IBB == IBE ? NumVars : IBB->first; |
697 SizeT i2 = IEB == IEE ? NumVars : IEB->first; | 680 SizeT i2 = IEB == IEE ? NumVars : IEB->first; |
698 SizeT i = std::min(i1, i2); | 681 SizeT i = std::min(i1, i2); |
699 // i1 is the Variable number of the next MapBegin entry, and i2 is | 682 // i1 is the Variable number of the next MapBegin entry, and i2 is the |
700 // the Variable number of the next MapEnd entry. If i1==i2, then | 683 // Variable number of the next MapEnd entry. If i1==i2, then the Variable's |
701 // the Variable's live range begins and ends in this block. If | 684 // live range begins and ends in this block. If i1<i2, then i1's live range |
702 // i1<i2, then i1's live range begins at instruction IBB->second | 685 // begins at instruction IBB->second and extends through the end of the |
703 // and extends through the end of the block. If i1>i2, then i2's | 686 // block. If i1>i2, then i2's live range begins at the first instruction of |
704 // live range begins at the first instruction of the block and | 687 // the block and ends at IEB->second. In any case, we choose the lesser of |
705 // ends at IEB->second. In any case, we choose the lesser of i1 | 688 // i1 and i2 and proceed accordingly. |
706 // and i2 and proceed accordingly. | |
707 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum; | 689 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum; |
708 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1; | 690 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1; |
709 | 691 |
710 Variable *Var = Liveness->getVariable(i, this); | 692 Variable *Var = Liveness->getVariable(i, this); |
711 if (LB > LE) { | 693 if (LB > LE) { |
712 Var->addLiveRange(FirstInstNum, LE); | 694 Var->addLiveRange(FirstInstNum, LE); |
713 Var->addLiveRange(LB, LastInstNum + 1); | 695 Var->addLiveRange(LB, LastInstNum + 1); |
714 // Assert that Var is a global variable by checking that its | 696 // Assert that Var is a global variable by checking that its liveness |
715 // liveness index is less than the number of globals. This | 697 // index is less than the number of globals. This ensures that the |
716 // ensures that the LiveInAndOut[] access is valid. | 698 // LiveInAndOut[] access is valid. |
717 assert(i < Liveness->getNumGlobalVars()); | 699 assert(i < Liveness->getNumGlobalVars()); |
718 LiveInAndOut[i] = false; | 700 LiveInAndOut[i] = false; |
719 } else { | 701 } else { |
720 Var->addLiveRange(LB, LE); | 702 Var->addLiveRange(LB, LE); |
721 } | 703 } |
722 if (i == i1) | 704 if (i == i1) |
723 ++IBB; | 705 ++IBB; |
724 if (i == i2) | 706 if (i == i2) |
725 ++IEB; | 707 ++IEB; |
726 } | 708 } |
727 // Process the variables that are live across the entire block. | 709 // Process the variables that are live across the entire block. |
728 for (int i = LiveInAndOut.find_first(); i != -1; | 710 for (int i = LiveInAndOut.find_first(); i != -1; |
729 i = LiveInAndOut.find_next(i)) { | 711 i = LiveInAndOut.find_next(i)) { |
730 Variable *Var = Liveness->getVariable(i, this); | 712 Variable *Var = Liveness->getVariable(i, this); |
731 if (Liveness->getRangeMask(Var->getIndex())) | 713 if (Liveness->getRangeMask(Var->getIndex())) |
732 Var->addLiveRange(FirstInstNum, LastInstNum + 1); | 714 Var->addLiveRange(FirstInstNum, LastInstNum + 1); |
733 } | 715 } |
734 } | 716 } |
735 | 717 |
736 // If this node contains only deleted instructions, and ends in an | 718 // If this node contains only deleted instructions, and ends in an |
737 // unconditional branch, contract the node by repointing all its | 719 // unconditional branch, contract the node by repointing all its in-edges to |
738 // in-edges to its successor. | 720 // its successor. |
739 void CfgNode::contractIfEmpty() { | 721 void CfgNode::contractIfEmpty() { |
740 if (InEdges.empty()) | 722 if (InEdges.empty()) |
741 return; | 723 return; |
742 Inst *Branch = nullptr; | 724 Inst *Branch = nullptr; |
743 for (Inst &I : Insts) { | 725 for (Inst &I : Insts) { |
744 if (I.isDeleted()) | 726 if (I.isDeleted()) |
745 continue; | 727 continue; |
746 if (I.isUnconditionalBranch()) | 728 if (I.isUnconditionalBranch()) |
747 Branch = &I; | 729 Branch = &I; |
748 else if (!I.isRedundantAssign()) | 730 else if (!I.isRedundantAssign()) |
749 return; | 731 return; |
750 } | 732 } |
751 Branch->setDeleted(); | 733 Branch->setDeleted(); |
752 assert(OutEdges.size() == 1); | 734 assert(OutEdges.size() == 1); |
753 CfgNode *Successor = OutEdges.front(); | 735 CfgNode *Successor = OutEdges.front(); |
754 // Repoint all this node's in-edges to this node's successor, unless | 736 // Repoint all this node's in-edges to this node's successor, unless this |
755 // this node's successor is actually itself (in which case the | 737 // node's successor is actually itself (in which case the statement |
756 // statement "OutEdges.front()->InEdges.push_back(Pred)" could | 738 // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator |
757 // invalidate the iterator over this->InEdges). | 739 // over this->InEdges). |
758 if (Successor != this) { | 740 if (Successor != this) { |
759 for (CfgNode *Pred : InEdges) { | 741 for (CfgNode *Pred : InEdges) { |
760 for (CfgNode *&I : Pred->OutEdges) { | 742 for (CfgNode *&I : Pred->OutEdges) { |
761 if (I == this) { | 743 if (I == this) { |
762 I = Successor; | 744 I = Successor; |
763 Successor->InEdges.push_back(Pred); | 745 Successor->InEdges.push_back(Pred); |
764 } | 746 } |
765 } | 747 } |
766 for (Inst &I : Pred->getInsts()) { | 748 for (Inst &I : Pred->getInsts()) { |
767 if (!I.isDeleted()) | 749 if (!I.isDeleted()) |
768 I.repointEdges(this, Successor); | 750 I.repointEdges(this, Successor); |
769 } | 751 } |
770 } | 752 } |
771 | 753 |
772 // Remove the in-edge to the successor to allow node reordering to make | 754 // Remove the in-edge to the successor to allow node reordering to make |
773 // better decisions. For example it's more helpful to place a node after | 755 // better decisions. For example it's more helpful to place a node after a |
774 // a reachable predecessor than an unreachable one (like the one we just | 756 // reachable predecessor than an unreachable one (like the one we just |
775 // contracted). | 757 // contracted). |
776 Successor->InEdges.erase( | 758 Successor->InEdges.erase( |
777 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this)); | 759 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this)); |
778 } | 760 } |
779 InEdges.clear(); | 761 InEdges.clear(); |
780 } | 762 } |
781 | 763 |
782 void CfgNode::doBranchOpt(const CfgNode *NextNode) { | 764 void CfgNode::doBranchOpt(const CfgNode *NextNode) { |
783 TargetLowering *Target = Func->getTarget(); | 765 TargetLowering *Target = Func->getTarget(); |
784 // Find the first opportunity for branch optimization (which will be the last | 766 // Find the first opportunity for branch optimization (which will be the last |
785 // instruction in the block) and stop. This is sufficient unless there is some | 767 // instruction in the block) and stop. This is sufficient unless there is |
786 // target lowering where we have the possibility of multiple optimizations per | 768 // some target lowering where we have the possibility of multiple |
787 // block. Take care with switch lowering as there are multiple unconditional | 769 // optimizations per block. Take care with switch lowering as there are |
788 // branches and only the last can be deleted. | 770 // multiple unconditional branches and only the last can be deleted. |
789 for (Inst &I : reverse_range(Insts)) { | 771 for (Inst &I : reverse_range(Insts)) { |
790 if (!I.isDeleted()) { | 772 if (!I.isDeleted()) { |
791 Target->doBranchOpt(&I, NextNode); | 773 Target->doBranchOpt(&I, NextNode); |
792 return; | 774 return; |
793 } | 775 } |
794 } | 776 } |
795 } | 777 } |
796 | 778 |
797 // ======================== Dump routines ======================== // | 779 // ======================== Dump routines ======================== // |
798 | 780 |
(...skipping 19 matching lines...) Expand all Loading... |
818 for (SizeT i = 0; i < Live->size(); ++i) { | 800 for (SizeT i = 0; i < Live->size(); ++i) { |
819 if ((*Live)[i]) { | 801 if ((*Live)[i]) { |
820 Variable *Var = Liveness->getVariable(i, Node); | 802 Variable *Var = Liveness->getVariable(i, Node); |
821 if (Var->hasReg()) { | 803 if (Var->hasReg()) { |
822 if (IsLiveIn) | 804 if (IsLiveIn) |
823 ++LiveRegCount[Var->getRegNum()]; | 805 ++LiveRegCount[Var->getRegNum()]; |
824 LiveRegs.push_back(Var); | 806 LiveRegs.push_back(Var); |
825 } | 807 } |
826 } | 808 } |
827 } | 809 } |
828 // Sort the variables by regnum so they are always printed in a | 810 // Sort the variables by regnum so they are always printed in a familiar |
829 // familiar order. | 811 // order. |
830 std::sort(LiveRegs.begin(), LiveRegs.end(), | 812 std::sort(LiveRegs.begin(), LiveRegs.end(), |
831 [](const Variable *V1, const Variable *V2) { | 813 [](const Variable *V1, const Variable *V2) { |
832 return V1->getRegNum() < V2->getRegNum(); | 814 return V1->getRegNum() < V2->getRegNum(); |
833 }); | 815 }); |
834 bool First = true; | 816 bool First = true; |
835 for (Variable *Var : LiveRegs) { | 817 for (Variable *Var : LiveRegs) { |
836 if (!First) | 818 if (!First) |
837 Str << ","; | 819 Str << ","; |
838 First = false; | 820 First = false; |
839 Var->emit(Func); | 821 Var->emit(Func); |
840 } | 822 } |
841 } | 823 } |
842 Str << "\n"; | 824 Str << "\n"; |
843 } | 825 } |
844 | 826 |
// Emits an " \t# END=" asm-decoration comment after Instr listing each
// register-allocated variable whose live range ends at Instr. LiveRegCount
// tracks, per physical register, how many currently-live variables occupy it;
// a range end is only reported once the register's count drops to zero.
void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
                         std::vector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  bool First = true;
  Variable *Dest = Instr->getDest();
  // Normally we increment the live count for the dest register. But we
  // shouldn't if the instruction's IsDestNonKillable flag is set, because this
  // means that the target lowering created this instruction as a non-SSA
  // assignment; i.e., a different, previous instruction started the dest
  // variable's live range.
  if (!Instr->isDestNonKillable() && Dest && Dest->hasReg())
    ++LiveRegCount[Dest->getRegNum()];
  // Visit every Variable operand of Instr (project macro binds it as Var).
  FOREACH_VAR_IN_INST(Var, *Instr) {
    bool ShouldReport = Instr->isLastUse(Var);
    if (ShouldReport && Var->hasReg()) {
      // Don't report end of live range until the live count reaches 0.
      SizeT NewCount = --LiveRegCount[Var->getRegNum()];
      if (NewCount)
        ShouldReport = false;
    }
    if (ShouldReport) {
      if (First)
        Str << " \t# END=";
      else
        Str << ",";
      Var->emit(Func);
      First = false;
    }
  }
}
876 | 858 |
877 void updateStats(Cfg *Func, const Inst *I) { | 859 void updateStats(Cfg *Func, const Inst *I) { |
878 if (!BuildDefs::dump()) | 860 if (!BuildDefs::dump()) |
879 return; | 861 return; |
880 // Update emitted instruction count, plus fill/spill count for | 862 // Update emitted instruction count, plus fill/spill count for Variable |
881 // Variable operands without a physical register. | 863 // operands without a physical register. |
882 if (uint32_t Count = I->getEmitInstCount()) { | 864 if (uint32_t Count = I->getEmitInstCount()) { |
883 Func->getContext()->statsUpdateEmitted(Count); | 865 Func->getContext()->statsUpdateEmitted(Count); |
884 if (Variable *Dest = I->getDest()) { | 866 if (Variable *Dest = I->getDest()) { |
885 if (!Dest->hasReg()) | 867 if (!Dest->hasReg()) |
886 Func->getContext()->statsUpdateFills(); | 868 Func->getContext()->statsUpdateFills(); |
887 } | 869 } |
888 for (SizeT S = 0; S < I->getSrcSize(); ++S) { | 870 for (SizeT S = 0; S < I->getSrcSize(); ++S) { |
889 if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) { | 871 if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) { |
890 if (!Src->hasReg()) | 872 if (!Src->hasReg()) |
891 Func->getContext()->statsUpdateSpills(); | 873 Func->getContext()->statsUpdateSpills(); |
892 } | 874 } |
893 } | 875 } |
894 } | 876 } |
895 } | 877 } |
896 | 878 |
897 } // end of anonymous namespace | 879 } // end of anonymous namespace |
898 | 880 |
899 void CfgNode::emit(Cfg *Func) const { | 881 void CfgNode::emit(Cfg *Func) const { |
900 if (!BuildDefs::dump()) | 882 if (!BuildDefs::dump()) |
901 return; | 883 return; |
902 Func->setCurrentNode(this); | 884 Func->setCurrentNode(this); |
903 Ostream &Str = Func->getContext()->getStrEmit(); | 885 Ostream &Str = Func->getContext()->getStrEmit(); |
904 Liveness *Liveness = Func->getLiveness(); | 886 Liveness *Liveness = Func->getLiveness(); |
905 bool DecorateAsm = | 887 bool DecorateAsm = |
906 Liveness && Func->getContext()->getFlags().getDecorateAsm(); | 888 Liveness && Func->getContext()->getFlags().getDecorateAsm(); |
907 Str << getAsmName() << ":\n"; | 889 Str << getAsmName() << ":\n"; |
908 // LiveRegCount keeps track of the number of currently live | 890 // LiveRegCount keeps track of the number of currently live variables that |
909 // variables that each register is assigned to. Normally that would | 891 // each register is assigned to. Normally that would be only 0 or 1, but the |
910 // be only 0 or 1, but the register allocator's AllowOverlap | 892 // register allocator's AllowOverlap inference allows it to be greater than 1 |
911 // inference allows it to be greater than 1 for short periods. | 893 // for short periods. |
912 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); | 894 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); |
913 if (DecorateAsm) { | 895 if (DecorateAsm) { |
914 constexpr bool IsLiveIn = true; | 896 constexpr bool IsLiveIn = true; |
915 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); | 897 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); |
916 } | 898 } |
917 | 899 |
918 for (const Inst &I : Phis) { | 900 for (const Inst &I : Phis) { |
919 if (I.isDeleted()) | 901 if (I.isDeleted()) |
920 continue; | 902 continue; |
921 // Emitting a Phi instruction should cause an error. | 903 // Emitting a Phi instruction should cause an error. |
922 I.emit(Func); | 904 I.emit(Func); |
923 } | 905 } |
924 for (const Inst &I : Insts) { | 906 for (const Inst &I : Insts) { |
925 if (I.isDeleted()) | 907 if (I.isDeleted()) |
926 continue; | 908 continue; |
927 if (I.isRedundantAssign()) { | 909 if (I.isRedundantAssign()) { |
928 // Usually, redundant assignments end the live range of the src | 910 // Usually, redundant assignments end the live range of the src variable |
929 // variable and begin the live range of the dest variable, with | 911 // and begin the live range of the dest variable, with no net effect on |
930 // no net effect on the liveness of their register. However, if | 912 // the liveness of their register. However, if the register allocator |
931 // the register allocator infers the AllowOverlap condition, | 913 // infers the AllowOverlap condition, then this may be a redundant |
932 // then this may be a redundant assignment that does not end the | 914 // assignment that does not end the src variable's live range, in which |
933 // src variable's live range, in which case the active variable | 915 // case the active variable count for that register needs to be bumped. |
934 // count for that register needs to be bumped. That normally | 916 // That normally would have happened as part of emitLiveRangesEnded(), |
935 // would have happened as part of emitLiveRangesEnded(), but | 917 // but that isn't called for redundant assignments. |
936 // that isn't called for redundant assignments. | |
937 Variable *Dest = I.getDest(); | 918 Variable *Dest = I.getDest(); |
938 if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) | 919 if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) |
939 ++LiveRegCount[Dest->getRegNum()]; | 920 ++LiveRegCount[Dest->getRegNum()]; |
940 continue; | 921 continue; |
941 } | 922 } |
942 I.emit(Func); | 923 I.emit(Func); |
943 if (DecorateAsm) | 924 if (DecorateAsm) |
944 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); | 925 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); |
945 Str << "\n"; | 926 Str << "\n"; |
946 updateStats(Func, &I); | 927 updateStats(Func, &I); |
(...skipping 12 matching lines...) Expand all Loading... |
959 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; | 940 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; |
960 | 941 |
961 public: | 942 public: |
962 BundleEmitHelper(Assembler *Asm, TargetLowering *Target, | 943 BundleEmitHelper(Assembler *Asm, TargetLowering *Target, |
963 const InstList &Insts) | 944 const InstList &Insts) |
964 : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End), | 945 : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End), |
965 BundleSize(1 << Asm->getBundleAlignLog2Bytes()), | 946 BundleSize(1 << Asm->getBundleAlignLog2Bytes()), |
966 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {} | 947 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {} |
967 // Check whether we're currently within a bundle_lock region. | 948 // Check whether we're currently within a bundle_lock region. |
968 bool isInBundleLockRegion() const { return BundleLockStart != End; } | 949 bool isInBundleLockRegion() const { return BundleLockStart != End; } |
969 // Check whether the current bundle_lock region has the align_to_end | 950 // Check whether the current bundle_lock region has the align_to_end option. |
970 // option. | |
971 bool isAlignToEnd() const { | 951 bool isAlignToEnd() const { |
972 assert(isInBundleLockRegion()); | 952 assert(isInBundleLockRegion()); |
973 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() == | 953 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() == |
974 InstBundleLock::Opt_AlignToEnd; | 954 InstBundleLock::Opt_AlignToEnd; |
975 } | 955 } |
976 // Check whether the entire bundle_lock region falls within the same | 956 // Check whether the entire bundle_lock region falls within the same bundle. |
977 // bundle. | |
978 bool isSameBundle() const { | 957 bool isSameBundle() const { |
979 assert(isInBundleLockRegion()); | 958 assert(isInBundleLockRegion()); |
980 return SizeSnapshotPre == SizeSnapshotPost || | 959 return SizeSnapshotPre == SizeSnapshotPost || |
981 (SizeSnapshotPre & BundleMaskHi) == | 960 (SizeSnapshotPre & BundleMaskHi) == |
982 ((SizeSnapshotPost - 1) & BundleMaskHi); | 961 ((SizeSnapshotPost - 1) & BundleMaskHi); |
983 } | 962 } |
984 // Get the bundle alignment of the first instruction of the | 963 // Get the bundle alignment of the first instruction of the bundle_lock |
985 // bundle_lock region. | 964 // region. |
986 intptr_t getPreAlignment() const { | 965 intptr_t getPreAlignment() const { |
987 assert(isInBundleLockRegion()); | 966 assert(isInBundleLockRegion()); |
988 return SizeSnapshotPre & BundleMaskLo; | 967 return SizeSnapshotPre & BundleMaskLo; |
989 } | 968 } |
990 // Get the bundle alignment of the first instruction past the | 969 // Get the bundle alignment of the first instruction past the bundle_lock |
991 // bundle_lock region. | 970 // region. |
992 intptr_t getPostAlignment() const { | 971 intptr_t getPostAlignment() const { |
993 assert(isInBundleLockRegion()); | 972 assert(isInBundleLockRegion()); |
994 return SizeSnapshotPost & BundleMaskLo; | 973 return SizeSnapshotPost & BundleMaskLo; |
995 } | 974 } |
996 // Get the iterator pointing to the bundle_lock instruction, e.g. to | 975 // Get the iterator pointing to the bundle_lock instruction, e.g. to roll |
997 // roll back the instruction iteration to that point. | 976 // back the instruction iteration to that point. |
998 InstList::const_iterator getBundleLockStart() const { | 977 InstList::const_iterator getBundleLockStart() const { |
999 assert(isInBundleLockRegion()); | 978 assert(isInBundleLockRegion()); |
1000 return BundleLockStart; | 979 return BundleLockStart; |
1001 } | 980 } |
1002 // Set up bookkeeping when the bundle_lock instruction is first | 981 // Set up bookkeeping when the bundle_lock instruction is first processed. |
1003 // processed. | |
1004 void enterBundleLock(InstList::const_iterator I) { | 982 void enterBundleLock(InstList::const_iterator I) { |
1005 assert(!isInBundleLockRegion()); | 983 assert(!isInBundleLockRegion()); |
1006 BundleLockStart = I; | 984 BundleLockStart = I; |
1007 SizeSnapshotPre = Asm->getBufferSize(); | 985 SizeSnapshotPre = Asm->getBufferSize(); |
1008 Asm->setPreliminary(true); | 986 Asm->setPreliminary(true); |
1009 Target->snapshotEmitState(); | 987 Target->snapshotEmitState(); |
1010 assert(isInBundleLockRegion()); | 988 assert(isInBundleLockRegion()); |
1011 } | 989 } |
1012 // Update bookkeeping when the bundle_unlock instruction is | 990 // Update bookkeeping when the bundle_unlock instruction is processed. |
1013 // processed. | |
1014 void enterBundleUnlock() { | 991 void enterBundleUnlock() { |
1015 assert(isInBundleLockRegion()); | 992 assert(isInBundleLockRegion()); |
1016 SizeSnapshotPost = Asm->getBufferSize(); | 993 SizeSnapshotPost = Asm->getBufferSize(); |
1017 } | 994 } |
1018 // Update bookkeeping when we are completely finished with the | 995 // Update bookkeeping when we are completely finished with the bundle_lock |
1019 // bundle_lock region. | 996 // region. |
1020 void leaveBundleLockRegion() { BundleLockStart = End; } | 997 void leaveBundleLockRegion() { BundleLockStart = End; } |
1021 // Check whether the instruction sequence fits within the current | 998 // Check whether the instruction sequence fits within the current bundle, and |
1022 // bundle, and if not, add nop padding to the end of the current | 999 // if not, add nop padding to the end of the current bundle. |
1023 // bundle. | |
1024 void padToNextBundle() { | 1000 void padToNextBundle() { |
1025 assert(isInBundleLockRegion()); | 1001 assert(isInBundleLockRegion()); |
1026 if (!isSameBundle()) { | 1002 if (!isSameBundle()) { |
1027 intptr_t PadToNextBundle = BundleSize - getPreAlignment(); | 1003 intptr_t PadToNextBundle = BundleSize - getPreAlignment(); |
1028 Asm->padWithNop(PadToNextBundle); | 1004 Asm->padWithNop(PadToNextBundle); |
1029 SizeSnapshotPre += PadToNextBundle; | 1005 SizeSnapshotPre += PadToNextBundle; |
1030 SizeSnapshotPost += PadToNextBundle; | 1006 SizeSnapshotPost += PadToNextBundle; |
1031 assert((Asm->getBufferSize() & BundleMaskLo) == 0); | 1007 assert((Asm->getBufferSize() & BundleMaskLo) == 0); |
1032 assert(Asm->getBufferSize() == SizeSnapshotPre); | 1008 assert(Asm->getBufferSize() == SizeSnapshotPre); |
1033 } | 1009 } |
1034 } | 1010 } |
1035 // If align_to_end is specified, add padding such that the | 1011 // If align_to_end is specified, add padding such that the instruction |
1036 // instruction sequences ends precisely at a bundle boundary. | 1012 // sequences ends precisely at a bundle boundary. |
1037 void padForAlignToEnd() { | 1013 void padForAlignToEnd() { |
1038 assert(isInBundleLockRegion()); | 1014 assert(isInBundleLockRegion()); |
1039 if (isAlignToEnd()) { | 1015 if (isAlignToEnd()) { |
1040 if (intptr_t Offset = getPostAlignment()) { | 1016 if (intptr_t Offset = getPostAlignment()) { |
1041 Asm->padWithNop(BundleSize - Offset); | 1017 Asm->padWithNop(BundleSize - Offset); |
1042 SizeSnapshotPre = Asm->getBufferSize(); | 1018 SizeSnapshotPre = Asm->getBufferSize(); |
1043 } | 1019 } |
1044 } | 1020 } |
1045 } | 1021 } |
1046 // Update bookkeeping when rolling back for the second pass. | 1022 // Update bookkeeping when rolling back for the second pass. |
1047 void rollback() { | 1023 void rollback() { |
1048 assert(isInBundleLockRegion()); | 1024 assert(isInBundleLockRegion()); |
1049 Asm->setBufferSize(SizeSnapshotPre); | 1025 Asm->setBufferSize(SizeSnapshotPre); |
1050 Asm->setPreliminary(false); | 1026 Asm->setPreliminary(false); |
1051 Target->rollbackEmitState(); | 1027 Target->rollbackEmitState(); |
1052 } | 1028 } |
1053 | 1029 |
1054 private: | 1030 private: |
1055 Assembler *const Asm; | 1031 Assembler *const Asm; |
1056 TargetLowering *const Target; | 1032 TargetLowering *const Target; |
1057 // End is a sentinel value such that BundleLockStart==End implies | 1033 // End is a sentinel value such that BundleLockStart==End implies that we are |
1058 // that we are not in a bundle_lock region. | 1034 // not in a bundle_lock region. |
1059 const InstList::const_iterator End; | 1035 const InstList::const_iterator End; |
1060 InstList::const_iterator BundleLockStart; | 1036 InstList::const_iterator BundleLockStart; |
1061 const intptr_t BundleSize; | 1037 const intptr_t BundleSize; |
1062 // Masking with BundleMaskLo identifies an address's bundle offset. | 1038 // Masking with BundleMaskLo identifies an address's bundle offset. |
1063 const intptr_t BundleMaskLo; | 1039 const intptr_t BundleMaskLo; |
1064 // Masking with BundleMaskHi identifies an address's bundle. | 1040 // Masking with BundleMaskHi identifies an address's bundle. |
1065 const intptr_t BundleMaskHi; | 1041 const intptr_t BundleMaskHi; |
1066 intptr_t SizeSnapshotPre = 0; | 1042 intptr_t SizeSnapshotPre = 0; |
1067 intptr_t SizeSnapshotPost = 0; | 1043 intptr_t SizeSnapshotPost = 0; |
1068 }; | 1044 }; |
1069 | 1045 |
1070 } // end of anonymous namespace | 1046 } // end of anonymous namespace |
1071 | 1047 |
1072 void CfgNode::emitIAS(Cfg *Func) const { | 1048 void CfgNode::emitIAS(Cfg *Func) const { |
1073 Func->setCurrentNode(this); | 1049 Func->setCurrentNode(this); |
1074 Assembler *Asm = Func->getAssembler<>(); | 1050 Assembler *Asm = Func->getAssembler<>(); |
1075 // TODO(stichnot): When sandboxing, defer binding the node label | 1051 // TODO(stichnot): When sandboxing, defer binding the node label until just |
1076 // until just before the first instruction is emitted, to reduce the | 1052 // before the first instruction is emitted, to reduce the chance that a |
1077 // chance that a padding nop is a branch target. | 1053 // padding nop is a branch target. |
1078 Asm->bindCfgNodeLabel(getIndex()); | 1054 Asm->bindCfgNodeLabel(getIndex()); |
1079 for (const Inst &I : Phis) { | 1055 for (const Inst &I : Phis) { |
1080 if (I.isDeleted()) | 1056 if (I.isDeleted()) |
1081 continue; | 1057 continue; |
1082 // Emitting a Phi instruction should cause an error. | 1058 // Emitting a Phi instruction should cause an error. |
1083 I.emitIAS(Func); | 1059 I.emitIAS(Func); |
1084 } | 1060 } |
1085 | 1061 |
1086 // Do the simple emission if not sandboxed. | 1062 // Do the simple emission if not sandboxed. |
1087 if (!Func->getContext()->getFlags().getUseSandboxing()) { | 1063 if (!Func->getContext()->getFlags().getUseSandboxing()) { |
1088 for (const Inst &I : Insts) { | 1064 for (const Inst &I : Insts) { |
1089 if (!I.isDeleted() && !I.isRedundantAssign()) { | 1065 if (!I.isDeleted() && !I.isRedundantAssign()) { |
1090 I.emitIAS(Func); | 1066 I.emitIAS(Func); |
1091 updateStats(Func, &I); | 1067 updateStats(Func, &I); |
1092 } | 1068 } |
1093 } | 1069 } |
1094 return; | 1070 return; |
1095 } | 1071 } |
1096 | 1072 |
1097 // The remainder of the function handles emission with sandboxing. | 1073 // The remainder of the function handles emission with sandboxing. There are |
1098 // There are explicit bundle_lock regions delimited by bundle_lock | 1074 // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock |
1099 // and bundle_unlock instructions. All other instructions are | 1075 // instructions. All other instructions are treated as an implicit |
1100 // treated as an implicit one-instruction bundle_lock region. | 1076 // one-instruction bundle_lock region. Emission is done twice for each |
1101 // Emission is done twice for each bundle_lock region. The first | 1077 // bundle_lock region. The first pass is a preliminary pass, after which we |
1102 // pass is a preliminary pass, after which we can figure out what | 1078 // can figure out what nop padding is needed, then roll back, and make the |
1103 // nop padding is needed, then roll back, and make the final pass. | 1079 // final pass. |
1104 // | 1080 // |
1105 // Ideally, the first pass would be speculative and the second pass | 1081 // Ideally, the first pass would be speculative and the second pass would |
1106 // would only be done if nop padding were needed, but the structure | 1082 // only be done if nop padding were needed, but the structure of the |
1107 // of the integrated assembler makes it hard to roll back the state | 1083 // integrated assembler makes it hard to roll back the state of label |
1108 // of label bindings, label links, and relocation fixups. Instead, | 1084 // bindings, label links, and relocation fixups. Instead, the first pass just |
1109 // the first pass just disables all mutation of that state. | 1085 // disables all mutation of that state. |
1110 | 1086 |
1111 BundleEmitHelper Helper(Asm, Func->getTarget(), Insts); | 1087 BundleEmitHelper Helper(Asm, Func->getTarget(), Insts); |
1112 InstList::const_iterator End = Insts.end(); | 1088 InstList::const_iterator End = Insts.end(); |
1113 // Retrying indicates that we had to roll back to the bundle_lock | 1089 // Retrying indicates that we had to roll back to the bundle_lock instruction |
1114 // instruction to apply padding before the bundle_lock sequence. | 1090 // to apply padding before the bundle_lock sequence. |
1115 bool Retrying = false; | 1091 bool Retrying = false; |
1116 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) { | 1092 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) { |
1117 if (I->isDeleted() || I->isRedundantAssign()) | 1093 if (I->isDeleted() || I->isRedundantAssign()) |
1118 continue; | 1094 continue; |
1119 | 1095 |
1120 if (llvm::isa<InstBundleLock>(I)) { | 1096 if (llvm::isa<InstBundleLock>(I)) { |
1121 // Set up the initial bundle_lock state. This should not happen | 1097 // Set up the initial bundle_lock state. This should not happen while |
1122 // while retrying, because the retry rolls back to the | 1098 // retrying, because the retry rolls back to the instruction following |
1123 // instruction following the bundle_lock instruction. | 1099 // the bundle_lock instruction. |
1124 assert(!Retrying); | 1100 assert(!Retrying); |
1125 Helper.enterBundleLock(I); | 1101 Helper.enterBundleLock(I); |
1126 continue; | 1102 continue; |
1127 } | 1103 } |
1128 | 1104 |
1129 if (llvm::isa<InstBundleUnlock>(I)) { | 1105 if (llvm::isa<InstBundleUnlock>(I)) { |
1130 Helper.enterBundleUnlock(); | 1106 Helper.enterBundleUnlock(); |
1131 if (Retrying) { | 1107 if (Retrying) { |
1132 // Make sure all instructions are in the same bundle. | 1108 // Make sure all instructions are in the same bundle. |
1133 assert(Helper.isSameBundle()); | 1109 assert(Helper.isSameBundle()); |
1134 // If align_to_end is specified, make sure the next | 1110 // If align_to_end is specified, make sure the next instruction begins |
1135 // instruction begins the bundle. | 1111 // the bundle. |
1136 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0); | 1112 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0); |
1137 Helper.leaveBundleLockRegion(); | 1113 Helper.leaveBundleLockRegion(); |
1138 Retrying = false; | 1114 Retrying = false; |
1139 } else { | 1115 } else { |
1140 // This is the first pass, so roll back for the retry pass. | 1116 // This is the first pass, so roll back for the retry pass. |
1141 Helper.rollback(); | 1117 Helper.rollback(); |
1142 // Pad to the next bundle if the instruction sequence crossed | 1118 // Pad to the next bundle if the instruction sequence crossed a bundle |
1143 // a bundle boundary. | 1119 // boundary. |
1144 Helper.padToNextBundle(); | 1120 Helper.padToNextBundle(); |
1145 // Insert additional padding to make AlignToEnd work. | 1121 // Insert additional padding to make AlignToEnd work. |
1146 Helper.padForAlignToEnd(); | 1122 Helper.padForAlignToEnd(); |
1147 // Prepare for the retry pass after padding is done. | 1123 // Prepare for the retry pass after padding is done. |
1148 Retrying = true; | 1124 Retrying = true; |
1149 I = Helper.getBundleLockStart(); | 1125 I = Helper.getBundleLockStart(); |
1150 } | 1126 } |
1151 continue; | 1127 continue; |
1152 } | 1128 } |
1153 | 1129 |
(...skipping 10 matching lines...) Expand all Loading... |
1164 I->emitIAS(Func); | 1140 I->emitIAS(Func); |
1165 Helper.enterBundleUnlock(); | 1141 Helper.enterBundleUnlock(); |
1166 Helper.rollback(); | 1142 Helper.rollback(); |
1167 Helper.padToNextBundle(); | 1143 Helper.padToNextBundle(); |
1168 I->emitIAS(Func); | 1144 I->emitIAS(Func); |
1169 updateStats(Func, I); | 1145 updateStats(Func, I); |
1170 Helper.leaveBundleLockRegion(); | 1146 Helper.leaveBundleLockRegion(); |
1171 } | 1147 } |
1172 } | 1148 } |
1173 | 1149 |
1174 // Don't allow bundle locking across basic blocks, to keep the | 1150 // Don't allow bundle locking across basic blocks, to keep the backtracking |
1175 // backtracking mechanism simple. | 1151 // mechanism simple. |
1176 assert(!Helper.isInBundleLockRegion()); | 1152 assert(!Helper.isInBundleLockRegion()); |
1177 assert(!Retrying); | 1153 assert(!Retrying); |
1178 } | 1154 } |
1179 | 1155 |
1180 void CfgNode::dump(Cfg *Func) const { | 1156 void CfgNode::dump(Cfg *Func) const { |
1181 if (!BuildDefs::dump()) | 1157 if (!BuildDefs::dump()) |
1182 return; | 1158 return; |
1183 Func->setCurrentNode(this); | 1159 Func->setCurrentNode(this); |
1184 Ostream &Str = Func->getContext()->getStrDump(); | 1160 Ostream &Str = Func->getContext()->getStrDump(); |
1185 Liveness *Liveness = Func->getLiveness(); | 1161 Liveness *Liveness = Func->getLiveness(); |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1283 InstIntrinsicCall *Inst = InstIntrinsicCall::create( | 1259 InstIntrinsicCall *Inst = InstIntrinsicCall::create( |
1284 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); | 1260 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); |
1285 Inst->addArg(AtomicRMWOp); | 1261 Inst->addArg(AtomicRMWOp); |
1286 Inst->addArg(Counter); | 1262 Inst->addArg(Counter); |
1287 Inst->addArg(One); | 1263 Inst->addArg(One); |
1288 Inst->addArg(OrderAcquireRelease); | 1264 Inst->addArg(OrderAcquireRelease); |
1289 Insts.push_front(Inst); | 1265 Insts.push_front(Inst); |
1290 } | 1266 } |
1291 | 1267 |
1292 } // end of namespace Ice | 1268 } // end of namespace Ice |
OLD | NEW |