Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(517)

Side by Side Diff: src/IceCfgNode.cpp

Issue 1341423002: Reflow comments to use the full width. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix spelling and rebase Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceCfgNode.h ('k') | src/IceClFlags.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// This file implements the CfgNode class, including the complexities 11 /// This file implements the CfgNode class, including the complexities of
12 /// of instruction insertion and in-edge calculation. 12 /// instruction insertion and in-edge calculation.
13 /// 13 ///
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
16 #include "IceCfgNode.h" 16 #include "IceCfgNode.h"
17 17
18 #include "IceAssembler.h" 18 #include "IceAssembler.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceGlobalInits.h" 20 #include "IceGlobalInits.h"
21 #include "IceInst.h" 21 #include "IceInst.h"
22 #include "IceInstVarIter.h" 22 #include "IceInstVarIter.h"
23 #include "IceLiveness.h" 23 #include "IceLiveness.h"
24 #include "IceOperand.h" 24 #include "IceOperand.h"
25 #include "IceTargetLowering.h" 25 #include "IceTargetLowering.h"
26 26
27 namespace Ice { 27 namespace Ice {
28 28
29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber) 29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber)
30 : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {} 30 : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {}
31 31
32 // Returns the name the node was created with. If no name was given, 32 // Returns the name the node was created with. If no name was given, it
33 // it synthesizes a (hopefully) unique name. 33 // synthesizes a (hopefully) unique name.
34 IceString CfgNode::getName() const { 34 IceString CfgNode::getName() const {
35 if (NameIndex >= 0) 35 if (NameIndex >= 0)
36 return Func->getIdentifierName(NameIndex); 36 return Func->getIdentifierName(NameIndex);
37 return "__" + std::to_string(LabelNumber); 37 return "__" + std::to_string(LabelNumber);
38 } 38 }
39 39
40 // Adds an instruction to either the Phi list or the regular 40 // Adds an instruction to either the Phi list or the regular instruction list.
41 // instruction list. Validates that all Phis are added before all 41 // Validates that all Phis are added before all regular instructions.
42 // regular instructions.
43 void CfgNode::appendInst(Inst *Inst) { 42 void CfgNode::appendInst(Inst *Inst) {
44 ++InstCountEstimate; 43 ++InstCountEstimate;
45 if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) { 44 if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) {
46 if (!Insts.empty()) { 45 if (!Insts.empty()) {
47 Func->setError("Phi instruction added to the middle of a block"); 46 Func->setError("Phi instruction added to the middle of a block");
48 return; 47 return;
49 } 48 }
50 Phis.push_back(Phi); 49 Phis.push_back(Phi);
51 } else { 50 } else {
52 Insts.push_back(Inst); 51 Insts.push_back(Inst);
53 } 52 }
54 } 53 }
55 54
56 // Renumbers the non-deleted instructions in the node. This needs to 55 // Renumbers the non-deleted instructions in the node. This needs to be done in
57 // be done in preparation for live range analysis. The instruction 56 // preparation for live range analysis. The instruction numbers in a block must
58 // numbers in a block must be monotonically increasing. The range of 57 // be monotonically increasing. The range of instruction numbers in a block,
59 // instruction numbers in a block, from lowest to highest, must not 58 // from lowest to highest, must not overlap with the range of any other block.
60 // overlap with the range of any other block.
61 void CfgNode::renumberInstructions() { 59 void CfgNode::renumberInstructions() {
62 InstNumberT FirstNumber = Func->getNextInstNumber(); 60 InstNumberT FirstNumber = Func->getNextInstNumber();
63 for (Inst &I : Phis) 61 for (Inst &I : Phis)
64 I.renumber(Func); 62 I.renumber(Func);
65 for (Inst &I : Insts) 63 for (Inst &I : Insts)
66 I.renumber(Func); 64 I.renumber(Func);
67 InstCountEstimate = Func->getNextInstNumber() - FirstNumber; 65 InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
68 } 66 }
69 67
70 // When a node is created, the OutEdges are immediately known, but the 68 // When a node is created, the OutEdges are immediately known, but the InEdges
71 // InEdges have to be built up incrementally. After the CFG has been 69 // have to be built up incrementally. After the CFG has been constructed, the
72 // constructed, the computePredecessors() pass finalizes it by 70 // computePredecessors() pass finalizes it by creating the InEdges list.
73 // creating the InEdges list.
74 void CfgNode::computePredecessors() { 71 void CfgNode::computePredecessors() {
75 for (CfgNode *Succ : OutEdges) 72 for (CfgNode *Succ : OutEdges)
76 Succ->InEdges.push_back(this); 73 Succ->InEdges.push_back(this);
77 } 74 }
78 75
79 void CfgNode::computeSuccessors() { 76 void CfgNode::computeSuccessors() {
80 OutEdges = Insts.rbegin()->getTerminatorEdges(); 77 OutEdges = Insts.rbegin()->getTerminatorEdges();
81 } 78 }
82 79
83 // Validate each Phi instruction in the node with respect to control flow. For 80 // Validate each Phi instruction in the node with respect to control flow. For
84 // every phi argument, its label must appear in the predecessor list. For each 81 // every phi argument, its label must appear in the predecessor list. For each
85 // predecessor, there must be a phi argument with that label. We don't check 82 // predecessor, there must be a phi argument with that label. We don't check
86 // that phi arguments with the same label have the same value. 83 // that phi arguments with the same label have the same value.
87 void CfgNode::validatePhis() { 84 void CfgNode::validatePhis() {
88 for (Inst &Instr : Phis) { 85 for (Inst &Instr : Phis) {
89 auto *Phi = llvm::cast<InstPhi>(&Instr); 86 auto *Phi = llvm::cast<InstPhi>(&Instr);
90 // We do a simple O(N^2) algorithm to check for consistency. Even so, it 87 // We do a simple O(N^2) algorithm to check for consistency. Even so, it
91 // shows up as only about 0.2% of the total translation time. But if 88 // shows up as only about 0.2% of the total translation time. But if
92 // necessary, we could improve the complexity by using a hash table to count 89 // necessary, we could improve the complexity by using a hash table to
93 // how many times each node is referenced in the Phi instruction, and how 90 // count how many times each node is referenced in the Phi instruction, and
94 // many times each node is referenced in the incoming edge list, and compare 91 // how many times each node is referenced in the incoming edge list, and
95 // the two for equality. 92 // compare the two for equality.
96 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) { 93 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
97 CfgNode *Label = Phi->getLabel(i); 94 CfgNode *Label = Phi->getLabel(i);
98 bool Found = false; 95 bool Found = false;
99 for (CfgNode *InNode : getInEdges()) { 96 for (CfgNode *InNode : getInEdges()) {
100 if (InNode == Label) { 97 if (InNode == Label) {
101 Found = true; 98 Found = true;
102 break; 99 break;
103 } 100 }
104 } 101 }
105 if (!Found) 102 if (!Found)
106 llvm::report_fatal_error("Phi error: label for bad incoming edge"); 103 llvm::report_fatal_error("Phi error: label for bad incoming edge");
107 } 104 }
108 for (CfgNode *InNode : getInEdges()) { 105 for (CfgNode *InNode : getInEdges()) {
109 bool Found = false; 106 bool Found = false;
110 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) { 107 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
111 CfgNode *Label = Phi->getLabel(i); 108 CfgNode *Label = Phi->getLabel(i);
112 if (InNode == Label) { 109 if (InNode == Label) {
113 Found = true; 110 Found = true;
114 break; 111 break;
115 } 112 }
116 } 113 }
117 if (!Found) 114 if (!Found)
118 llvm::report_fatal_error("Phi error: missing label for incoming edge"); 115 llvm::report_fatal_error("Phi error: missing label for incoming edge");
119 } 116 }
120 } 117 }
121 } 118 }
122 119
123 // This does part 1 of Phi lowering, by creating a new dest variable 120 // This does part 1 of Phi lowering, by creating a new dest variable for each
124 // for each Phi instruction, replacing the Phi instruction's dest with 121 // Phi instruction, replacing the Phi instruction's dest with that variable,
125 // that variable, and adding an explicit assignment of the old dest to 122 // and adding an explicit assignment of the old dest to the new dest. For
126 // the new dest. For example, 123 // example,
127 // a=phi(...) 124 // a=phi(...)
128 // changes to 125 // changes to
129 // "a_phi=phi(...); a=a_phi". 126 // "a_phi=phi(...); a=a_phi".
130 // 127 //
131 // This is in preparation for part 2 which deletes the Phi 128 // This is in preparation for part 2 which deletes the Phi instructions and
132 // instructions and appends assignment instructions to predecessor 129 // appends assignment instructions to predecessor blocks. Note that this
133 // blocks. Note that this transformation preserves SSA form. 130 // transformation preserves SSA form.
134 void CfgNode::placePhiLoads() { 131 void CfgNode::placePhiLoads() {
135 for (Inst &I : Phis) { 132 for (Inst &I : Phis) {
136 auto Phi = llvm::dyn_cast<InstPhi>(&I); 133 auto Phi = llvm::dyn_cast<InstPhi>(&I);
137 Insts.insert(Insts.begin(), Phi->lower(Func)); 134 Insts.insert(Insts.begin(), Phi->lower(Func));
138 } 135 }
139 } 136 }
140 137
141 // This does part 2 of Phi lowering. For each Phi instruction at each 138 // This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
142 // out-edge, create a corresponding assignment instruction, and add 139 // create a corresponding assignment instruction, and add all the assignments
143 // all the assignments near the end of this block. They need to be 140 // near the end of this block. They need to be added before any branch
144 // added before any branch instruction, and also if the block ends 141 // instruction, and also if the block ends with a compare instruction followed
145 // with a compare instruction followed by a branch instruction that we 142 // by a branch instruction that we may want to fuse, it's better to insert the
146 // may want to fuse, it's better to insert the new assignments before 143 // new assignments before the compare instruction. The
147 // the compare instruction. The tryOptimizedCmpxchgCmpBr() method 144 // tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
148 // assumes this ordering of instructions.
149 // 145 //
150 // Note that this transformation takes the Phi dest variables out of 146 // Note that this transformation takes the Phi dest variables out of SSA form,
151 // SSA form, as there may be assignments to the dest variable in 147 // as there may be assignments to the dest variable in multiple blocks.
152 // multiple blocks.
153 void CfgNode::placePhiStores() { 148 void CfgNode::placePhiStores() {
154 // Find the insertion point. 149 // Find the insertion point.
155 InstList::iterator InsertionPoint = Insts.end(); 150 InstList::iterator InsertionPoint = Insts.end();
156 // Every block must end in a terminator instruction, and therefore 151 // Every block must end in a terminator instruction, and therefore must have
157 // must have at least one instruction, so it's valid to decrement 152 // at least one instruction, so it's valid to decrement InsertionPoint (but
158 // InsertionPoint (but assert just in case). 153 // assert just in case).
159 assert(InsertionPoint != Insts.begin()); 154 assert(InsertionPoint != Insts.begin());
160 --InsertionPoint; 155 --InsertionPoint;
161 // Confirm that InsertionPoint is a terminator instruction. Calling 156 // Confirm that InsertionPoint is a terminator instruction. Calling
162 // getTerminatorEdges() on a non-terminator instruction will cause 157 // getTerminatorEdges() on a non-terminator instruction will cause an
163 // an llvm_unreachable(). 158 // llvm_unreachable().
164 (void)InsertionPoint->getTerminatorEdges(); 159 (void)InsertionPoint->getTerminatorEdges();
165 // SafeInsertionPoint is always immediately before the terminator 160 // SafeInsertionPoint is always immediately before the terminator
166 // instruction. If the block ends in a compare and conditional 161 // instruction. If the block ends in a compare and conditional branch, it's
167 // branch, it's better to place the Phi store before the compare so 162 // better to place the Phi store before the compare so as not to interfere
168 // as not to interfere with compare/branch fusing. However, if the 163 // with compare/branch fusing. However, if the compare instruction's dest
169 // compare instruction's dest operand is the same as the new 164 // operand is the same as the new assignment statement's source operand, this
170 // assignment statement's source operand, this can't be done due to 165 // can't be done due to data dependences, so we need to fall back to the
171 // data dependences, so we need to fall back to the 166 // SafeInsertionPoint. To illustrate:
172 // SafeInsertionPoint. To illustrate:
173 // ; <label>:95 167 // ; <label>:95
174 // %97 = load i8* %96, align 1 168 // %97 = load i8* %96, align 1
175 // %98 = icmp ne i8 %97, 0 169 // %98 = icmp ne i8 %97, 0
176 // br i1 %98, label %99, label %2132 170 // br i1 %98, label %99, label %2132
177 // ; <label>:99 171 // ; <label>:99
178 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ] 172 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
179 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ] 173 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
180 // would be Phi-lowered as: 174 // would be Phi-lowered as:
181 // ; <label>:95 175 // ; <label>:95
182 // %97 = load i8* %96, align 1 176 // %97 = load i8* %96, align 1
183 // %100_phi = %97 ; can be at InsertionPoint 177 // %100_phi = %97 ; can be at InsertionPoint
184 // %98 = icmp ne i8 %97, 0 178 // %98 = icmp ne i8 %97, 0
185 // %101_phi = %98 ; must be at SafeInsertionPoint 179 // %101_phi = %98 ; must be at SafeInsertionPoint
186 // br i1 %98, label %99, label %2132 180 // br i1 %98, label %99, label %2132
187 // ; <label>:99 181 // ; <label>:99
188 // %100 = %100_phi 182 // %100 = %100_phi
189 // %101 = %101_phi 183 // %101 = %101_phi
190 // 184 //
191 // TODO(stichnot): It may be possible to bypass this whole 185 // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
192 // SafeInsertionPoint mechanism. If a source basic block ends in a 186 // mechanism. If a source basic block ends in a conditional branch:
193 // conditional branch:
194 // labelSource: 187 // labelSource:
195 // ... 188 // ...
196 // br i1 %foo, label %labelTrue, label %labelFalse 189 // br i1 %foo, label %labelTrue, label %labelFalse
197 // and a branch target has a Phi involving the branch operand: 190 // and a branch target has a Phi involving the branch operand:
198 // labelTrue: 191 // labelTrue:
199 // %bar = phi i1 [ %foo, %labelSource ], ... 192 // %bar = phi i1 [ %foo, %labelSource ], ...
200 // then we actually know the constant i1 value of the Phi operand: 193 // then we actually know the constant i1 value of the Phi operand:
201 // labelTrue: 194 // labelTrue:
202 // %bar = phi i1 [ true, %labelSource ], ... 195 // %bar = phi i1 [ true, %labelSource ], ...
203 // It seems that this optimization should be done by clang or opt, 196 // It seems that this optimization should be done by clang or opt, but we
204 // but we could also do it here. 197 // could also do it here.
205 InstList::iterator SafeInsertionPoint = InsertionPoint; 198 InstList::iterator SafeInsertionPoint = InsertionPoint;
206 // Keep track of the dest variable of a compare instruction, so that 199 // Keep track of the dest variable of a compare instruction, so that we
207 // we insert the new instruction at the SafeInsertionPoint if the 200 // insert the new instruction at the SafeInsertionPoint if the compare's dest
208 // compare's dest matches the Phi-lowered assignment's source. 201 // matches the Phi-lowered assignment's source.
209 Variable *CmpInstDest = nullptr; 202 Variable *CmpInstDest = nullptr;
210 // If the current insertion point is at a conditional branch 203 // If the current insertion point is at a conditional branch instruction, and
211 // instruction, and the previous instruction is a compare 204 // the previous instruction is a compare instruction, then we move the
212 // instruction, then we move the insertion point before the compare 205 // insertion point before the compare instruction so as not to interfere with
213 // instruction so as not to interfere with compare/branch fusing. 206 // compare/branch fusing.
214 if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) { 207 if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
215 if (!Branch->isUnconditional()) { 208 if (!Branch->isUnconditional()) {
216 if (InsertionPoint != Insts.begin()) { 209 if (InsertionPoint != Insts.begin()) {
217 --InsertionPoint; 210 --InsertionPoint;
218 if (llvm::isa<InstIcmp>(InsertionPoint) || 211 if (llvm::isa<InstIcmp>(InsertionPoint) ||
219 llvm::isa<InstFcmp>(InsertionPoint)) { 212 llvm::isa<InstFcmp>(InsertionPoint)) {
220 CmpInstDest = InsertionPoint->getDest(); 213 CmpInstDest = InsertionPoint->getDest();
221 } else { 214 } else {
222 ++InsertionPoint; 215 ++InsertionPoint;
223 } 216 }
(...skipping 18 matching lines...) Expand all
242 } 235 }
243 } 236 }
244 } 237 }
245 238
246 // Deletes the phi instructions after the loads and stores are placed. 239 // Deletes the phi instructions after the loads and stores are placed.
247 void CfgNode::deletePhis() { 240 void CfgNode::deletePhis() {
248 for (Inst &I : Phis) 241 for (Inst &I : Phis)
249 I.setDeleted(); 242 I.setDeleted();
250 } 243 }
251 244
252 // Splits the edge from Pred to this node by creating a new node and 245 // Splits the edge from Pred to this node by creating a new node and hooking up
253 // hooking up the in and out edges appropriately. (The EdgeIndex 246 // the in and out edges appropriately. (The EdgeIndex parameter is only used to
254 // parameter is only used to make the new node's name unique when 247 // make the new node's name unique when there are multiple edges between the
255 // there are multiple edges between the same pair of nodes.) The new 248 // same pair of nodes.) The new node's instruction list is initialized to the
256 // node's instruction list is initialized to the empty list, with no 249 // empty list, with no terminator instruction. There must not be multiple edges
257 // terminator instruction. There must not be multiple edges from Pred 250 // from Pred to this node so all Inst::getTerminatorEdges implementations must
258 // to this node so all Inst::getTerminatorEdges implementations must
259 // not contain duplicates. 251 // not contain duplicates.
260 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { 252 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
261 CfgNode *NewNode = Func->makeNode(); 253 CfgNode *NewNode = Func->makeNode();
262 // Depth is the minimum as it works if both are the same, but if one is 254 // Depth is the minimum as it works if both are the same, but if one is
263 // outside the loop and the other is inside, the new node should be placed 255 // outside the loop and the other is inside, the new node should be placed
264 // outside and not be executed multiple times within the loop. 256 // outside and not be executed multiple times within the loop.
265 NewNode->setLoopNestDepth( 257 NewNode->setLoopNestDepth(
266 std::min(getLoopNestDepth(), Pred->getLoopNestDepth())); 258 std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
267 if (BuildDefs::dump()) 259 if (BuildDefs::dump())
268 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + 260 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
269 std::to_string(EdgeIndex)); 261 std::to_string(EdgeIndex));
270 // The new node is added to the end of the node list, and will later 262 // The new node is added to the end of the node list, and will later need to
271 // need to be sorted into a reasonable topological order. 263 // be sorted into a reasonable topological order.
272 NewNode->setNeedsPlacement(true); 264 NewNode->setNeedsPlacement(true);
273 // Repoint Pred's out-edge. 265 // Repoint Pred's out-edge.
274 bool Found = false; 266 bool Found = false;
275 for (CfgNode *&I : Pred->OutEdges) { 267 for (CfgNode *&I : Pred->OutEdges) {
276 if (I == this) { 268 if (I == this) {
277 I = NewNode; 269 I = NewNode;
278 NewNode->InEdges.push_back(Pred); 270 NewNode->InEdges.push_back(Pred);
279 Found = true; 271 Found = true;
280 break; 272 break;
281 } 273 }
(...skipping 30 matching lines...) Expand all
312 return true; 304 return true;
313 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { 305 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) {
314 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) 306 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum())
315 return true; 307 return true;
316 } 308 }
317 return false; 309 return false;
318 } 310 }
319 311
320 } // end of anonymous namespace 312 } // end of anonymous namespace
321 313
322 // This the "advanced" version of Phi lowering for a basic block, in contrast to 314 // This the "advanced" version of Phi lowering for a basic block, in contrast
323 // the simple version that lowers through assignments involving temporaries. 315 // to the simple version that lowers through assignments involving temporaries.
324 // 316 //
325 // All Phi instructions in a basic block are conceptually executed in parallel. 317 // All Phi instructions in a basic block are conceptually executed in parallel.
326 // However, if we lower Phis early and commit to a sequential ordering, we may 318 // However, if we lower Phis early and commit to a sequential ordering, we may
327 // end up creating unnecessary interferences which lead to worse register 319 // end up creating unnecessary interferences which lead to worse register
328 // allocation. Delaying Phi scheduling until after register allocation can help 320 // allocation. Delaying Phi scheduling until after register allocation can help
329 // unless there are no free registers for shuffling registers or stack slots and 321 // unless there are no free registers for shuffling registers or stack slots
330 // spilling becomes necessary. 322 // and spilling becomes necessary.
331 // 323 //
332 // The advanced Phi lowering starts by finding a topological sort of the Phi 324 // The advanced Phi lowering starts by finding a topological sort of the Phi
333 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on B. 325 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on
334 // Preexisting register assignments are considered in the topological sort. If 326 // B. Preexisting register assignments are considered in the topological sort.
335 // a topological sort is not possible due to a cycle, the cycle is broken by 327 // If a topological sort is not possible due to a cycle, the cycle is broken by
336 // introducing a non-parallel temporary. For example, a cycle arising from a 328 // introducing a non-parallel temporary. For example, a cycle arising from a
337 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being 329 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
338 // equal, prefer to schedule assignments with register-allocated Src operands 330 // equal, prefer to schedule assignments with register-allocated Src operands
339 // earlier, in case that register becomes free afterwards, and prefer to 331 // earlier, in case that register becomes free afterwards, and prefer to
340 // schedule assignments with register-allocated Dest variables later, to keep 332 // schedule assignments with register-allocated Dest variables later, to keep
341 // that register free for longer. 333 // that register free for longer.
342 // 334 //
343 // Once the ordering is determined, the Cfg edge is split and the assignment 335 // Once the ordering is determined, the Cfg edge is split and the assignment
344 // list is lowered by the target lowering layer. Since the assignment lowering 336 // list is lowered by the target lowering layer. Since the assignment lowering
345 // may create new infinite-weight temporaries, a follow-on register allocation 337 // may create new infinite-weight temporaries, a follow-on register allocation
346 // pass will be needed. To prepare for this, liveness (including live range 338 // pass will be needed. To prepare for this, liveness (including live range
347 // calculation) of the split nodes needs to be calculated, and liveness of the 339 // calculation) of the split nodes needs to be calculated, and liveness of the
348 // original node need to be updated to "undo" the effects of the phi 340 // original node need to be updated to "undo" the effects of the phi
349 // assignments. 341 // assignments.
350 342
351 // The specific placement of the new node within the Cfg node list is deferred 343 // The specific placement of the new node within the Cfg node list is deferred
352 // until later, including after empty node contraction. 344 // until later, including after empty node contraction.
353 // 345 //
354 // After phi assignments are lowered across all blocks, another register 346 // After phi assignments are lowered across all blocks, another register
355 // allocation pass is run, focusing only on pre-colored and infinite-weight 347 // allocation pass is run, focusing only on pre-colored and infinite-weight
356 // variables, similar to Om1 register allocation (except without the need to 348 // variables, similar to Om1 register allocation (except without the need to
357 // specially compute these variables' live ranges, since they have already been 349 // specially compute these variables' live ranges, since they have already been
358 // precisely calculated). The register allocator in this mode needs the ability 350 // precisely calculated). The register allocator in this mode needs the ability
359 // to forcibly spill and reload registers in case none are naturally available. 351 // to forcibly spill and reload registers in case none are naturally available.
360 void CfgNode::advancedPhiLowering() { 352 void CfgNode::advancedPhiLowering() {
361 if (getPhis().empty()) 353 if (getPhis().empty())
362 return; 354 return;
363 355
364 // Count the number of non-deleted Phi instructions. 356 // Count the number of non-deleted Phi instructions.
365 struct PhiDesc { 357 struct PhiDesc {
366 InstPhi *Phi; 358 InstPhi *Phi;
367 Variable *Dest; 359 Variable *Dest;
368 Operand *Src; 360 Operand *Src;
(...skipping 27 matching lines...) Expand all
396 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); 388 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
397 SizeT Remaining = NumPhis; 389 SizeT Remaining = NumPhis;
398 390
399 // First pass computes Src and initializes NumPred. 391 // First pass computes Src and initializes NumPred.
400 for (size_t I = 0; I < NumPhis; ++I) { 392 for (size_t I = 0; I < NumPhis; ++I) {
401 Variable *Dest = Desc[I].Dest; 393 Variable *Dest = Desc[I].Dest;
402 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); 394 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred);
403 Desc[I].Src = Src; 395 Desc[I].Src = Src;
404 Desc[I].Processed = false; 396 Desc[I].Processed = false;
405 Desc[I].NumPred = 0; 397 Desc[I].NumPred = 0;
406 // Cherry-pick any trivial assignments, so that they don't 398 // Cherry-pick any trivial assignments, so that they don't contribute to
407 // contribute to the running complexity of the topological sort. 399 // the running complexity of the topological sort.
408 if (sameVarOrReg(Dest, Src)) { 400 if (sameVarOrReg(Dest, Src)) {
409 Desc[I].Processed = true; 401 Desc[I].Processed = true;
410 --Remaining; 402 --Remaining;
411 if (Dest != Src) 403 if (Dest != Src)
412 // If Dest and Src are syntactically the same, don't bother 404 // If Dest and Src are syntactically the same, don't bother adding
413 // adding the assignment, because in all respects it would 405 // the assignment, because in all respects it would be redundant, and
414 // be redundant, and if Dest/Src are on the stack, the 406 // if Dest/Src are on the stack, the target lowering may naively
415 // target lowering may naively decide to lower it using a 407 // decide to lower it using a temporary register.
416 // temporary register.
417 Split->appendInst(InstAssign::create(Func, Dest, Src)); 408 Split->appendInst(InstAssign::create(Func, Dest, Src));
418 } 409 }
419 } 410 }
420 // Second pass computes NumPred by comparing every pair of Phi 411 // Second pass computes NumPred by comparing every pair of Phi
421 // instructions. 412 // instructions.
422 for (size_t I = 0; I < NumPhis; ++I) { 413 for (size_t I = 0; I < NumPhis; ++I) {
423 if (Desc[I].Processed) 414 if (Desc[I].Processed)
424 continue; 415 continue;
425 const Variable *Dest = Desc[I].Dest; 416 const Variable *Dest = Desc[I].Dest;
426 for (size_t J = 0; J < NumPhis; ++J) { 417 for (size_t J = 0; J < NumPhis; ++J) {
427 if (Desc[J].Processed) 418 if (Desc[J].Processed)
428 continue; 419 continue;
429 if (I != J) { 420 if (I != J) {
430 // There shouldn't be two Phis with the same Dest variable 421 // There shouldn't be two Phis with the same Dest variable or
431 // or register. 422 // register.
432 assert(!sameVarOrReg(Dest, Desc[J].Dest)); 423 assert(!sameVarOrReg(Dest, Desc[J].Dest));
433 } 424 }
434 const Operand *Src = Desc[J].Src; 425 const Operand *Src = Desc[J].Src;
435 if (sameVarOrReg(Dest, Src)) 426 if (sameVarOrReg(Dest, Src))
436 ++Desc[I].NumPred; 427 ++Desc[I].NumPred;
437 } 428 }
438 } 429 }
439 430
440 // Another pass to compute initial Weight values. 431 // Another pass to compute initial Weight values.
441 432
442 // Always pick NumPred=0 over NumPred>0. 433 // Always pick NumPred=0 over NumPred>0.
443 constexpr int32_t WeightNoPreds = 4; 434 constexpr int32_t WeightNoPreds = 4;
444 // Prefer Src as a register because the register might free up. 435 // Prefer Src as a register because the register might free up.
445 constexpr int32_t WeightSrcIsReg = 2; 436 constexpr int32_t WeightSrcIsReg = 2;
446 // Prefer Dest not as a register because the register stays free 437 // Prefer Dest not as a register because the register stays free longer.
447 // longer.
448 constexpr int32_t WeightDestNotReg = 1; 438 constexpr int32_t WeightDestNotReg = 1;
449 439
450 for (size_t I = 0; I < NumPhis; ++I) { 440 for (size_t I = 0; I < NumPhis; ++I) {
451 if (Desc[I].Processed) 441 if (Desc[I].Processed)
452 continue; 442 continue;
453 int32_t Weight = 0; 443 int32_t Weight = 0;
454 if (Desc[I].NumPred == 0) 444 if (Desc[I].NumPred == 0)
455 Weight += WeightNoPreds; 445 Weight += WeightNoPreds;
456 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) 446 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src))
457 if (Var->hasReg()) 447 if (Var->hasReg())
458 Weight += WeightSrcIsReg; 448 Weight += WeightSrcIsReg;
459 if (!Desc[I].Dest->hasReg()) 449 if (!Desc[I].Dest->hasReg())
460 Weight += WeightDestNotReg; 450 Weight += WeightDestNotReg;
461 Desc[I].Weight = Weight; 451 Desc[I].Weight = Weight;
462 } 452 }
463 453
464 // Repeatedly choose and process the best candidate in the 454 // Repeatedly choose and process the best candidate in the topological
465 // topological sort, until no candidates remain. This 455 // sort, until no candidates remain. This implementation is O(N^2) where N
466 // implementation is O(N^2) where N is the number of Phi 456 // is the number of Phi instructions, but with a small constant factor
467 // instructions, but with a small constant factor compared to a 457 // compared to a likely implementation of O(N) topological sort.
468 // likely implementation of O(N) topological sort.
469 for (; Remaining; --Remaining) { 458 for (; Remaining; --Remaining) {
470 size_t BestIndex = 0; 459 size_t BestIndex = 0;
471 int32_t BestWeight = -1; 460 int32_t BestWeight = -1;
472 // Find the best candidate. 461 // Find the best candidate.
473 for (size_t I = 0; I < NumPhis; ++I) { 462 for (size_t I = 0; I < NumPhis; ++I) {
474 if (Desc[I].Processed) 463 if (Desc[I].Processed)
475 continue; 464 continue;
476 int32_t Weight = 0; 465 int32_t Weight = 0;
477 Weight = Desc[I].Weight; 466 Weight = Desc[I].Weight;
478 if (Weight > BestWeight) { 467 if (Weight > BestWeight) {
479 BestIndex = I; 468 BestIndex = I;
480 BestWeight = Weight; 469 BestWeight = Weight;
481 } 470 }
482 } 471 }
483 assert(BestWeight >= 0); 472 assert(BestWeight >= 0);
484 assert(Desc[BestIndex].NumPred <= 1); 473 assert(Desc[BestIndex].NumPred <= 1);
485 Variable *Dest = Desc[BestIndex].Dest; 474 Variable *Dest = Desc[BestIndex].Dest;
486 Operand *Src = Desc[BestIndex].Src; 475 Operand *Src = Desc[BestIndex].Src;
487 assert(!sameVarOrReg(Dest, Src)); 476 assert(!sameVarOrReg(Dest, Src));
488 // Break a cycle by introducing a temporary. 477 // Break a cycle by introducing a temporary.
489 if (Desc[BestIndex].NumPred) { 478 if (Desc[BestIndex].NumPred) {
490 bool Found = false; 479 bool Found = false;
491 // If the target instruction "A=B" is part of a cycle, find 480 // If the target instruction "A=B" is part of a cycle, find the "X=A"
492 // the "X=A" assignment in the cycle because it will have to 481 // assignment in the cycle because it will have to be rewritten as
493 // be rewritten as "X=tmp". 482 // "X=tmp".
494 for (size_t J = 0; !Found && J < NumPhis; ++J) { 483 for (size_t J = 0; !Found && J < NumPhis; ++J) {
495 if (Desc[J].Processed) 484 if (Desc[J].Processed)
496 continue; 485 continue;
497 Operand *OtherSrc = Desc[J].Src; 486 Operand *OtherSrc = Desc[J].Src;
498 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { 487 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) {
499 SizeT VarNum = Func->getNumVariables(); 488 SizeT VarNum = Func->getNumVariables();
500 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); 489 Variable *Tmp = Func->makeVariable(OtherSrc->getType());
501 if (BuildDefs::dump()) 490 if (BuildDefs::dump())
502 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); 491 Tmp->setName(Func, "__split_" + std::to_string(VarNum));
503 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); 492 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
504 Desc[J].Src = Tmp; 493 Desc[J].Src = Tmp;
505 Found = true; 494 Found = true;
506 } 495 }
507 } 496 }
508 assert(Found); 497 assert(Found);
509 } 498 }
510 // Now that a cycle (if any) has been broken, create the actual 499 // Now that a cycle (if any) has been broken, create the actual
511 // assignment. 500 // assignment.
512 Split->appendInst(InstAssign::create(Func, Dest, Src)); 501 Split->appendInst(InstAssign::create(Func, Dest, Src));
513 // Update NumPred for all Phi assignments using this Phi's Src 502 // Update NumPred for all Phi assignments using this Phi's Src as their
514 // as their Dest variable. Also update Weight if NumPred 503 // Dest variable. Also update Weight if NumPred dropped from 1 to 0.
515 // dropped from 1 to 0.
516 if (auto Var = llvm::dyn_cast<Variable>(Src)) { 504 if (auto Var = llvm::dyn_cast<Variable>(Src)) {
517 for (size_t I = 0; I < NumPhis; ++I) { 505 for (size_t I = 0; I < NumPhis; ++I) {
518 if (Desc[I].Processed) 506 if (Desc[I].Processed)
519 continue; 507 continue;
520 if (sameVarOrReg(Var, Desc[I].Dest)) { 508 if (sameVarOrReg(Var, Desc[I].Dest)) {
521 if (--Desc[I].NumPred == 0) 509 if (--Desc[I].NumPred == 0)
522 Desc[I].Weight += WeightNoPreds; 510 Desc[I].Weight += WeightNoPreds;
523 } 511 }
524 } 512 }
525 } 513 }
526 Desc[BestIndex].Processed = true; 514 Desc[BestIndex].Processed = true;
527 } 515 }
528 Split->appendInst(InstBr::create(Func, this)); 516 Split->appendInst(InstBr::create(Func, this));
529 517
530 Split->genCode(); 518 Split->genCode();
531 Func->getVMetadata()->addNode(Split); 519 Func->getVMetadata()->addNode(Split);
532 } 520 }
533 } 521 }
534 522
535 // Does address mode optimization. Pass each instruction to the 523 // Does address mode optimization. Pass each instruction to the TargetLowering
536 // TargetLowering object. If it returns a new instruction 524 // object. If it returns a new instruction (representing the optimized address
537 // (representing the optimized address mode), then insert the new 525 // mode), then insert the new instruction and delete the old.
538 // instruction and delete the old.
539 void CfgNode::doAddressOpt() { 526 void CfgNode::doAddressOpt() {
540 TargetLowering *Target = Func->getTarget(); 527 TargetLowering *Target = Func->getTarget();
541 LoweringContext &Context = Target->getContext(); 528 LoweringContext &Context = Target->getContext();
542 Context.init(this); 529 Context.init(this);
543 while (!Context.atEnd()) { 530 while (!Context.atEnd()) {
544 Target->doAddressOpt(); 531 Target->doAddressOpt();
545 } 532 }
546 } 533 }
547 534
548 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) { 535 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
(...skipping 11 matching lines...) Expand all
560 } 547 }
561 if (!PauseNopInsertion) 548 if (!PauseNopInsertion)
562 Target->doNopInsertion(RNG); 549 Target->doNopInsertion(RNG);
563 // Ensure Cur=Next, so that the nops are inserted before the current 550 // Ensure Cur=Next, so that the nops are inserted before the current
564 // instruction rather than after. 551 // instruction rather than after.
565 Context.advanceCur(); 552 Context.advanceCur();
566 Context.advanceNext(); 553 Context.advanceNext();
567 } 554 }
568 } 555 }
569 556
570 // Drives the target lowering. Passes the current instruction and the 557 // Drives the target lowering. Passes the current instruction and the next
571 // next non-deleted instruction for target lowering. 558 // non-deleted instruction for target lowering.
572 void CfgNode::genCode() { 559 void CfgNode::genCode() {
573 TargetLowering *Target = Func->getTarget(); 560 TargetLowering *Target = Func->getTarget();
574 LoweringContext &Context = Target->getContext(); 561 LoweringContext &Context = Target->getContext();
575 // Lower the regular instructions. 562 // Lower the regular instructions.
576 Context.init(this); 563 Context.init(this);
577 Target->initNodeForLowering(this); 564 Target->initNodeForLowering(this);
578 while (!Context.atEnd()) { 565 while (!Context.atEnd()) {
579 InstList::iterator Orig = Context.getCur(); 566 InstList::iterator Orig = Context.getCur();
580 if (llvm::isa<InstRet>(*Orig)) 567 if (llvm::isa<InstRet>(*Orig))
581 setHasReturn(); 568 setHasReturn();
(...skipping 14 matching lines...) Expand all
596 continue; 583 continue;
597 I.livenessLightweight(Func, Live); 584 I.livenessLightweight(Func, Live);
598 } 585 }
599 for (Inst &I : Phis) { 586 for (Inst &I : Phis) {
600 if (I.isDeleted()) 587 if (I.isDeleted())
601 continue; 588 continue;
602 I.livenessLightweight(Func, Live); 589 I.livenessLightweight(Func, Live);
603 } 590 }
604 } 591 }
605 592
606 // Performs liveness analysis on the block. Returns true if the 593 // Performs liveness analysis on the block. Returns true if the incoming
607 // incoming liveness changed from before, false if it stayed the same. 594 // liveness changed from before, false if it stayed the same. (If it changes,
608 // (If it changes, the node's predecessors need to be processed 595 // the node's predecessors need to be processed again.)
609 // again.)
610 bool CfgNode::liveness(Liveness *Liveness) { 596 bool CfgNode::liveness(Liveness *Liveness) {
611 SizeT NumVars = Liveness->getNumVarsInNode(this); 597 SizeT NumVars = Liveness->getNumVarsInNode(this);
612 LivenessBV Live(NumVars); 598 LivenessBV Live(NumVars);
613 LiveBeginEndMap *LiveBegin = nullptr; 599 LiveBeginEndMap *LiveBegin = nullptr;
614 LiveBeginEndMap *LiveEnd = nullptr; 600 LiveBeginEndMap *LiveEnd = nullptr;
615 // Mark the beginning and ending of each variable's live range 601 // Mark the beginning and ending of each variable's live range with the
616 // with the sentinel instruction number 0. 602 // sentinel instruction number 0.
617 if (Liveness->getMode() == Liveness_Intervals) { 603 if (Liveness->getMode() == Liveness_Intervals) {
618 LiveBegin = Liveness->getLiveBegin(this); 604 LiveBegin = Liveness->getLiveBegin(this);
619 LiveEnd = Liveness->getLiveEnd(this); 605 LiveEnd = Liveness->getLiveEnd(this);
620 LiveBegin->clear(); 606 LiveBegin->clear();
621 LiveEnd->clear(); 607 LiveEnd->clear();
622 // Guess that the number of live ranges beginning is roughly the 608 // Guess that the number of live ranges beginning is roughly the number of
623 // number of instructions, and same for live ranges ending. 609 // instructions, and same for live ranges ending.
624 LiveBegin->reserve(getInstCountEstimate()); 610 LiveBegin->reserve(getInstCountEstimate());
625 LiveEnd->reserve(getInstCountEstimate()); 611 LiveEnd->reserve(getInstCountEstimate());
626 } 612 }
627 // Initialize Live to be the union of all successors' LiveIn. 613 // Initialize Live to be the union of all successors' LiveIn.
628 for (CfgNode *Succ : OutEdges) { 614 for (CfgNode *Succ : OutEdges) {
629 Live |= Liveness->getLiveIn(Succ); 615 Live |= Liveness->getLiveIn(Succ);
630 // Mark corresponding argument of phis in successor as live. 616 // Mark corresponding argument of phis in successor as live.
631 for (Inst &I : Succ->Phis) { 617 for (Inst &I : Succ->Phis) {
632 if (I.isDeleted()) 618 if (I.isDeleted())
633 continue; 619 continue;
634 auto Phi = llvm::dyn_cast<InstPhi>(&I); 620 auto Phi = llvm::dyn_cast<InstPhi>(&I);
635 Phi->livenessPhiOperand(Live, this, Liveness); 621 Phi->livenessPhiOperand(Live, this, Liveness);
636 } 622 }
637 } 623 }
638 Liveness->getLiveOut(this) = Live; 624 Liveness->getLiveOut(this) = Live;
639 625
640 // Process regular instructions in reverse order. 626 // Process regular instructions in reverse order.
641 for (Inst &I : reverse_range(Insts)) { 627 for (Inst &I : reverse_range(Insts)) {
642 if (I.isDeleted()) 628 if (I.isDeleted())
643 continue; 629 continue;
644 I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd); 630 I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
645 } 631 }
646 // Process phis in forward order so that we can override the 632 // Process phis in forward order so that we can override the instruction
647 // instruction number to be that of the earliest phi instruction in 633 // number to be that of the earliest phi instruction in the block.
648 // the block.
649 SizeT NumNonDeadPhis = 0; 634 SizeT NumNonDeadPhis = 0;
650 InstNumberT FirstPhiNumber = Inst::NumberSentinel; 635 InstNumberT FirstPhiNumber = Inst::NumberSentinel;
651 for (Inst &I : Phis) { 636 for (Inst &I : Phis) {
652 if (I.isDeleted()) 637 if (I.isDeleted())
653 continue; 638 continue;
654 if (FirstPhiNumber == Inst::NumberSentinel) 639 if (FirstPhiNumber == Inst::NumberSentinel)
655 FirstPhiNumber = I.getNumber(); 640 FirstPhiNumber = I.getNumber();
656 if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd)) 641 if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
657 ++NumNonDeadPhis; 642 ++NumNonDeadPhis;
658 } 643 }
659 644
660 // When using the sparse representation, after traversing the 645 // When using the sparse representation, after traversing the instructions in
661 // instructions in the block, the Live bitvector should only contain 646 // the block, the Live bitvector should only contain set bits for global
662 // set bits for global variables upon block entry. We validate this 647 // variables upon block entry. We validate this by shrinking the Live vector
663 // by shrinking the Live vector and then testing it against the 648 // and then testing it against the pre-shrunk version. (The shrinking is
664 // pre-shrunk version. (The shrinking is required, but the 649 // required, but the validation is not.)
665 // validation is not.)
666 LivenessBV LiveOrig = Live; 650 LivenessBV LiveOrig = Live;
667 Live.resize(Liveness->getNumGlobalVars()); 651 Live.resize(Liveness->getNumGlobalVars());
668 if (Live != LiveOrig) { 652 if (Live != LiveOrig) {
669 if (BuildDefs::dump()) { 653 if (BuildDefs::dump()) {
670 // This is a fatal liveness consistency error. Print some 654 // This is a fatal liveness consistency error. Print some diagnostics and
671 // diagnostics and abort. 655 // abort.
672 Ostream &Str = Func->getContext()->getStrDump(); 656 Ostream &Str = Func->getContext()->getStrDump();
673 Func->resetCurrentNode(); 657 Func->resetCurrentNode();
674 Str << "LiveOrig-Live ="; 658 Str << "LiveOrig-Live =";
675 for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) { 659 for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) {
676 if (LiveOrig.test(i)) { 660 if (LiveOrig.test(i)) {
677 Str << " "; 661 Str << " ";
678 Liveness->getVariable(i, this)->dump(Func); 662 Liveness->getVariable(i, this)->dump(Func);
679 } 663 }
680 } 664 }
681 Str << "\n"; 665 Str << "\n";
682 } 666 }
683 llvm::report_fatal_error("Fatal inconsistency in liveness analysis"); 667 llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
684 } 668 }
685 669
686 bool Changed = false; 670 bool Changed = false;
687 LivenessBV &LiveIn = Liveness->getLiveIn(this); 671 LivenessBV &LiveIn = Liveness->getLiveIn(this);
688 // Add in current LiveIn 672 // Add in current LiveIn
689 Live |= LiveIn; 673 Live |= LiveIn;
690 // Check result, set LiveIn=Live 674 // Check result, set LiveIn=Live
691 SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this); 675 SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
692 bool LiveInChanged = (Live != LiveIn); 676 bool LiveInChanged = (Live != LiveIn);
693 Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged); 677 Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
694 if (LiveInChanged) 678 if (LiveInChanged)
695 LiveIn = Live; 679 LiveIn = Live;
696 PrevNumNonDeadPhis = NumNonDeadPhis; 680 PrevNumNonDeadPhis = NumNonDeadPhis;
697 return Changed; 681 return Changed;
698 } 682 }
699 683
700 // Once basic liveness is complete, compute actual live ranges. It is 684 // Once basic liveness is complete, compute actual live ranges. It is assumed
701 // assumed that within a single basic block, a live range begins at 685 // that within a single basic block, a live range begins at most once and ends
702 // most once and ends at most once. This is certainly true for pure 686 // at most once. This is certainly true for pure SSA form. It is also true once
703 // SSA form. It is also true once phis are lowered, since each 687 // phis are lowered, since each assignment to the phi-based temporary is in a
704 // assignment to the phi-based temporary is in a different basic 688 // different basic block, and there is a single read that ends the live in the
705 // block, and there is a single read that ends the live in the basic 689 // basic block that contained the actual phi instruction.
706 // block that contained the actual phi instruction.
707 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum, 690 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
708 InstNumberT LastInstNum) { 691 InstNumberT LastInstNum) {
709 TimerMarker T1(TimerStack::TT_liveRange, Func); 692 TimerMarker T1(TimerStack::TT_liveRange, Func);
710 693
711 SizeT NumVars = Liveness->getNumVarsInNode(this); 694 SizeT NumVars = Liveness->getNumVarsInNode(this);
712 LivenessBV &LiveIn = Liveness->getLiveIn(this); 695 LivenessBV &LiveIn = Liveness->getLiveIn(this);
713 LivenessBV &LiveOut = Liveness->getLiveOut(this); 696 LivenessBV &LiveOut = Liveness->getLiveOut(this);
714 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this); 697 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
715 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this); 698 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
716 std::sort(MapBegin.begin(), MapBegin.end()); 699 std::sort(MapBegin.begin(), MapBegin.end());
(...skipping 12 matching lines...) Expand all
729 LivenessBV LiveInAndOut = LiveIn; 712 LivenessBV LiveInAndOut = LiveIn;
730 LiveInAndOut &= LiveOut; 713 LiveInAndOut &= LiveOut;
731 714
732 // Iterate in parallel across the sorted MapBegin[] and MapEnd[]. 715 // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
733 auto IBB = MapBegin.begin(), IEB = MapEnd.begin(); 716 auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
734 auto IBE = MapBegin.end(), IEE = MapEnd.end(); 717 auto IBE = MapBegin.end(), IEE = MapEnd.end();
735 while (IBB != IBE || IEB != IEE) { 718 while (IBB != IBE || IEB != IEE) {
736 SizeT i1 = IBB == IBE ? NumVars : IBB->first; 719 SizeT i1 = IBB == IBE ? NumVars : IBB->first;
737 SizeT i2 = IEB == IEE ? NumVars : IEB->first; 720 SizeT i2 = IEB == IEE ? NumVars : IEB->first;
738 SizeT i = std::min(i1, i2); 721 SizeT i = std::min(i1, i2);
739 // i1 is the Variable number of the next MapBegin entry, and i2 is 722 // i1 is the Variable number of the next MapBegin entry, and i2 is the
740 // the Variable number of the next MapEnd entry. If i1==i2, then 723 // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
741 // the Variable's live range begins and ends in this block. If 724 // live range begins and ends in this block. If i1<i2, then i1's live range
742 // i1<i2, then i1's live range begins at instruction IBB->second 725 // begins at instruction IBB->second and extends through the end of the
743 // and extends through the end of the block. If i1>i2, then i2's 726 // block. If i1>i2, then i2's live range begins at the first instruction of
744 // live range begins at the first instruction of the block and 727 // the block and ends at IEB->second. In any case, we choose the lesser of
745 // ends at IEB->second. In any case, we choose the lesser of i1 728 // i1 and i2 and proceed accordingly.
746 // and i2 and proceed accordingly.
747 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum; 729 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
748 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1; 730 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;
749 731
750 Variable *Var = Liveness->getVariable(i, this); 732 Variable *Var = Liveness->getVariable(i, this);
751 if (LB > LE) { 733 if (LB > LE) {
752 Var->addLiveRange(FirstInstNum, LE); 734 Var->addLiveRange(FirstInstNum, LE);
753 Var->addLiveRange(LB, LastInstNum + 1); 735 Var->addLiveRange(LB, LastInstNum + 1);
754 // Assert that Var is a global variable by checking that its 736 // Assert that Var is a global variable by checking that its liveness
755 // liveness index is less than the number of globals. This 737 // index is less than the number of globals. This ensures that the
756 // ensures that the LiveInAndOut[] access is valid. 738 // LiveInAndOut[] access is valid.
757 assert(i < Liveness->getNumGlobalVars()); 739 assert(i < Liveness->getNumGlobalVars());
758 LiveInAndOut[i] = false; 740 LiveInAndOut[i] = false;
759 } else { 741 } else {
760 Var->addLiveRange(LB, LE); 742 Var->addLiveRange(LB, LE);
761 } 743 }
762 if (i == i1) 744 if (i == i1)
763 ++IBB; 745 ++IBB;
764 if (i == i2) 746 if (i == i2)
765 ++IEB; 747 ++IEB;
766 } 748 }
767 // Process the variables that are live across the entire block. 749 // Process the variables that are live across the entire block.
768 for (int i = LiveInAndOut.find_first(); i != -1; 750 for (int i = LiveInAndOut.find_first(); i != -1;
769 i = LiveInAndOut.find_next(i)) { 751 i = LiveInAndOut.find_next(i)) {
770 Variable *Var = Liveness->getVariable(i, this); 752 Variable *Var = Liveness->getVariable(i, this);
771 if (Liveness->getRangeMask(Var->getIndex())) 753 if (Liveness->getRangeMask(Var->getIndex()))
772 Var->addLiveRange(FirstInstNum, LastInstNum + 1); 754 Var->addLiveRange(FirstInstNum, LastInstNum + 1);
773 } 755 }
774 } 756 }
775 757
776 // If this node contains only deleted instructions, and ends in an 758 // If this node contains only deleted instructions, and ends in an
777 // unconditional branch, contract the node by repointing all its 759 // unconditional branch, contract the node by repointing all its in-edges to
778 // in-edges to its successor. 760 // its successor.
779 void CfgNode::contractIfEmpty() { 761 void CfgNode::contractIfEmpty() {
780 if (InEdges.empty()) 762 if (InEdges.empty())
781 return; 763 return;
782 Inst *Branch = nullptr; 764 Inst *Branch = nullptr;
783 for (Inst &I : Insts) { 765 for (Inst &I : Insts) {
784 if (I.isDeleted()) 766 if (I.isDeleted())
785 continue; 767 continue;
786 if (I.isUnconditionalBranch()) 768 if (I.isUnconditionalBranch())
787 Branch = &I; 769 Branch = &I;
788 else if (!I.isRedundantAssign()) 770 else if (!I.isRedundantAssign())
789 return; 771 return;
790 } 772 }
791 assert(OutEdges.size() == 1); 773 assert(OutEdges.size() == 1);
792 // Don't try to delete a self-loop. 774 // Don't try to delete a self-loop.
793 if (OutEdges[0] == this) 775 if (OutEdges[0] == this)
794 return; 776 return;
795 777
796 Branch->setDeleted(); 778 Branch->setDeleted();
797 CfgNode *Successor = OutEdges.front(); 779 CfgNode *Successor = OutEdges.front();
798 // Repoint all this node's in-edges to this node's successor, unless 780 // Repoint all this node's in-edges to this node's successor, unless this
799 // this node's successor is actually itself (in which case the 781 // node's successor is actually itself (in which case the statement
800 // statement "OutEdges.front()->InEdges.push_back(Pred)" could 782 // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
801 // invalidate the iterator over this->InEdges). 783 // over this->InEdges).
802 if (Successor != this) { 784 if (Successor != this) {
803 for (CfgNode *Pred : InEdges) { 785 for (CfgNode *Pred : InEdges) {
804 for (CfgNode *&I : Pred->OutEdges) { 786 for (CfgNode *&I : Pred->OutEdges) {
805 if (I == this) { 787 if (I == this) {
806 I = Successor; 788 I = Successor;
807 Successor->InEdges.push_back(Pred); 789 Successor->InEdges.push_back(Pred);
808 } 790 }
809 } 791 }
810 for (Inst &I : Pred->getInsts()) { 792 for (Inst &I : Pred->getInsts()) {
811 if (!I.isDeleted()) 793 if (!I.isDeleted())
812 I.repointEdges(this, Successor); 794 I.repointEdges(this, Successor);
813 } 795 }
814 } 796 }
815 797
816 // Remove the in-edge to the successor to allow node reordering to make 798 // Remove the in-edge to the successor to allow node reordering to make
817 // better decisions. For example it's more helpful to place a node after 799 // better decisions. For example it's more helpful to place a node after a
818 // a reachable predecessor than an unreachable one (like the one we just 800 // reachable predecessor than an unreachable one (like the one we just
819 // contracted). 801 // contracted).
820 Successor->InEdges.erase( 802 Successor->InEdges.erase(
821 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this)); 803 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
822 } 804 }
823 InEdges.clear(); 805 InEdges.clear();
824 } 806 }
825 807
826 void CfgNode::doBranchOpt(const CfgNode *NextNode) { 808 void CfgNode::doBranchOpt(const CfgNode *NextNode) {
827 TargetLowering *Target = Func->getTarget(); 809 TargetLowering *Target = Func->getTarget();
828 // Find the first opportunity for branch optimization (which will be the last 810 // Find the first opportunity for branch optimization (which will be the last
829 // instruction in the block) and stop. This is sufficient unless there is some 811 // instruction in the block) and stop. This is sufficient unless there is
830 // target lowering where we have the possibility of multiple optimizations per 812 // some target lowering where we have the possibility of multiple
831 // block. Take care with switch lowering as there are multiple unconditional 813 // optimizations per block. Take care with switch lowering as there are
832 // branches and only the last can be deleted. 814 // multiple unconditional branches and only the last can be deleted.
833 for (Inst &I : reverse_range(Insts)) { 815 for (Inst &I : reverse_range(Insts)) {
834 if (!I.isDeleted()) { 816 if (!I.isDeleted()) {
835 Target->doBranchOpt(&I, NextNode); 817 Target->doBranchOpt(&I, NextNode);
836 return; 818 return;
837 } 819 }
838 } 820 }
839 } 821 }
840 822
841 // ======================== Dump routines ======================== // 823 // ======================== Dump routines ======================== //
842 824
(...skipping 19 matching lines...) Expand all
862 for (SizeT i = 0; i < Live->size(); ++i) { 844 for (SizeT i = 0; i < Live->size(); ++i) {
863 if ((*Live)[i]) { 845 if ((*Live)[i]) {
864 Variable *Var = Liveness->getVariable(i, Node); 846 Variable *Var = Liveness->getVariable(i, Node);
865 if (Var->hasReg()) { 847 if (Var->hasReg()) {
866 if (IsLiveIn) 848 if (IsLiveIn)
867 ++LiveRegCount[Var->getRegNum()]; 849 ++LiveRegCount[Var->getRegNum()];
868 LiveRegs.push_back(Var); 850 LiveRegs.push_back(Var);
869 } 851 }
870 } 852 }
871 } 853 }
872 // Sort the variables by regnum so they are always printed in a 854 // Sort the variables by regnum so they are always printed in a familiar
873 // familiar order. 855 // order.
874 std::sort(LiveRegs.begin(), LiveRegs.end(), 856 std::sort(LiveRegs.begin(), LiveRegs.end(),
875 [](const Variable *V1, const Variable *V2) { 857 [](const Variable *V1, const Variable *V2) {
876 return V1->getRegNum() < V2->getRegNum(); 858 return V1->getRegNum() < V2->getRegNum();
877 }); 859 });
878 bool First = true; 860 bool First = true;
879 for (Variable *Var : LiveRegs) { 861 for (Variable *Var : LiveRegs) {
880 if (!First) 862 if (!First)
881 Str << ","; 863 Str << ",";
882 First = false; 864 First = false;
883 Var->emit(Func); 865 Var->emit(Func);
884 } 866 }
885 } 867 }
886 Str << "\n"; 868 Str << "\n";
887 } 869 }
888 870
889 void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr, 871 void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
890 std::vector<SizeT> &LiveRegCount) { 872 std::vector<SizeT> &LiveRegCount) {
891 if (!BuildDefs::dump()) 873 if (!BuildDefs::dump())
892 return; 874 return;
893 bool First = true; 875 bool First = true;
894 Variable *Dest = Instr->getDest(); 876 Variable *Dest = Instr->getDest();
895 // Normally we increment the live count for the dest register. But 877 // Normally we increment the live count for the dest register. But we
896 // we shouldn't if the instruction's IsDestNonKillable flag is set, 878 // shouldn't if the instruction's IsDestNonKillable flag is set, because this
897 // because this means that the target lowering created this 879 // means that the target lowering created this instruction as a non-SSA
898 // instruction as a non-SSA assignment; i.e., a different, previous 880 // assignment; i.e., a different, previous instruction started the dest
899 // instruction started the dest variable's live range. 881 // variable's live range.
900 if (!Instr->isDestNonKillable() && Dest && Dest->hasReg()) 882 if (!Instr->isDestNonKillable() && Dest && Dest->hasReg())
901 ++LiveRegCount[Dest->getRegNum()]; 883 ++LiveRegCount[Dest->getRegNum()];
902 FOREACH_VAR_IN_INST(Var, *Instr) { 884 FOREACH_VAR_IN_INST(Var, *Instr) {
903 bool ShouldReport = Instr->isLastUse(Var); 885 bool ShouldReport = Instr->isLastUse(Var);
904 if (ShouldReport && Var->hasReg()) { 886 if (ShouldReport && Var->hasReg()) {
905 // Don't report end of live range until the live count reaches 0. 887 // Don't report end of live range until the live count reaches 0.
906 SizeT NewCount = --LiveRegCount[Var->getRegNum()]; 888 SizeT NewCount = --LiveRegCount[Var->getRegNum()];
907 if (NewCount) 889 if (NewCount)
908 ShouldReport = false; 890 ShouldReport = false;
909 } 891 }
910 if (ShouldReport) { 892 if (ShouldReport) {
911 if (First) 893 if (First)
912 Str << " \t# END="; 894 Str << " \t# END=";
913 else 895 else
914 Str << ","; 896 Str << ",";
915 Var->emit(Func); 897 Var->emit(Func);
916 First = false; 898 First = false;
917 } 899 }
918 } 900 }
919 } 901 }
920 902
921 void updateStats(Cfg *Func, const Inst *I) { 903 void updateStats(Cfg *Func, const Inst *I) {
922 if (!BuildDefs::dump()) 904 if (!BuildDefs::dump())
923 return; 905 return;
924 // Update emitted instruction count, plus fill/spill count for 906 // Update emitted instruction count, plus fill/spill count for Variable
925 // Variable operands without a physical register. 907 // operands without a physical register.
926 if (uint32_t Count = I->getEmitInstCount()) { 908 if (uint32_t Count = I->getEmitInstCount()) {
927 Func->getContext()->statsUpdateEmitted(Count); 909 Func->getContext()->statsUpdateEmitted(Count);
928 if (Variable *Dest = I->getDest()) { 910 if (Variable *Dest = I->getDest()) {
929 if (!Dest->hasReg()) 911 if (!Dest->hasReg())
930 Func->getContext()->statsUpdateFills(); 912 Func->getContext()->statsUpdateFills();
931 } 913 }
932 for (SizeT S = 0; S < I->getSrcSize(); ++S) { 914 for (SizeT S = 0; S < I->getSrcSize(); ++S) {
933 if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) { 915 if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
934 if (!Src->hasReg()) 916 if (!Src->hasReg())
935 Func->getContext()->statsUpdateSpills(); 917 Func->getContext()->statsUpdateSpills();
936 } 918 }
937 } 919 }
938 } 920 }
939 } 921 }
940 922
941 } // end of anonymous namespace 923 } // end of anonymous namespace
942 924
943 void CfgNode::emit(Cfg *Func) const { 925 void CfgNode::emit(Cfg *Func) const {
944 if (!BuildDefs::dump()) 926 if (!BuildDefs::dump())
945 return; 927 return;
946 Func->setCurrentNode(this); 928 Func->setCurrentNode(this);
947 Ostream &Str = Func->getContext()->getStrEmit(); 929 Ostream &Str = Func->getContext()->getStrEmit();
948 Liveness *Liveness = Func->getLiveness(); 930 Liveness *Liveness = Func->getLiveness();
949 bool DecorateAsm = 931 bool DecorateAsm =
950 Liveness && Func->getContext()->getFlags().getDecorateAsm(); 932 Liveness && Func->getContext()->getFlags().getDecorateAsm();
951 Str << getAsmName() << ":\n"; 933 Str << getAsmName() << ":\n";
952 // LiveRegCount keeps track of the number of currently live 934 // LiveRegCount keeps track of the number of currently live variables that
953 // variables that each register is assigned to. Normally that would 935 // each register is assigned to. Normally that would be only 0 or 1, but the
954 // be only 0 or 1, but the register allocator's AllowOverlap 936 // register allocator's AllowOverlap inference allows it to be greater than 1
955 // inference allows it to be greater than 1 for short periods. 937 // for short periods.
956 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); 938 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
957 if (DecorateAsm) { 939 if (DecorateAsm) {
958 constexpr bool IsLiveIn = true; 940 constexpr bool IsLiveIn = true;
959 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); 941 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
960 } 942 }
961 943
962 for (const Inst &I : Phis) { 944 for (const Inst &I : Phis) {
963 if (I.isDeleted()) 945 if (I.isDeleted())
964 continue; 946 continue;
965 // Emitting a Phi instruction should cause an error. 947 // Emitting a Phi instruction should cause an error.
966 I.emit(Func); 948 I.emit(Func);
967 } 949 }
968 for (const Inst &I : Insts) { 950 for (const Inst &I : Insts) {
969 if (I.isDeleted()) 951 if (I.isDeleted())
970 continue; 952 continue;
971 if (I.isRedundantAssign()) { 953 if (I.isRedundantAssign()) {
972 // Usually, redundant assignments end the live range of the src 954 // Usually, redundant assignments end the live range of the src variable
973 // variable and begin the live range of the dest variable, with 955 // and begin the live range of the dest variable, with no net effect on
974 // no net effect on the liveness of their register. However, if 956 // the liveness of their register. However, if the register allocator
975 // the register allocator infers the AllowOverlap condition, 957 // infers the AllowOverlap condition, then this may be a redundant
976 // then this may be a redundant assignment that does not end the 958 // assignment that does not end the src variable's live range, in which
977 // src variable's live range, in which case the active variable 959 // case the active variable count for that register needs to be bumped.
978 // count for that register needs to be bumped. That normally 960 // That normally would have happened as part of emitLiveRangesEnded(),
979 // would have happened as part of emitLiveRangesEnded(), but 961 // but that isn't called for redundant assignments.
980 // that isn't called for redundant assignments.
981 Variable *Dest = I.getDest(); 962 Variable *Dest = I.getDest();
982 if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) 963 if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0)))
983 ++LiveRegCount[Dest->getRegNum()]; 964 ++LiveRegCount[Dest->getRegNum()];
984 continue; 965 continue;
985 } 966 }
986 I.emit(Func); 967 I.emit(Func);
987 if (DecorateAsm) 968 if (DecorateAsm)
988 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); 969 emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
989 Str << "\n"; 970 Str << "\n";
990 updateStats(Func, &I); 971 updateStats(Func, &I);
(...skipping 12 matching lines...) Expand all
1003 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; 984 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;
1004 985
1005 public: 986 public:
1006 BundleEmitHelper(Assembler *Asm, TargetLowering *Target, 987 BundleEmitHelper(Assembler *Asm, TargetLowering *Target,
1007 const InstList &Insts) 988 const InstList &Insts)
1008 : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End), 989 : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End),
1009 BundleSize(1 << Asm->getBundleAlignLog2Bytes()), 990 BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
1010 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {} 991 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
1011 // Check whether we're currently within a bundle_lock region. 992 // Check whether we're currently within a bundle_lock region.
1012 bool isInBundleLockRegion() const { return BundleLockStart != End; } 993 bool isInBundleLockRegion() const { return BundleLockStart != End; }
1013 // Check whether the current bundle_lock region has the align_to_end 994 // Check whether the current bundle_lock region has the align_to_end option.
1014 // option.
1015 bool isAlignToEnd() const { 995 bool isAlignToEnd() const {
1016 assert(isInBundleLockRegion()); 996 assert(isInBundleLockRegion());
1017 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() == 997 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
1018 InstBundleLock::Opt_AlignToEnd; 998 InstBundleLock::Opt_AlignToEnd;
1019 } 999 }
1020 // Check whether the entire bundle_lock region falls within the same 1000 // Check whether the entire bundle_lock region falls within the same bundle.
1021 // bundle.
1022 bool isSameBundle() const { 1001 bool isSameBundle() const {
1023 assert(isInBundleLockRegion()); 1002 assert(isInBundleLockRegion());
1024 return SizeSnapshotPre == SizeSnapshotPost || 1003 return SizeSnapshotPre == SizeSnapshotPost ||
1025 (SizeSnapshotPre & BundleMaskHi) == 1004 (SizeSnapshotPre & BundleMaskHi) ==
1026 ((SizeSnapshotPost - 1) & BundleMaskHi); 1005 ((SizeSnapshotPost - 1) & BundleMaskHi);
1027 } 1006 }
1028 // Get the bundle alignment of the first instruction of the 1007 // Get the bundle alignment of the first instruction of the bundle_lock
1029 // bundle_lock region. 1008 // region.
1030 intptr_t getPreAlignment() const { 1009 intptr_t getPreAlignment() const {
1031 assert(isInBundleLockRegion()); 1010 assert(isInBundleLockRegion());
1032 return SizeSnapshotPre & BundleMaskLo; 1011 return SizeSnapshotPre & BundleMaskLo;
1033 } 1012 }
1034 // Get the bundle alignment of the first instruction past the 1013 // Get the bundle alignment of the first instruction past the bundle_lock
1035 // bundle_lock region. 1014 // region.
1036 intptr_t getPostAlignment() const { 1015 intptr_t getPostAlignment() const {
1037 assert(isInBundleLockRegion()); 1016 assert(isInBundleLockRegion());
1038 return SizeSnapshotPost & BundleMaskLo; 1017 return SizeSnapshotPost & BundleMaskLo;
1039 } 1018 }
1040 // Get the iterator pointing to the bundle_lock instruction, e.g. to 1019 // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
1041 // roll back the instruction iteration to that point. 1020 // back the instruction iteration to that point.
1042 InstList::const_iterator getBundleLockStart() const { 1021 InstList::const_iterator getBundleLockStart() const {
1043 assert(isInBundleLockRegion()); 1022 assert(isInBundleLockRegion());
1044 return BundleLockStart; 1023 return BundleLockStart;
1045 } 1024 }
1046 // Set up bookkeeping when the bundle_lock instruction is first 1025 // Set up bookkeeping when the bundle_lock instruction is first processed.
1047 // processed.
1048 void enterBundleLock(InstList::const_iterator I) { 1026 void enterBundleLock(InstList::const_iterator I) {
1049 assert(!isInBundleLockRegion()); 1027 assert(!isInBundleLockRegion());
1050 BundleLockStart = I; 1028 BundleLockStart = I;
1051 SizeSnapshotPre = Asm->getBufferSize(); 1029 SizeSnapshotPre = Asm->getBufferSize();
1052 Asm->setPreliminary(true); 1030 Asm->setPreliminary(true);
1053 Target->snapshotEmitState(); 1031 Target->snapshotEmitState();
1054 assert(isInBundleLockRegion()); 1032 assert(isInBundleLockRegion());
1055 } 1033 }
1056 // Update bookkeeping when the bundle_unlock instruction is 1034 // Update bookkeeping when the bundle_unlock instruction is processed.
1057 // processed.
1058 void enterBundleUnlock() { 1035 void enterBundleUnlock() {
1059 assert(isInBundleLockRegion()); 1036 assert(isInBundleLockRegion());
1060 SizeSnapshotPost = Asm->getBufferSize(); 1037 SizeSnapshotPost = Asm->getBufferSize();
1061 } 1038 }
1062 // Update bookkeeping when we are completely finished with the 1039 // Update bookkeeping when we are completely finished with the bundle_lock
1063 // bundle_lock region. 1040 // region.
1064 void leaveBundleLockRegion() { BundleLockStart = End; } 1041 void leaveBundleLockRegion() { BundleLockStart = End; }
1065 // Check whether the instruction sequence fits within the current 1042 // Check whether the instruction sequence fits within the current bundle, and
1066 // bundle, and if not, add nop padding to the end of the current 1043 // if not, add nop padding to the end of the current bundle.
1067 // bundle.
1068 void padToNextBundle() { 1044 void padToNextBundle() {
1069 assert(isInBundleLockRegion()); 1045 assert(isInBundleLockRegion());
1070 if (!isSameBundle()) { 1046 if (!isSameBundle()) {
1071 intptr_t PadToNextBundle = BundleSize - getPreAlignment(); 1047 intptr_t PadToNextBundle = BundleSize - getPreAlignment();
1072 Asm->padWithNop(PadToNextBundle); 1048 Asm->padWithNop(PadToNextBundle);
1073 SizeSnapshotPre += PadToNextBundle; 1049 SizeSnapshotPre += PadToNextBundle;
1074 SizeSnapshotPost += PadToNextBundle; 1050 SizeSnapshotPost += PadToNextBundle;
1075 assert((Asm->getBufferSize() & BundleMaskLo) == 0); 1051 assert((Asm->getBufferSize() & BundleMaskLo) == 0);
1076 assert(Asm->getBufferSize() == SizeSnapshotPre); 1052 assert(Asm->getBufferSize() == SizeSnapshotPre);
1077 } 1053 }
1078 } 1054 }
1079 // If align_to_end is specified, add padding such that the 1055 // If align_to_end is specified, add padding such that the instruction
1080 // instruction sequences ends precisely at a bundle boundary. 1056 // sequences ends precisely at a bundle boundary.
1081 void padForAlignToEnd() { 1057 void padForAlignToEnd() {
1082 assert(isInBundleLockRegion()); 1058 assert(isInBundleLockRegion());
1083 if (isAlignToEnd()) { 1059 if (isAlignToEnd()) {
1084 if (intptr_t Offset = getPostAlignment()) { 1060 if (intptr_t Offset = getPostAlignment()) {
1085 Asm->padWithNop(BundleSize - Offset); 1061 Asm->padWithNop(BundleSize - Offset);
1086 SizeSnapshotPre = Asm->getBufferSize(); 1062 SizeSnapshotPre = Asm->getBufferSize();
1087 } 1063 }
1088 } 1064 }
1089 } 1065 }
1090 // Update bookkeeping when rolling back for the second pass. 1066 // Update bookkeeping when rolling back for the second pass.
1091 void rollback() { 1067 void rollback() {
1092 assert(isInBundleLockRegion()); 1068 assert(isInBundleLockRegion());
1093 Asm->setBufferSize(SizeSnapshotPre); 1069 Asm->setBufferSize(SizeSnapshotPre);
1094 Asm->setPreliminary(false); 1070 Asm->setPreliminary(false);
1095 Target->rollbackEmitState(); 1071 Target->rollbackEmitState();
1096 } 1072 }
1097 1073
1098 private: 1074 private:
1099 Assembler *const Asm; 1075 Assembler *const Asm;
1100 TargetLowering *const Target; 1076 TargetLowering *const Target;
1101 // End is a sentinel value such that BundleLockStart==End implies 1077 // End is a sentinel value such that BundleLockStart==End implies that we are
1102 // that we are not in a bundle_lock region. 1078 // not in a bundle_lock region.
1103 const InstList::const_iterator End; 1079 const InstList::const_iterator End;
1104 InstList::const_iterator BundleLockStart; 1080 InstList::const_iterator BundleLockStart;
1105 const intptr_t BundleSize; 1081 const intptr_t BundleSize;
1106 // Masking with BundleMaskLo identifies an address's bundle offset. 1082 // Masking with BundleMaskLo identifies an address's bundle offset.
1107 const intptr_t BundleMaskLo; 1083 const intptr_t BundleMaskLo;
1108 // Masking with BundleMaskHi identifies an address's bundle. 1084 // Masking with BundleMaskHi identifies an address's bundle.
1109 const intptr_t BundleMaskHi; 1085 const intptr_t BundleMaskHi;
1110 intptr_t SizeSnapshotPre = 0; 1086 intptr_t SizeSnapshotPre = 0;
1111 intptr_t SizeSnapshotPost = 0; 1087 intptr_t SizeSnapshotPost = 0;
1112 }; 1088 };
1113 1089
1114 } // end of anonymous namespace 1090 } // end of anonymous namespace
1115 1091
1116 void CfgNode::emitIAS(Cfg *Func) const { 1092 void CfgNode::emitIAS(Cfg *Func) const {
1117 Func->setCurrentNode(this); 1093 Func->setCurrentNode(this);
1118 Assembler *Asm = Func->getAssembler<>(); 1094 Assembler *Asm = Func->getAssembler<>();
1119 // TODO(stichnot): When sandboxing, defer binding the node label 1095 // TODO(stichnot): When sandboxing, defer binding the node label until just
1120 // until just before the first instruction is emitted, to reduce the 1096 // before the first instruction is emitted, to reduce the chance that a
1121 // chance that a padding nop is a branch target. 1097 // padding nop is a branch target.
1122 Asm->bindCfgNodeLabel(getIndex()); 1098 Asm->bindCfgNodeLabel(getIndex());
1123 for (const Inst &I : Phis) { 1099 for (const Inst &I : Phis) {
1124 if (I.isDeleted()) 1100 if (I.isDeleted())
1125 continue; 1101 continue;
1126 // Emitting a Phi instruction should cause an error. 1102 // Emitting a Phi instruction should cause an error.
1127 I.emitIAS(Func); 1103 I.emitIAS(Func);
1128 } 1104 }
1129 1105
1130 // Do the simple emission if not sandboxed. 1106 // Do the simple emission if not sandboxed.
1131 if (!Func->getContext()->getFlags().getUseSandboxing()) { 1107 if (!Func->getContext()->getFlags().getUseSandboxing()) {
1132 for (const Inst &I : Insts) { 1108 for (const Inst &I : Insts) {
1133 if (!I.isDeleted() && !I.isRedundantAssign()) { 1109 if (!I.isDeleted() && !I.isRedundantAssign()) {
1134 I.emitIAS(Func); 1110 I.emitIAS(Func);
1135 updateStats(Func, &I); 1111 updateStats(Func, &I);
1136 } 1112 }
1137 } 1113 }
1138 return; 1114 return;
1139 } 1115 }
1140 1116
1141 // The remainder of the function handles emission with sandboxing. 1117 // The remainder of the function handles emission with sandboxing. There are
1142 // There are explicit bundle_lock regions delimited by bundle_lock 1118 // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
1143 // and bundle_unlock instructions. All other instructions are 1119 // instructions. All other instructions are treated as an implicit
1144 // treated as an implicit one-instruction bundle_lock region. 1120 // one-instruction bundle_lock region. Emission is done twice for each
1145 // Emission is done twice for each bundle_lock region. The first 1121 // bundle_lock region. The first pass is a preliminary pass, after which we
1146 // pass is a preliminary pass, after which we can figure out what 1122 // can figure out what nop padding is needed, then roll back, and make the
1147 // nop padding is needed, then roll back, and make the final pass. 1123 // final pass.
1148 // 1124 //
1149 // Ideally, the first pass would be speculative and the second pass 1125 // Ideally, the first pass would be speculative and the second pass would
1150 // would only be done if nop padding were needed, but the structure 1126 // only be done if nop padding were needed, but the structure of the
1151 // of the integrated assembler makes it hard to roll back the state 1127 // integrated assembler makes it hard to roll back the state of label
1152 // of label bindings, label links, and relocation fixups. Instead, 1128 // bindings, label links, and relocation fixups. Instead, the first pass just
1153 // the first pass just disables all mutation of that state. 1129 // disables all mutation of that state.
1154 1130
1155 BundleEmitHelper Helper(Asm, Func->getTarget(), Insts); 1131 BundleEmitHelper Helper(Asm, Func->getTarget(), Insts);
1156 InstList::const_iterator End = Insts.end(); 1132 InstList::const_iterator End = Insts.end();
1157 // Retrying indicates that we had to roll back to the bundle_lock 1133 // Retrying indicates that we had to roll back to the bundle_lock instruction
1158 // instruction to apply padding before the bundle_lock sequence. 1134 // to apply padding before the bundle_lock sequence.
1159 bool Retrying = false; 1135 bool Retrying = false;
1160 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) { 1136 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
1161 if (I->isDeleted() || I->isRedundantAssign()) 1137 if (I->isDeleted() || I->isRedundantAssign())
1162 continue; 1138 continue;
1163 1139
1164 if (llvm::isa<InstBundleLock>(I)) { 1140 if (llvm::isa<InstBundleLock>(I)) {
1165 // Set up the initial bundle_lock state. This should not happen 1141 // Set up the initial bundle_lock state. This should not happen while
1166 // while retrying, because the retry rolls back to the 1142 // retrying, because the retry rolls back to the instruction following
1167 // instruction following the bundle_lock instruction. 1143 // the bundle_lock instruction.
1168 assert(!Retrying); 1144 assert(!Retrying);
1169 Helper.enterBundleLock(I); 1145 Helper.enterBundleLock(I);
1170 continue; 1146 continue;
1171 } 1147 }
1172 1148
1173 if (llvm::isa<InstBundleUnlock>(I)) { 1149 if (llvm::isa<InstBundleUnlock>(I)) {
1174 Helper.enterBundleUnlock(); 1150 Helper.enterBundleUnlock();
1175 if (Retrying) { 1151 if (Retrying) {
1176 // Make sure all instructions are in the same bundle. 1152 // Make sure all instructions are in the same bundle.
1177 assert(Helper.isSameBundle()); 1153 assert(Helper.isSameBundle());
1178 // If align_to_end is specified, make sure the next 1154 // If align_to_end is specified, make sure the next instruction begins
1179 // instruction begins the bundle. 1155 // the bundle.
1180 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0); 1156 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
1181 Helper.leaveBundleLockRegion(); 1157 Helper.leaveBundleLockRegion();
1182 Retrying = false; 1158 Retrying = false;
1183 } else { 1159 } else {
1184 // This is the first pass, so roll back for the retry pass. 1160 // This is the first pass, so roll back for the retry pass.
1185 Helper.rollback(); 1161 Helper.rollback();
1186 // Pad to the next bundle if the instruction sequence crossed 1162 // Pad to the next bundle if the instruction sequence crossed a bundle
1187 // a bundle boundary. 1163 // boundary.
1188 Helper.padToNextBundle(); 1164 Helper.padToNextBundle();
1189 // Insert additional padding to make AlignToEnd work. 1165 // Insert additional padding to make AlignToEnd work.
1190 Helper.padForAlignToEnd(); 1166 Helper.padForAlignToEnd();
1191 // Prepare for the retry pass after padding is done. 1167 // Prepare for the retry pass after padding is done.
1192 Retrying = true; 1168 Retrying = true;
1193 I = Helper.getBundleLockStart(); 1169 I = Helper.getBundleLockStart();
1194 } 1170 }
1195 continue; 1171 continue;
1196 } 1172 }
1197 1173
(...skipping 10 matching lines...) Expand all
1208 I->emitIAS(Func); 1184 I->emitIAS(Func);
1209 Helper.enterBundleUnlock(); 1185 Helper.enterBundleUnlock();
1210 Helper.rollback(); 1186 Helper.rollback();
1211 Helper.padToNextBundle(); 1187 Helper.padToNextBundle();
1212 I->emitIAS(Func); 1188 I->emitIAS(Func);
1213 updateStats(Func, I); 1189 updateStats(Func, I);
1214 Helper.leaveBundleLockRegion(); 1190 Helper.leaveBundleLockRegion();
1215 } 1191 }
1216 } 1192 }
1217 1193
1218 // Don't allow bundle locking across basic blocks, to keep the 1194 // Don't allow bundle locking across basic blocks, to keep the backtracking
1219 // backtracking mechanism simple. 1195 // mechanism simple.
1220 assert(!Helper.isInBundleLockRegion()); 1196 assert(!Helper.isInBundleLockRegion());
1221 assert(!Retrying); 1197 assert(!Retrying);
1222 } 1198 }
1223 1199
1224 void CfgNode::dump(Cfg *Func) const { 1200 void CfgNode::dump(Cfg *Func) const {
1225 if (!BuildDefs::dump()) 1201 if (!BuildDefs::dump())
1226 return; 1202 return;
1227 Func->setCurrentNode(this); 1203 Func->setCurrentNode(this);
1228 Ostream &Str = Func->getContext()->getStrDump(); 1204 Ostream &Str = Func->getContext()->getStrDump();
1229 Liveness *Liveness = Func->getLiveness(); 1205 Liveness *Liveness = Func->getLiveness();
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
1327 InstIntrinsicCall *Inst = InstIntrinsicCall::create( 1303 InstIntrinsicCall *Inst = InstIntrinsicCall::create(
1328 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); 1304 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
1329 Inst->addArg(AtomicRMWOp); 1305 Inst->addArg(AtomicRMWOp);
1330 Inst->addArg(Counter); 1306 Inst->addArg(Counter);
1331 Inst->addArg(One); 1307 Inst->addArg(One);
1332 Inst->addArg(OrderAcquireRelease); 1308 Inst->addArg(OrderAcquireRelease);
1333 Insts.push_front(Inst); 1309 Insts.push_front(Inst);
1334 } 1310 }
1335 1311
1336 } // end of namespace Ice 1312 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceCfgNode.h ('k') | src/IceClFlags.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698