Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1465)

Side by Side Diff: src/IceRegAlloc.cpp

Issue 1310833003: Refactor LinearScan::scan from one huge function into smaller functions. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Final edits and rebase Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceRegAlloc.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===// 1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// This file implements the LinearScan class, which performs the 11 /// This file implements the LinearScan class, which performs the linear-scan
12 /// linear-scan register allocation after liveness analysis has been 12 /// register allocation after liveness analysis has been performed.
13 /// performed.
14 /// 13 ///
15 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
16 15
17 #include "IceRegAlloc.h" 16 #include "IceRegAlloc.h"
18 17
19 #include "IceCfg.h" 18 #include "IceCfg.h"
20 #include "IceCfgNode.h" 19 #include "IceCfgNode.h"
21 #include "IceInst.h" 20 #include "IceInst.h"
22 #include "IceOperand.h" 21 #include "IceOperand.h"
23 #include "IceTargetLowering.h" 22 #include "IceTargetLowering.h"
24 23
25 namespace Ice { 24 namespace Ice {
26 25
27 namespace { 26 namespace {
28 27
29 // TODO(stichnot): Statically choose the size based on the target
30 // being compiled.
31 constexpr size_t REGS_SIZE = 32;
32
33 // Returns true if Var has any definitions within Item's live range. 28 // Returns true if Var has any definitions within Item's live range.
34 // TODO(stichnot): Consider trimming the Definitions list similar to 29 // TODO(stichnot): Consider trimming the Definitions list similar to how the
35 // how the live ranges are trimmed, since all the overlapsDefs() tests 30 // live ranges are trimmed, since all the overlapsDefs() tests are whether some
36 // are whether some variable's definitions overlap Cur, and trimming 31 // variable's definitions overlap Cur, and trimming is with respect to
37 // is with respect Cur.start. Initial tests show no measurable 32 // Initial tests show no measurable performance difference, so we'll keep the
38 // performance difference, so we'll keep the code simple for now. 33 // code simple for now.
39 bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) { 34 bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) {
40 constexpr bool UseTrimmed = true; 35 constexpr bool UseTrimmed = true;
41 VariablesMetadata *VMetadata = Func->getVMetadata(); 36 VariablesMetadata *VMetadata = Func->getVMetadata();
42 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) 37 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
43 if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed)) 38 if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed))
44 return true; 39 return true;
45 const InstDefList &Defs = VMetadata->getLatterDefinitions(Var); 40 const InstDefList &Defs = VMetadata->getLatterDefinitions(Var);
46 for (size_t i = 0; i < Defs.size(); ++i) { 41 for (size_t i = 0; i < Defs.size(); ++i) {
47 if (Item->getLiveRange().overlapsInst(Defs[i]->getNumber(), UseTrimmed)) 42 if (Item->getLiveRange().overlapsInst(Defs[i]->getNumber(), UseTrimmed))
48 return true; 43 return true;
(...skipping 26 matching lines...) Expand all
75 Ostream &Str = Func->getContext()->getStrDump(); 70 Ostream &Str = Func->getContext()->getStrDump();
76 char buf[30]; 71 char buf[30];
77 snprintf(buf, llvm::array_lengthof(buf), "%2d", Var->getRegNumTmp()); 72 snprintf(buf, llvm::array_lengthof(buf), "%2d", Var->getRegNumTmp());
78 Str << "R=" << buf << " V="; 73 Str << "R=" << buf << " V=";
79 Var->dump(Func); 74 Var->dump(Func);
80 Str << " Range=" << Var->getLiveRange(); 75 Str << " Range=" << Var->getLiveRange();
81 } 76 }
82 77
83 } // end of anonymous namespace 78 } // end of anonymous namespace
84 79
85 // Prepare for full register allocation of all variables. We depend 80 LinearScan::LinearScan(Cfg *Func)
86 // on liveness analysis to have calculated live ranges. 81 : Func(Func), Ctx(Func->getContext()),
82 Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {}
83
84 // Prepare for full register allocation of all variables. We depend on
85 // liveness analysis to have calculated live ranges.
87 void LinearScan::initForGlobal() { 86 void LinearScan::initForGlobal() {
88 TimerMarker T(TimerStack::TT_initUnhandled, Func); 87 TimerMarker T(TimerStack::TT_initUnhandled, Func);
89 FindPreference = true; 88 FindPreference = true;
90 // For full register allocation, normally we want to enable FindOverlap 89 // For full register allocation, normally we want to enable FindOverlap
91 // (meaning we look for opportunities for two overlapping live ranges to 90 // (meaning we look for opportunities for two overlapping live ranges to
92 // safely share the same register). However, we disable it for phi-lowering 91 // safely share the same register). However, we disable it for phi-lowering
93 // register allocation since no overlap opportunities should be available and 92 // register allocation since no overlap opportunities should be available and
94 // it's more expensive to look for opportunities. 93 // it's more expensive to look for opportunities.
95 FindOverlap = (Kind != RAK_Phi); 94 FindOverlap = (Kind != RAK_Phi);
96 const VarList &Vars = Func->getVariables(); 95 const VarList &Vars = Func->getVariables();
97 Unhandled.reserve(Vars.size()); 96 Unhandled.reserve(Vars.size());
98 UnhandledPrecolored.reserve(Vars.size()); 97 UnhandledPrecolored.reserve(Vars.size());
99 // Gather the live ranges of all variables and add them to the 98 // Gather the live ranges of all variables and add them to the Unhandled set.
100 // Unhandled set.
101 for (Variable *Var : Vars) { 99 for (Variable *Var : Vars) {
102 // Explicitly don't consider zero-weight variables, which are 100 // Explicitly don't consider zero-weight variables, which are meant to be
103 // meant to be spill slots. 101 // spill slots.
104 if (Var->getWeight().isZero()) 102 if (Var->getWeight().isZero())
105 continue; 103 continue;
106 // Don't bother if the variable has a null live range, which means 104 // Don't bother if the variable has a null live range, which means it was
107 // it was never referenced. 105 // never referenced.
108 if (Var->getLiveRange().isEmpty()) 106 if (Var->getLiveRange().isEmpty())
109 continue; 107 continue;
110 Var->untrimLiveRange(); 108 Var->untrimLiveRange();
111 Unhandled.push_back(Var); 109 Unhandled.push_back(Var);
112 if (Var->hasReg()) { 110 if (Var->hasReg()) {
113 Var->setRegNumTmp(Var->getRegNum()); 111 Var->setRegNumTmp(Var->getRegNum());
114 Var->setLiveRangeInfiniteWeight(); 112 Var->setLiveRangeInfiniteWeight();
115 UnhandledPrecolored.push_back(Var); 113 UnhandledPrecolored.push_back(Var);
116 } 114 }
117 } 115 }
118 116
119 // Build the (ordered) list of FakeKill instruction numbers. 117 // Build the (ordered) list of FakeKill instruction numbers.
120 Kills.clear(); 118 Kills.clear();
121 // Phi lowering should not be creating new call instructions, so there should 119 // Phi lowering should not be creating new call instructions, so there should
122 // be no infinite-weight not-yet-colored live ranges that span a call 120 // be no infinite-weight not-yet-colored live ranges that span a call
123 // instruction, hence no need to construct the Kills list. 121 // instruction, hence no need to construct the Kills list.
124 if (Kind == RAK_Phi) 122 if (Kind == RAK_Phi)
125 return; 123 return;
126 for (CfgNode *Node : Func->getNodes()) { 124 for (CfgNode *Node : Func->getNodes()) {
127 for (Inst &I : Node->getInsts()) { 125 for (Inst &I : Node->getInsts()) {
128 if (auto Kill = llvm::dyn_cast<InstFakeKill>(&I)) { 126 if (auto Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
129 if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted()) 127 if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
130 Kills.push_back(I.getNumber()); 128 Kills.push_back(I.getNumber());
131 } 129 }
132 } 130 }
133 } 131 }
134 } 132 }
135 133
136 // Prepare for very simple register allocation of only infinite-weight 134 // Prepare for very simple register allocation of only infinite-weight
137 // Variables while respecting pre-colored Variables. Some properties 135 // Variables while respecting pre-colored Variables. Some properties we take
138 // we take advantage of: 136 // advantage of:
139 // 137 //
140 // * Live ranges of interest consist of a single segment. 138 // * Live ranges of interest consist of a single segment.
141 // 139 //
142 // * Live ranges of interest never span a call instruction. 140 // * Live ranges of interest never span a call instruction.
143 // 141 //
144 // * Phi instructions are not considered because either phis have 142 // * Phi instructions are not considered because either phis have already been
145 // already been lowered, or they don't contain any pre-colored or 143 // lowered, or they don't contain any pre-colored or infinite-weight
146 // infinite-weight Variables. 144 // Variables.
147 // 145 //
148 // * We don't need to renumber instructions before computing live 146 // * We don't need to renumber instructions before computing live ranges
149 // ranges because all the high-level ICE instructions are deleted 147 // because all the high-level ICE instructions are deleted prior to lowering,
150 // prior to lowering, and the low-level instructions are added in 148 // and the low-level instructions are added in monotonically increasing order.
151 // monotonically increasing order.
152 // 149 //
153 // * There are no opportunities for register preference or allowing 150 // * There are no opportunities for register preference or allowing overlap.
154 // overlap.
155 // 151 //
156 // Some properties we aren't (yet) taking advantage of: 152 // Some properties we aren't (yet) taking advantage of:
157 // 153 //
158 // * Because live ranges are a single segment, the Inactive set will 154 // * Because live ranges are a single segment, the Inactive set will always be
159 // always be empty, and the live range trimming operation is 155 // empty, and the live range trimming operation is unnecessary.
160 // unnecessary.
161 // 156 //
162 // * Calculating overlap of single-segment live ranges could be 157 // * Calculating overlap of single-segment live ranges could be optimized a
163 // optimized a bit. 158 // bit.
164 void LinearScan::initForInfOnly() { 159 void LinearScan::initForInfOnly() {
165 TimerMarker T(TimerStack::TT_initUnhandled, Func); 160 TimerMarker T(TimerStack::TT_initUnhandled, Func);
166 FindPreference = false; 161 FindPreference = false;
167 FindOverlap = false; 162 FindOverlap = false;
168 SizeT NumVars = 0; 163 SizeT NumVars = 0;
169 const VarList &Vars = Func->getVariables(); 164 const VarList &Vars = Func->getVariables();
170 165
171 // Iterate across all instructions and record the begin and end of 166 // Iterate across all instructions and record the begin and end of the live
172 // the live range for each variable that is pre-colored or infinite 167 // range for each variable that is pre-colored or infinite weight.
173 // weight.
174 std::vector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel); 168 std::vector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
175 std::vector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel); 169 std::vector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
176 for (CfgNode *Node : Func->getNodes()) { 170 for (CfgNode *Node : Func->getNodes()) {
177 for (Inst &Inst : Node->getInsts()) { 171 for (Inst &Inst : Node->getInsts()) {
178 if (Inst.isDeleted()) 172 if (Inst.isDeleted())
179 continue; 173 continue;
180 if (const Variable *Var = Inst.getDest()) { 174 if (const Variable *Var = Inst.getDest()) {
181 if (!Var->getIgnoreLiveness() && 175 if (!Var->getIgnoreLiveness() &&
182 (Var->hasReg() || Var->getWeight().isInf())) { 176 (Var->hasReg() || Var->getWeight().isInf())) {
183 if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) { 177 if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
(...skipping 28 matching lines...) Expand all
212 Var->addLiveRange(LRBegin[i], LREnd[i], WeightDelta); 206 Var->addLiveRange(LRBegin[i], LREnd[i], WeightDelta);
213 Var->untrimLiveRange(); 207 Var->untrimLiveRange();
214 if (Var->hasReg()) { 208 if (Var->hasReg()) {
215 Var->setRegNumTmp(Var->getRegNum()); 209 Var->setRegNumTmp(Var->getRegNum());
216 Var->setLiveRangeInfiniteWeight(); 210 Var->setLiveRangeInfiniteWeight();
217 UnhandledPrecolored.push_back(Var); 211 UnhandledPrecolored.push_back(Var);
218 } 212 }
219 --NumVars; 213 --NumVars;
220 } 214 }
221 } 215 }
222 // This isn't actually a fatal condition, but it would be nice to 216 // This isn't actually a fatal condition, but it would be nice to know if we
223 // know if we somehow pre-calculated Unhandled's size wrong. 217 // somehow pre-calculated Unhandled's size wrong.
224 assert(NumVars == 0); 218 assert(NumVars == 0);
225 219
226 // Don't build up the list of Kills because we know that no 220 // Don't build up the list of Kills because we know that no infinite-weight
227 // infinite-weight Variable has a live range spanning a call. 221 // Variable has a live range spanning a call.
228 Kills.clear(); 222 Kills.clear();
229 } 223 }
230 224
231 void LinearScan::init(RegAllocKind Kind) { 225 void LinearScan::init(RegAllocKind Kind) {
232 this->Kind = Kind; 226 this->Kind = Kind;
233 Unhandled.clear(); 227 Unhandled.clear();
234 UnhandledPrecolored.clear(); 228 UnhandledPrecolored.clear();
235 Handled.clear(); 229 Handled.clear();
236 Inactive.clear(); 230 Inactive.clear();
237 Active.clear(); 231 Active.clear();
(...skipping 26 matching lines...) Expand all
264 Handled.reserve(Unhandled.size()); 258 Handled.reserve(Unhandled.size());
265 Inactive.reserve(Unhandled.size()); 259 Inactive.reserve(Unhandled.size());
266 Active.reserve(Unhandled.size()); 260 Active.reserve(Unhandled.size());
267 } 261 }
268 262
269 // This is called when Cur must be allocated a register but no registers are 263 // This is called when Cur must be allocated a register but no registers are
270 // available across Cur's live range. To handle this, we find a register that 264 // available across Cur's live range. To handle this, we find a register that
271 // is not explicitly used during Cur's live range, spill that register to a 265 // is not explicitly used during Cur's live range, spill that register to a
272 // stack location right before Cur's live range begins, and fill (reload) the 266 // stack location right before Cur's live range begins, and fill (reload) the
273 // register from the stack location right after Cur's live range ends. 267 // register from the stack location right after Cur's live range ends.
274 void LinearScan::addSpillFill(Variable *Cur, llvm::SmallBitVector RegMask) { 268 void LinearScan::addSpillFill(IterationState &Iter) {
275 // Identify the actual instructions that begin and end Cur's live range. 269 // Identify the actual instructions that begin and end Iter.Cur's live range.
276 // Iterate through Cur's node's instruction list until we find the actual 270 // Iterate through Iter.Cur's node's instruction list until we find the actual
277 // instructions with instruction numbers corresponding to Cur's recorded live 271 // instructions with instruction numbers corresponding to Iter.Cur's recorded
278 // range endpoints. This sounds inefficient but shouldn't be a problem in 272 // live range endpoints. This sounds inefficient but shouldn't be a problem
279 // practice because: 273 // in practice because:
280 // (1) This function is almost never called in practice. 274 // (1) This function is almost never called in practice.
281 // (2) Since this register over-subscription problem happens only for 275 // (2) Since this register over-subscription problem happens only for
282 // phi-lowered instructions, the number of instructions in the node is 276 // phi-lowered instructions, the number of instructions in the node is
283 // proportional to the number of phi instructions in the original node, 277 // proportional to the number of phi instructions in the original node,
284 // which is never very large in practice. 278 // which is never very large in practice.
285 // (3) We still have to iterate through all instructions of Cur's live range 279 // (3) We still have to iterate through all instructions of Iter.Cur's live
286 // to find all explicitly used registers (though the live range is usually 280 // range to find all explicitly used registers (though the live range is
287 // only 2-3 instructions), so the main cost that could be avoided would be 281 // usually only 2-3 instructions), so the main cost that could be avoided
288 // finding the instruction that begin's Cur's live range. 282 // would be finding the instruction that begin's Iter.Cur's live range.
289 assert(!Cur->getLiveRange().isEmpty()); 283 assert(!Iter.Cur->getLiveRange().isEmpty());
290 InstNumberT Start = Cur->getLiveRange().getStart(); 284 InstNumberT Start = Iter.Cur->getLiveRange().getStart();
291 InstNumberT End = Cur->getLiveRange().getEnd(); 285 InstNumberT End = Iter.Cur->getLiveRange().getEnd();
292 CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Cur); 286 CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur);
293 assert(Node); 287 assert(Node);
294 InstList &Insts = Node->getInsts(); 288 InstList &Insts = Node->getInsts();
295 InstList::iterator SpillPoint = Insts.end(); 289 InstList::iterator SpillPoint = Insts.end();
296 InstList::iterator FillPoint = Insts.end(); 290 InstList::iterator FillPoint = Insts.end();
297 // Stop searching after we have found both the SpillPoint and the FillPoint. 291 // Stop searching after we have found both the SpillPoint and the FillPoint.
298 for (auto I = Insts.begin(), E = Insts.end(); 292 for (auto I = Insts.begin(), E = Insts.end();
299 I != E && (SpillPoint == E || FillPoint == E); ++I) { 293 I != E && (SpillPoint == E || FillPoint == E); ++I) {
300 if (I->getNumber() == Start) 294 if (I->getNumber() == Start)
301 SpillPoint = I; 295 SpillPoint = I;
302 if (I->getNumber() == End) 296 if (I->getNumber() == End)
303 FillPoint = I; 297 FillPoint = I;
304 if (SpillPoint != E) { 298 if (SpillPoint != E) {
305 // Remove from RegMask any physical registers referenced during Cur's live 299 // Remove from RegMask any physical registers referenced during Cur's live
306 // range. Start looking after SpillPoint gets set, i.e. once Cur's live 300 // range. Start looking after SpillPoint gets set, i.e. once Cur's live
307 // range begins. 301 // range begins.
308 for (SizeT i = 0; i < I->getSrcSize(); ++i) { 302 for (SizeT i = 0; i < I->getSrcSize(); ++i) {
309 Operand *Src = I->getSrc(i); 303 Operand *Src = I->getSrc(i);
310 SizeT NumVars = Src->getNumVars(); 304 SizeT NumVars = Src->getNumVars();
311 for (SizeT j = 0; j < NumVars; ++j) { 305 for (SizeT j = 0; j < NumVars; ++j) {
312 const Variable *Var = Src->getVar(j); 306 const Variable *Var = Src->getVar(j);
313 if (Var->hasRegTmp()) 307 if (Var->hasRegTmp())
314 RegMask[Var->getRegNumTmp()] = false; 308 Iter.RegMask[Var->getRegNumTmp()] = false;
315 } 309 }
316 } 310 }
317 } 311 }
318 } 312 }
319 assert(SpillPoint != Insts.end()); 313 assert(SpillPoint != Insts.end());
320 assert(FillPoint != Insts.end()); 314 assert(FillPoint != Insts.end());
321 ++FillPoint; 315 ++FillPoint;
322 // TODO(stichnot): Randomize instead of find_first(). 316 // TODO(stichnot): Randomize instead of find_first().
323 int32_t RegNum = RegMask.find_first(); 317 int32_t RegNum = Iter.RegMask.find_first();
324 assert(RegNum != -1); 318 assert(RegNum != -1);
325 Cur->setRegNumTmp(RegNum); 319 Iter.Cur->setRegNumTmp(RegNum);
326 TargetLowering *Target = Func->getTarget(); 320 TargetLowering *Target = Func->getTarget();
327 Variable *Preg = Target->getPhysicalRegister(RegNum, Cur->getType()); 321 Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
328 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc 322 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
329 // is correctly identified as !isMultiBlock(), reducing stack frame size. 323 // is correctly identified as !isMultiBlock(), reducing stack frame size.
330 Variable *SpillLoc = Func->makeVariable(Cur->getType()); 324 Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
331 // Add "reg=FakeDef;spill=reg" before SpillPoint 325 // Add "reg=FakeDef;spill=reg" before SpillPoint
332 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); 326 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
333 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg)); 327 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg));
334 // add "reg=spill;FakeUse(reg)" before FillPoint 328 // add "reg=spill;FakeUse(reg)" before FillPoint
335 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc)); 329 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc));
336 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg)); 330 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg));
337 } 331 }
338 332
339 // Implements the linear-scan algorithm. Based on "Linear Scan 333 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) {
340 // Register Allocation in the Context of SSA Form and Register 334 for (SizeT I = Active.size(); I > 0; --I) {
341 // Constraints" by Hanspeter Mössenböck and Michael Pfeiffer, 335 const SizeT Index = I - 1;
342 // ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF . This 336 Variable *Item = Active[Index];
343 // implementation is modified to take affinity into account and allow 337 Item->trimLiveRange(Cur->getLiveRange().getStart());
344 // two interfering variables to share the same register in certain 338 bool Moved = false;
345 // cases. 339 if (Item->rangeEndsBefore(Cur)) {
340 // Move Item from Active to Handled list.
341 dumpLiveRangeTrace("Expiring ", Cur);
342 moveItem(Active, Index, Handled);
343 Moved = true;
344 } else if (!Item->rangeOverlapsStart(Cur)) {
345 // Move Item from Active to Inactive list.
346 dumpLiveRangeTrace("Inactivating ", Cur);
347 moveItem(Active, Index, Inactive);
348 Moved = true;
349 }
350 if (Moved) {
351 // Decrement Item from RegUses[].
352 assert(Item->hasRegTmp());
353 int32_t RegNum = Item->getRegNumTmp();
354 --RegUses[RegNum];
355 assert(RegUses[RegNum] >= 0);
356 }
357 }
358 }
359
360 void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) {
361 for (SizeT I = Inactive.size(); I > 0; --I) {
362 const SizeT Index = I - 1;
363 Variable *Item = Inactive[Index];
364 Item->trimLiveRange(Cur->getLiveRange().getStart());
365 if (Item->rangeEndsBefore(Cur)) {
366 // Move Item from Inactive to Handled list.
367 dumpLiveRangeTrace("Expiring ", Cur);
368 moveItem(Inactive, Index, Handled);
369 } else if (Item->rangeOverlapsStart(Cur)) {
370 // Move Item from Inactive to Active list.
371 dumpLiveRangeTrace("Reactivating ", Cur);
372 moveItem(Inactive, Index, Active);
373 // Increment Item in RegUses[].
374 assert(Item->hasRegTmp());
375 int32_t RegNum = Item->getRegNumTmp();
376 assert(RegUses[RegNum] >= 0);
377 ++RegUses[RegNum];
378 }
379 }
380 }
381
382 // Infer register preference and allowable overlap. Only form a preference when
383 // the current Variable has an unambiguous "first" definition. The preference
384 // is some source Variable of the defining instruction that either is assigned
385 // a register that is currently free, or that is assigned a register that is
386 // not free but overlap is allowed. Overlap is allowed when the Variable under
387 // consideration is single-definition, and its definition is a simple
388 // assignment - i.e., the register gets copied/aliased but is never modified.
389 // Furthermore, overlap is only allowed when preferred Variable definition
390 // instructions do not appear within the current Variable's live range.
391 void LinearScan::findRegisterPreference(IterationState &Iter) {
392 Iter.Prefer = nullptr;
393 Iter.PreferReg = Variable::NoRegister;
394 Iter.AllowOverlap = false;
395
396 if (FindPreference) {
397 VariablesMetadata *VMetadata = Func->getVMetadata();
398 if (const Inst *DefInst = VMetadata->getFirstDefinition(Iter.Cur)) {
399 assert(DefInst->getDest() == Iter.Cur);
400 bool IsAssign = DefInst->isSimpleAssign();
401 bool IsSingleDef = !VMetadata->isMultiDef(Iter.Cur);
402 for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) {
403 // TODO(stichnot): Iterate through the actual Variables of the
404 // instruction, not just the source operands. This could capture Load
405 // instructions, including address mode optimization, for Prefer (but
406 // not for AllowOverlap).
407 if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) {
408 int32_t SrcReg = SrcVar->getRegNumTmp();
409 // Only consider source variables that have (so far) been assigned a
410 // register. That register must be one in the RegMask set, e.g.
411 // don't try to prefer the stack pointer as a result of the stacksave
412 // intrinsic.
413 if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) {
414 if (FindOverlap && !Iter.Free[SrcReg]) {
415 // Don't bother trying to enable AllowOverlap if the register is
416 // already free.
417 Iter.AllowOverlap = IsSingleDef && IsAssign &&
418 !overlapsDefs(Func, Iter.Cur, SrcVar);
419 }
420 if (Iter.AllowOverlap || Iter.Free[SrcReg]) {
421 Iter.Prefer = SrcVar;
422 Iter.PreferReg = SrcReg;
423 }
424 }
425 }
426 }
427 if (Verbose && Iter.Prefer) {
428 Ostream &Str = Ctx->getStrDump();
429 Str << "Initial Iter.Prefer=";
430 Iter.Prefer->dump(Func);
431 Str << " R=" << Iter.PreferReg
432 << " LIVE=" << Iter.Prefer->getLiveRange()
433 << " Overlap=" << Iter.AllowOverlap << "\n";
434 }
435 }
436 }
437 }
438
439 // Remove registers from the Free[] list where an Inactive range overlaps with
440 // the current range.
441 void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
442 for (const Variable *Item : Inactive) {
443 if (Item->rangeOverlaps(Iter.Cur)) {
444 int32_t RegNum = Item->getRegNumTmp();
445 // Don't assert(Free[RegNum]) because in theory (though probably never in
446 // practice) there could be two inactive variables that were marked with
447 // AllowOverlap.
448 Iter.Free[RegNum] = false;
449 // Disable AllowOverlap if an Inactive variable, which is not Prefer,
450 // shares Prefer's register, and has a definition within Cur's live
451 // range.
452 if (Iter.AllowOverlap && Item != Iter.Prefer &&
453 RegNum == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
454 Iter.AllowOverlap = false;
455 dumpDisableOverlap(Func, Item, "Inactive");
456 }
457 }
458 }
459 }
460
461 // Remove registers from the Free[] list where an Unhandled pre-colored range
462 // overlaps with the current range, and set those registers to infinite weight
463 // so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is
464 // an early exit check that turns a guaranteed O(N^2) algorithm into expected
465 // linear complexity.
466 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
467 for (Variable *Item : reverse_range(UnhandledPrecolored)) {
468 assert(Item->hasReg());
469 if (Iter.Cur->rangeEndsBefore(Item))
470 break;
471 if (Item->rangeOverlaps(Iter.Cur)) {
472 int32_t ItemReg = Item->getRegNum(); // Note: not getRegNumTmp()
473 Iter.Weights[ItemReg].setWeight(RegWeight::Inf);
474 Iter.Free[ItemReg] = false;
475 Iter.PrecoloredUnhandledMask[ItemReg] = true;
476 // Disable Iter.AllowOverlap if the preferred register is one of these
477 // pre-colored unhandled overlapping ranges.
478 if (Iter.AllowOverlap && ItemReg == Iter.PreferReg) {
479 Iter.AllowOverlap = false;
480 dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
481 }
482 }
483 }
484 }
485
486 void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
487 int32_t RegNum = Cur->getRegNum();
488 // RegNumTmp should have already been set above.
489 assert(Cur->getRegNumTmp() == RegNum);
490 dumpLiveRangeTrace("Precoloring ", Cur);
491 Active.push_back(Cur);
492 assert(RegUses[RegNum] >= 0);
493 ++RegUses[RegNum];
494 assert(!UnhandledPrecolored.empty());
495 assert(UnhandledPrecolored.back() == Cur);
496 UnhandledPrecolored.pop_back();
497 }
498
499 void LinearScan::allocatePreferredRegister(IterationState &Iter) {
500 Iter.Cur->setRegNumTmp(Iter.PreferReg);
501 dumpLiveRangeTrace("Preferring ", Iter.Cur);
502 assert(RegUses[Iter.PreferReg] >= 0);
503 ++RegUses[Iter.PreferReg];
504 Active.push_back(Iter.Cur);
505 }
506
507 void LinearScan::allocateFreeRegister(IterationState &Iter) {
508 int32_t RegNum = Iter.Free.find_first();
509 Iter.Cur->setRegNumTmp(RegNum);
510 dumpLiveRangeTrace("Allocating ", Iter.Cur);
511 assert(RegUses[RegNum] >= 0);
512 ++RegUses[RegNum];
513 Active.push_back(Iter.Cur);
514 }
515
516 void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
517 // Check Active ranges.
518 for (const Variable *Item : Active) {
519 assert(Item->rangeOverlaps(Iter.Cur));
520 int32_t RegNum = Item->getRegNumTmp();
521 assert(Item->hasRegTmp());
522 Iter.Weights[RegNum].addWeight(Item->getLiveRange().getWeight());
523 }
524 // Same as above, but check Inactive ranges instead of Active.
525 for (const Variable *Item : Inactive) {
526 int32_t RegNum = Item->getRegNumTmp();
527 assert(Item->hasRegTmp());
528 if (Item->rangeOverlaps(Iter.Cur))
529 Iter.Weights[RegNum].addWeight(Item->getLiveRange().getWeight());
530 }
531
532 // All the weights are now calculated. Find the register with smallest
533 // weight.
534 int32_t MinWeightIndex = Iter.RegMask.find_first();
535 // MinWeightIndex must be valid because of the initial RegMask.any() test.
536 assert(MinWeightIndex >= 0);
537 for (SizeT i = MinWeightIndex + 1; i < Iter.Weights.size(); ++i) {
538 if (Iter.RegMask[i] && Iter.Weights[i] < Iter.Weights[MinWeightIndex])
539 MinWeightIndex = i;
540 }
541
542 if (Iter.Cur->getLiveRange().getWeight() <= Iter.Weights[MinWeightIndex]) {
543 // Cur doesn't have priority over any other live ranges, so don't allocate
544 // any register to it, and move it to the Handled state.
545 Handled.push_back(Iter.Cur);
546 if (Iter.Cur->getLiveRange().getWeight().isInf()) {
547 if (Kind == RAK_Phi)
548 addSpillFill(Iter);
549 else
550 Func->setError("Unable to find a physical register for an "
551 "infinite-weight live range");
552 }
553 } else {
554 // Evict all live ranges in Active that register number MinWeightIndex is
555 // assigned to.
556 for (SizeT I = Active.size(); I > 0; --I) {
557 const SizeT Index = I - 1;
558 Variable *Item = Active[Index];
559 if (Item->getRegNumTmp() == MinWeightIndex) {
560 dumpLiveRangeTrace("Evicting ", Item);
561 --RegUses[MinWeightIndex];
562 assert(RegUses[MinWeightIndex] >= 0);
563 Item->setRegNumTmp(Variable::NoRegister);
564 moveItem(Active, Index, Handled);
565 }
566 }
567 // Do the same for Inactive.
568 for (SizeT I = Inactive.size(); I > 0; --I) {
569 const SizeT Index = I - 1;
570 Variable *Item = Inactive[Index];
571 // Note: The Item->rangeOverlaps(Cur) clause is not part of the
572 // description of AssignMemLoc() in the original paper. But there
573 // doesn't seem to be any need to evict an inactive live range that
574 // doesn't overlap with the live range currently being considered. It's
575 // especially bad if we would end up evicting an infinite-weight but
576 // currently-inactive live range. The most common situation for this
577 // would be a scratch register kill set for call instructions.
578 if (Item->getRegNumTmp() == MinWeightIndex &&
579 Item->rangeOverlaps(Iter.Cur)) {
580 dumpLiveRangeTrace("Evicting ", Item);
581 Item->setRegNumTmp(Variable::NoRegister);
582 moveItem(Inactive, Index, Handled);
583 }
584 }
585 // Assign the register to Cur.
586 Iter.Cur->setRegNumTmp(MinWeightIndex);
587 assert(RegUses[MinWeightIndex] >= 0);
588 ++RegUses[MinWeightIndex];
589 Active.push_back(Iter.Cur);
590 dumpLiveRangeTrace("Allocating ", Iter.Cur);
591 }
592 }
593
594 void LinearScan::assignFinalRegisters(
595 const llvm::SmallBitVector &RegMaskFull,
596 const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) {
597 const size_t NumRegisters = RegMaskFull.size();
598 llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters);
599 if (Randomized) {
600 // Create a random number generator for regalloc randomization. Merge
601 // function's sequence and Kind value as the Salt. Because regAlloc() is
602 // called twice under O2, the second time with RAK_Phi, we check
603 // Kind == RAK_Phi to determine the lowest-order bit to make sure the Salt
604 // is different.
605 uint64_t Salt =
606 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
607 Func->getTarget()->makeRandomRegisterPermutation(
608 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
609 }
610
611 // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
612 // for each Variable.
613 for (Variable *Item : Handled) {
614 int32_t RegNum = Item->getRegNumTmp();
615 int32_t AssignedRegNum = RegNum;
616
617 if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
618 AssignedRegNum = Permutation[RegNum];
619 }
620 if (Verbose) {
621 Ostream &Str = Ctx->getStrDump();
622 if (!Item->hasRegTmp()) {
623 Str << "Not assigning ";
624 Item->dump(Func);
625 Str << "\n";
626 } else {
627 Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
628 : "Assigning ")
629 << Func->getTarget()->getRegName(AssignedRegNum, IceType_i32)
630 << "(r" << AssignedRegNum << ") to ";
631 Item->dump(Func);
632 Str << "\n";
633 }
634 }
635 Item->setRegNum(AssignedRegNum);
636 }
637 }
638
639 // Implements the linear-scan algorithm. Based on "Linear Scan Register
640 // Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
641 // Mössenböck and Michael Pfeiffer,
642 // ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
643 // modified to take affinity into account and allow two interfering variables
644 // to share the same register in certain cases.
346 // 645 //
347 // Requires running Cfg::liveness(Liveness_Intervals) in 646 // Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
348 // preparation. Results are assigned to Variable::RegNum for each 647 // are assigned to Variable::RegNum for each Variable.
349 // Variable.
350 void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, 648 void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
351 bool Randomized) { 649 bool Randomized) {
352 TimerMarker T(TimerStack::TT_linearScan, Func); 650 TimerMarker T(TimerStack::TT_linearScan, Func);
353 assert(RegMaskFull.any()); // Sanity check 651 assert(RegMaskFull.any()); // Sanity check
354 GlobalContext *Ctx = Func->getContext();
355 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_LinearScan);
356 if (Verbose) 652 if (Verbose)
357 Ctx->lockStr(); 653 Ctx->lockStr();
358 Func->resetCurrentNode(); 654 Func->resetCurrentNode();
359 VariablesMetadata *VMetadata = Func->getVMetadata();
360 const size_t NumRegisters = RegMaskFull.size(); 655 const size_t NumRegisters = RegMaskFull.size();
361 llvm::SmallBitVector PreDefinedRegisters(NumRegisters); 656 llvm::SmallBitVector PreDefinedRegisters(NumRegisters);
362 if (Randomized) { 657 if (Randomized) {
363 for (Variable *Var : UnhandledPrecolored) { 658 for (Variable *Var : UnhandledPrecolored) {
364 PreDefinedRegisters[Var->getRegNum()] = true; 659 PreDefinedRegisters[Var->getRegNum()] = true;
365 } 660 }
366 } 661 }
367 662
368 // Build a LiveRange representing the Kills list. 663 // Build a LiveRange representing the Kills list.
369 LiveRange KillsRange(Kills); 664 LiveRange KillsRange(Kills);
370 KillsRange.untrim(); 665 KillsRange.untrim();
371 666
372 // RegUses[I] is the number of live ranges (variables) that register 667 // Reset the register use count
373 // I is currently assigned to. It can be greater than 1 as a result 668 RegUses.resize(NumRegisters);
374 // of AllowOverlap inference below. 669 std::fill(RegUses.begin(), RegUses.end(), 0);
375 llvm::SmallVector<int, REGS_SIZE> RegUses(NumRegisters); 670
376 // Unhandled is already set to all ranges in increasing order of 671 // Unhandled is already set to all ranges in increasing order of start
377 // start points. 672 // points.
378 assert(Active.empty()); 673 assert(Active.empty());
379 assert(Inactive.empty()); 674 assert(Inactive.empty());
380 assert(Handled.empty()); 675 assert(Handled.empty());
381 const TargetLowering::RegSetMask RegsInclude = 676 const TargetLowering::RegSetMask RegsInclude =
382 TargetLowering::RegSet_CallerSave; 677 TargetLowering::RegSet_CallerSave;
383 const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None; 678 const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
384 const llvm::SmallBitVector KillsMask = 679 const llvm::SmallBitVector KillsMask =
385 Func->getTarget()->getRegisterSet(RegsInclude, RegsExclude); 680 Func->getTarget()->getRegisterSet(RegsInclude, RegsExclude);
386 681
682 // Allocate memory once outside the loop
683 IterationState Iter;
684 Iter.Weights.reserve(NumRegisters);
685 Iter.PrecoloredUnhandledMask.reserve(NumRegisters);
686
387 while (!Unhandled.empty()) { 687 while (!Unhandled.empty()) {
388 Variable *Cur = Unhandled.back(); 688 Iter.Cur = Unhandled.back();
389 Unhandled.pop_back(); 689 Unhandled.pop_back();
390 if (Verbose) { 690 dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
391 Ostream &Str = Ctx->getStrDump(); 691 Iter.RegMask =
392 Str << "\nConsidering "; 692 RegMaskFull &
393 dumpLiveRange(Cur, Func); 693 Func->getTarget()->getRegisterSetForType(Iter.Cur->getType());
394 Str << "\n"; 694 KillsRange.trim(Iter.Cur->getLiveRange().getStart());
395 }
396 const llvm::SmallBitVector RegMask =
397 RegMaskFull & Func->getTarget()->getRegisterSetForType(Cur->getType());
398 KillsRange.trim(Cur->getLiveRange().getStart());
399 695
400 // Check for pre-colored ranges. If Cur is pre-colored, it 696 // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
401 // definitely gets that register. Previously processed live 697 // that register. Previously processed live ranges would have avoided that
402 // ranges would have avoided that register due to it being 698 // register due to it being pre-colored. Future processed live ranges won't
403 // pre-colored. Future processed live ranges won't evict that 699 // evict that register because the live range has infinite weight.
404 // register because the live range has infinite weight. 700 if (Iter.Cur->hasReg()) {
405 if (Cur->hasReg()) { 701 allocatePrecoloredRegister(Iter.Cur);
406 int32_t RegNum = Cur->getRegNum();
407 // RegNumTmp should have already been set above.
408 assert(Cur->getRegNumTmp() == RegNum);
409 if (Verbose) {
410 Ostream &Str = Ctx->getStrDump();
411 Str << "Precoloring ";
412 dumpLiveRange(Cur, Func);
413 Str << "\n";
414 }
415 Active.push_back(Cur);
416 assert(RegUses[RegNum] >= 0);
417 ++RegUses[RegNum];
418 assert(!UnhandledPrecolored.empty());
419 assert(UnhandledPrecolored.back() == Cur);
420 UnhandledPrecolored.pop_back();
421 continue; 702 continue;
422 } 703 }
423 704
424 // Check for active ranges that have expired or become inactive. 705 handleActiveRangeExpiredOrInactive(Iter.Cur);
425 for (SizeT I = Active.size(); I > 0; --I) { 706 handleInactiveRangeExpiredOrReactivated(Iter.Cur);
426 const SizeT Index = I - 1; 707
427 Variable *Item = Active[Index]; 708 // Calculate available registers into Free[].
428 Item->trimLiveRange(Cur->getLiveRange().getStart()); 709 Iter.Free = Iter.RegMask;
429 bool Moved = false; 710 for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
430 if (Item->rangeEndsBefore(Cur)) { 711 if (RegUses[i] > 0)
431 // Move Item from Active to Handled list. 712 Iter.Free[i] = false;
432 if (Verbose) {
433 Ostream &Str = Ctx->getStrDump();
434 Str << "Expiring ";
435 dumpLiveRange(Item, Func);
436 Str << "\n";
437 }
438 moveItem(Active, Index, Handled);
439 Moved = true;
440 } else if (!Item->rangeOverlapsStart(Cur)) {
441 // Move Item from Active to Inactive list.
442 if (Verbose) {
443 Ostream &Str = Ctx->getStrDump();
444 Str << "Inactivating ";
445 dumpLiveRange(Item, Func);
446 Str << "\n";
447 }
448 moveItem(Active, Index, Inactive);
449 Moved = true;
450 }
451 if (Moved) {
452 // Decrement Item from RegUses[].
453 assert(Item->hasRegTmp());
454 int32_t RegNum = Item->getRegNumTmp();
455 --RegUses[RegNum];
456 assert(RegUses[RegNum] >= 0);
457 }
458 } 713 }
459 714
460 // Check for inactive ranges that have expired or reactivated. 715 findRegisterPreference(Iter);
461 for (SizeT I = Inactive.size(); I > 0; --I) { 716 filterFreeWithInactiveRanges(Iter);
462 const SizeT Index = I - 1;
463 Variable *Item = Inactive[Index];
464 Item->trimLiveRange(Cur->getLiveRange().getStart());
465 if (Item->rangeEndsBefore(Cur)) {
466 // Move Item from Inactive to Handled list.
467 if (Verbose) {
468 Ostream &Str = Ctx->getStrDump();
469 Str << "Expiring ";
470 dumpLiveRange(Item, Func);
471 Str << "\n";
472 }
473 moveItem(Inactive, Index, Handled);
474 } else if (Item->rangeOverlapsStart(Cur)) {
475 // Move Item from Inactive to Active list.
476 if (Verbose) {
477 Ostream &Str = Ctx->getStrDump();
478 Str << "Reactivating ";
479 dumpLiveRange(Item, Func);
480 Str << "\n";
481 }
482 moveItem(Inactive, Index, Active);
483 // Increment Item in RegUses[].
484 assert(Item->hasRegTmp());
485 int32_t RegNum = Item->getRegNumTmp();
486 assert(RegUses[RegNum] >= 0);
487 ++RegUses[RegNum];
488 }
489 }
490 717
491 // Calculate available registers into Free[]. 718 // Disable AllowOverlap if an Active variable, which is not Prefer, shares
492 llvm::SmallBitVector Free = RegMask; 719 // Prefer's register, and has a definition within Cur's live range.
493 for (SizeT i = 0; i < RegMask.size(); ++i) { 720 if (Iter.AllowOverlap) {
494 if (RegUses[i] > 0)
495 Free[i] = false;
496 }
497
498 // Infer register preference and allowable overlap. Only form a
499 // preference when the current Variable has an unambiguous "first"
500 // definition. The preference is some source Variable of the
501 // defining instruction that either is assigned a register that is
502 // currently free, or that is assigned a register that is not free
503 // but overlap is allowed. Overlap is allowed when the Variable
504 // under consideration is single-definition, and its definition is
505 // a simple assignment - i.e., the register gets copied/aliased
506 // but is never modified. Furthermore, overlap is only allowed
507 // when preferred Variable definition instructions do not appear
508 // within the current Variable's live range.
509 Variable *Prefer = nullptr;
510 int32_t PreferReg = Variable::NoRegister;
511 bool AllowOverlap = false;
512 if (FindPreference) {
513 if (const Inst *DefInst = VMetadata->getFirstDefinition(Cur)) {
514 assert(DefInst->getDest() == Cur);
515 bool IsAssign = DefInst->isSimpleAssign();
516 bool IsSingleDef = !VMetadata->isMultiDef(Cur);
517 for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) {
518 // TODO(stichnot): Iterate through the actual Variables of the
519 // instruction, not just the source operands. This could
520 // capture Load instructions, including address mode
521 // optimization, for Prefer (but not for AllowOverlap).
522 if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) {
523 int32_t SrcReg = SrcVar->getRegNumTmp();
524 // Only consider source variables that have (so far) been
525 // assigned a register. That register must be one in the
526 // RegMask set, e.g. don't try to prefer the stack pointer
527 // as a result of the stacksave intrinsic.
528 if (SrcVar->hasRegTmp() && RegMask[SrcReg]) {
529 if (FindOverlap && !Free[SrcReg]) {
530 // Don't bother trying to enable AllowOverlap if the
531 // register is already free.
532 AllowOverlap =
533 IsSingleDef && IsAssign && !overlapsDefs(Func, Cur, SrcVar);
534 }
535 if (AllowOverlap || Free[SrcReg]) {
536 Prefer = SrcVar;
537 PreferReg = SrcReg;
538 }
539 }
540 }
541 }
542 if (Verbose && Prefer) {
543 Ostream &Str = Ctx->getStrDump();
544 Str << "Initial Prefer=";
545 Prefer->dump(Func);
546 Str << " R=" << PreferReg << " LIVE=" << Prefer->getLiveRange()
547 << " Overlap=" << AllowOverlap << "\n";
548 }
549 }
550 }
551
552 // Remove registers from the Free[] list where an Inactive range
553 // overlaps with the current range.
554 for (const Variable *Item : Inactive) {
555 if (Item->rangeOverlaps(Cur)) {
556 int32_t RegNum = Item->getRegNumTmp();
557 // Don't assert(Free[RegNum]) because in theory (though
558 // probably never in practice) there could be two inactive
559 // variables that were marked with AllowOverlap.
560 Free[RegNum] = false;
561 // Disable AllowOverlap if an Inactive variable, which is not
562 // Prefer, shares Prefer's register, and has a definition
563 // within Cur's live range.
564 if (AllowOverlap && Item != Prefer && RegNum == PreferReg &&
565 overlapsDefs(Func, Cur, Item)) {
566 AllowOverlap = false;
567 dumpDisableOverlap(Func, Item, "Inactive");
568 }
569 }
570 }
571
572 // Disable AllowOverlap if an Active variable, which is not
573 // Prefer, shares Prefer's register, and has a definition within
574 // Cur's live range.
575 if (AllowOverlap) {
576 for (const Variable *Item : Active) { 721 for (const Variable *Item : Active) {
577 int32_t RegNum = Item->getRegNumTmp(); 722 int32_t RegNum = Item->getRegNumTmp();
578 if (Item != Prefer && RegNum == PreferReg && 723 if (Item != Iter.Prefer && RegNum == Iter.PreferReg &&
579 overlapsDefs(Func, Cur, Item)) { 724 overlapsDefs(Func, Iter.Cur, Item)) {
580 AllowOverlap = false; 725 Iter.AllowOverlap = false;
581 dumpDisableOverlap(Func, Item, "Active"); 726 dumpDisableOverlap(Func, Item, "Active");
582 } 727 }
583 } 728 }
584 } 729 }
585 730
586 llvm::SmallVector<RegWeight, REGS_SIZE> Weights(RegMask.size()); 731 Iter.Weights.resize(Iter.RegMask.size());
732 std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());
587 733
588 // Remove registers from the Free[] list where an Unhandled 734 Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
589 // pre-colored range overlaps with the current range, and set those 735 Iter.PrecoloredUnhandledMask.reset();
590 // registers to infinite weight so that they aren't candidates for
591 // eviction. Cur->rangeEndsBefore(Item) is an early exit check
592 // that turns a guaranteed O(N^2) algorithm into expected linear
593 // complexity.
594 llvm::SmallBitVector PrecoloredUnhandledMask(RegMask.size());
595 // Note: PrecoloredUnhandledMask is only used for dumping.
596 for (Variable *Item : reverse_range(UnhandledPrecolored)) {
597 assert(Item->hasReg());
598 if (Cur->rangeEndsBefore(Item))
599 break;
600 if (Item->rangeOverlaps(Cur)) {
601 int32_t ItemReg = Item->getRegNum(); // Note: not getRegNumTmp()
602 Weights[ItemReg].setWeight(RegWeight::Inf);
603 Free[ItemReg] = false;
604 PrecoloredUnhandledMask[ItemReg] = true;
605 // Disable AllowOverlap if the preferred register is one of
606 // these pre-colored unhandled overlapping ranges.
607 if (AllowOverlap && ItemReg == PreferReg) {
608 AllowOverlap = false;
609 dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
610 }
611 }
612 }
613 736
614 // Remove scratch registers from the Free[] list, and mark their 737 filterFreeWithPrecoloredRanges(Iter);
615 // Weights[] as infinite, if KillsRange overlaps Cur's live range. 738
739 // Remove scratch registers from the Free[] list, and mark their Weights[]
740 // as infinite, if KillsRange overlaps Cur's live range.
616 constexpr bool UseTrimmed = true; 741 constexpr bool UseTrimmed = true;
617 if (Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) { 742 if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
618 Free.reset(KillsMask); 743 Iter.Free.reset(KillsMask);
619 for (int i = KillsMask.find_first(); i != -1; 744 for (int i = KillsMask.find_first(); i != -1;
620 i = KillsMask.find_next(i)) { 745 i = KillsMask.find_next(i)) {
621 Weights[i].setWeight(RegWeight::Inf); 746 Iter.Weights[i].setWeight(RegWeight::Inf);
622 if (PreferReg == i) 747 if (Iter.PreferReg == i)
623 AllowOverlap = false; 748 Iter.AllowOverlap = false;
624 } 749 }
625 } 750 }
626 751
627 // Print info about physical register availability. 752 // Print info about physical register availability.
628 if (Verbose) { 753 if (Verbose) {
629 Ostream &Str = Ctx->getStrDump(); 754 Ostream &Str = Ctx->getStrDump();
630 for (SizeT i = 0; i < RegMask.size(); ++i) { 755 for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
631 if (RegMask[i]) { 756 if (Iter.RegMask[i]) {
632 Str << Func->getTarget()->getRegName(i, IceType_i32) 757 Str << Func->getTarget()->getRegName(i, IceType_i32)
633 << "(U=" << RegUses[i] << ",F=" << Free[i] 758 << "(U=" << RegUses[i] << ",F=" << Iter.Free[i]
634 << ",P=" << PrecoloredUnhandledMask[i] << ") "; 759 << ",P=" << Iter.PrecoloredUnhandledMask[i] << ") ";
635 } 760 }
636 } 761 }
637 Str << "\n"; 762 Str << "\n";
638 } 763 }
639 764
640 if (Prefer && (AllowOverlap || Free[PreferReg])) { 765 if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
641 // First choice: a preferred register that is either free or is 766 // First choice: a preferred register that is either free or is allowed
642 // allowed to overlap with its linked variable. 767 // to overlap with its linked variable.
643 Cur->setRegNumTmp(PreferReg); 768 allocatePreferredRegister(Iter);
644 if (Verbose) { 769 } else if (Iter.Free.any()) {
645 Ostream &Str = Ctx->getStrDump(); 770 // Second choice: any free register.
646 Str << "Preferring "; 771 allocateFreeRegister(Iter);
647 dumpLiveRange(Cur, Func);
648 Str << "\n";
649 }
650 assert(RegUses[PreferReg] >= 0);
651 ++RegUses[PreferReg];
652 Active.push_back(Cur);
653 } else if (Free.any()) {
654 // Second choice: any free register. TODO: After explicit
655 // affinity is considered, is there a strategy better than just
656 // picking the lowest-numbered available register?
657 int32_t RegNum = Free.find_first();
658 Cur->setRegNumTmp(RegNum);
659 if (Verbose) {
660 Ostream &Str = Ctx->getStrDump();
661 Str << "Allocating ";
662 dumpLiveRange(Cur, Func);
663 Str << "\n";
664 }
665 assert(RegUses[RegNum] >= 0);
666 ++RegUses[RegNum];
667 Active.push_back(Cur);
668 } else { 772 } else {
669 // Fallback: there are no free registers, so we look for the 773 // Fallback: there are no free registers, so we look for the
670 // lowest-weight register and see if Cur has higher weight. 774 // lowest-weight register and see if Cur has higher weight.
671 // Check Active ranges. 775 handleNoFreeRegisters(Iter);
672 for (const Variable *Item : Active) {
673 assert(Item->rangeOverlaps(Cur));
674 int32_t RegNum = Item->getRegNumTmp();
675 assert(Item->hasRegTmp());
676 Weights[RegNum].addWeight(Item->getLiveRange().getWeight());
677 }
678 // Same as above, but check Inactive ranges instead of Active.
679 for (const Variable *Item : Inactive) {
680 int32_t RegNum = Item->getRegNumTmp();
681 assert(Item->hasRegTmp());
682 if (Item->rangeOverlaps(Cur))
683 Weights[RegNum].addWeight(Item->getLiveRange().getWeight());
684 }
685
686 // All the weights are now calculated. Find the register with
687 // smallest weight.
688 int32_t MinWeightIndex = RegMask.find_first();
689 // MinWeightIndex must be valid because of the initial
690 // RegMask.any() test.
691 assert(MinWeightIndex >= 0);
692 for (SizeT i = MinWeightIndex + 1; i < Weights.size(); ++i) {
693 if (RegMask[i] && Weights[i] < Weights[MinWeightIndex])
694 MinWeightIndex = i;
695 }
696
697 if (Cur->getLiveRange().getWeight() <= Weights[MinWeightIndex]) {
698 // Cur doesn't have priority over any other live ranges, so
699 // don't allocate any register to it, and move it to the
700 // Handled state.
701 Handled.push_back(Cur);
702 if (Cur->getLiveRange().getWeight().isInf()) {
703 if (Kind == RAK_Phi)
704 addSpillFill(Cur, RegMask);
705 else
706 Func->setError("Unable to find a physical register for an "
707 "infinite-weight live range");
708 }
709 } else {
710 // Evict all live ranges in Active that register number
711 // MinWeightIndex is assigned to.
712 for (SizeT I = Active.size(); I > 0; --I) {
713 const SizeT Index = I - 1;
714 Variable *Item = Active[Index];
715 if (Item->getRegNumTmp() == MinWeightIndex) {
716 if (Verbose) {
717 Ostream &Str = Ctx->getStrDump();
718 Str << "Evicting ";
719 dumpLiveRange(Item, Func);
720 Str << "\n";
721 }
722 --RegUses[MinWeightIndex];
723 assert(RegUses[MinWeightIndex] >= 0);
724 Item->setRegNumTmp(Variable::NoRegister);
725 moveItem(Active, Index, Handled);
726 }
727 }
728 // Do the same for Inactive.
729 for (SizeT I = Inactive.size(); I > 0; --I) {
730 const SizeT Index = I - 1;
731 Variable *Item = Inactive[Index];
732 // Note: The Item->rangeOverlaps(Cur) clause is not part of the
733 // description of AssignMemLoc() in the original paper. But
734 // there doesn't seem to be any need to evict an inactive
735 // live range that doesn't overlap with the live range
736 // currently being considered. It's especially bad if we
737 // would end up evicting an infinite-weight but
738 // currently-inactive live range. The most common situation
739 // for this would be a scratch register kill set for call
740 // instructions.
741 if (Item->getRegNumTmp() == MinWeightIndex &&
742 Item->rangeOverlaps(Cur)) {
743 if (Verbose) {
744 Ostream &Str = Ctx->getStrDump();
745 Str << "Evicting ";
746 dumpLiveRange(Item, Func);
747 Str << "\n";
748 }
749 Item->setRegNumTmp(Variable::NoRegister);
750 moveItem(Inactive, Index, Handled);
751 }
752 }
753 // Assign the register to Cur.
754 Cur->setRegNumTmp(MinWeightIndex);
755 assert(RegUses[MinWeightIndex] >= 0);
756 ++RegUses[MinWeightIndex];
757 Active.push_back(Cur);
758 if (Verbose) {
759 Ostream &Str = Ctx->getStrDump();
760 Str << "Allocating ";
761 dumpLiveRange(Cur, Func);
762 Str << "\n";
763 }
764 }
765 } 776 }
766 dump(Func); 777 dump(Func);
767 } 778 }
779
768 // Move anything Active or Inactive to Handled for easier handling. 780 // Move anything Active or Inactive to Handled for easier handling.
769 for (Variable *I : Active) 781 Handled.insert(Handled.end(), Active.begin(), Active.end());
770 Handled.push_back(I);
771 Active.clear(); 782 Active.clear();
772 for (Variable *I : Inactive) 783 Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
773 Handled.push_back(I);
774 Inactive.clear(); 784 Inactive.clear();
775 dump(Func); 785 dump(Func);
776 786
777 llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters); 787 assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);
778 if (Randomized) {
779 // Create a random number generator for regalloc randomization. Merge
780 // function's sequence and Kind value as the Salt. Because regAlloc()
781 // is called twice under O2, the second time with RAK_Phi, we check
782 // Kind == RAK_Phi to determine the lowest-order bit to make sure the
783 // Salt is different.
784 uint64_t Salt =
785 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
786 Func->getTarget()->makeRandomRegisterPermutation(
787 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
788 }
789 788
790 // Finish up by assigning RegNumTmp->RegNum (or a random permutation 789 // TODO: Consider running register allocation one more time, with infinite
791 // thereof) for each Variable. 790 // registers, for two reasons. First, evicted live ranges get a second chance
792 for (Variable *Item : Handled) { 791 // for a register. Second, it allows coalescing of stack slots. If there is
793 int32_t RegNum = Item->getRegNumTmp(); 792 // no time budget for the second register allocation run, each unallocated
794 int32_t AssignedRegNum = RegNum; 793 // variable just gets its own slot.
795
796 if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
797 AssignedRegNum = Permutation[RegNum];
798 }
799 if (Verbose) {
800 Ostream &Str = Ctx->getStrDump();
801 if (!Item->hasRegTmp()) {
802 Str << "Not assigning ";
803 Item->dump(Func);
804 Str << "\n";
805 } else {
806 Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
807 : "Assigning ")
808 << Func->getTarget()->getRegName(AssignedRegNum, IceType_i32)
809 << "(r" << AssignedRegNum << ") to ";
810 Item->dump(Func);
811 Str << "\n";
812 }
813 }
814 Item->setRegNum(AssignedRegNum);
815 }
816
817 // TODO: Consider running register allocation one more time, with
818 // infinite registers, for two reasons. First, evicted live ranges
819 // get a second chance for a register. Second, it allows coalescing
820 // of stack slots. If there is no time budget for the second
821 // register allocation run, each unallocated variable just gets its
822 // own slot.
823 // 794 //
824 // Another idea for coalescing stack slots is to initialize the 795 // Another idea for coalescing stack slots is to initialize the Unhandled
825 // Unhandled list with just the unallocated variables, saving time 796 // list with just the unallocated variables, saving time but not offering
826 // but not offering second-chance opportunities. 797 // second-chance opportunities.
827 798
828 if (Verbose) 799 if (Verbose)
829 Ctx->unlockStr(); 800 Ctx->unlockStr();
830 } 801 }
831 802
832 // ======================== Dump routines ======================== // 803 // ======================== Dump routines ======================== //
833 804
805 void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
806 if (!BuildDefs::dump())
807 return;
808
809 if (Verbose) {
810 Ostream &Str = Ctx->getStrDump();
811 Str << Label;
812 dumpLiveRange(Item, Func);
813 Str << "\n";
814 }
815 }
816
834 void LinearScan::dump(Cfg *Func) const { 817 void LinearScan::dump(Cfg *Func) const {
835 if (!BuildDefs::dump()) 818 if (!BuildDefs::dump())
836 return; 819 return;
837 if (!Func->isVerbose(IceV_LinearScan)) 820 if (!Func->isVerbose(IceV_LinearScan))
838 return; 821 return;
839 Ostream &Str = Func->getContext()->getStrDump(); 822 Ostream &Str = Func->getContext()->getStrDump();
840 Func->resetCurrentNode(); 823 Func->resetCurrentNode();
841 Str << "**** Current regalloc state:\n"; 824 Str << "**** Current regalloc state:\n";
842 Str << "++++++ Handled:\n"; 825 Str << "++++++ Handled:\n";
843 for (const Variable *Item : Handled) { 826 for (const Variable *Item : Handled) {
(...skipping 11 matching lines...) Expand all
855 Str << "\n"; 838 Str << "\n";
856 } 839 }
857 Str << "++++++ Inactive:\n"; 840 Str << "++++++ Inactive:\n";
858 for (const Variable *Item : Inactive) { 841 for (const Variable *Item : Inactive) {
859 dumpLiveRange(Item, Func); 842 dumpLiveRange(Item, Func);
860 Str << "\n"; 843 Str << "\n";
861 } 844 }
862 } 845 }
863 846
864 } // end of namespace Ice 847 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceRegAlloc.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698