OLD | NEW |
1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===// | 1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
75 Var->dump(Func); | 75 Var->dump(Func); |
76 Str << " Range=" << Var->getLiveRange(); | 76 Str << " Range=" << Var->getLiveRange(); |
77 } | 77 } |
78 | 78 |
79 } // end of anonymous namespace | 79 } // end of anonymous namespace |
80 | 80 |
81 LinearScan::LinearScan(Cfg *Func) | 81 LinearScan::LinearScan(Cfg *Func) |
82 : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()), | 82 : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()), |
83 Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {} | 83 Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {} |
84 | 84 |
85 // Prepare for full register allocation of all variables. We depend on | 85 // Prepare for full register allocation of all variables. We depend on liveness |
86 // liveness analysis to have calculated live ranges. | 86 // analysis to have calculated live ranges. |
87 void LinearScan::initForGlobal() { | 87 void LinearScan::initForGlobal() { |
88 TimerMarker T(TimerStack::TT_initUnhandled, Func); | 88 TimerMarker T(TimerStack::TT_initUnhandled, Func); |
89 FindPreference = true; | 89 FindPreference = true; |
90 // For full register allocation, normally we want to enable FindOverlap | 90 // For full register allocation, normally we want to enable FindOverlap |
91 // (meaning we look for opportunities for two overlapping live ranges to | 91 // (meaning we look for opportunities for two overlapping live ranges to |
92 // safely share the same register). However, we disable it for phi-lowering | 92 // safely share the same register). However, we disable it for phi-lowering |
93 // register allocation since no overlap opportunities should be available and | 93 // register allocation since no overlap opportunities should be available and |
94 // it's more expensive to look for opportunities. | 94 // it's more expensive to look for opportunities. |
95 FindOverlap = (Kind != RAK_Phi); | 95 FindOverlap = (Kind != RAK_Phi); |
96 const VarList &Vars = Func->getVariables(); | 96 const VarList &Vars = Func->getVariables(); |
97 Unhandled.reserve(Vars.size()); | 97 Unhandled.reserve(Vars.size()); |
98 UnhandledPrecolored.reserve(Vars.size()); | 98 UnhandledPrecolored.reserve(Vars.size()); |
99 // Gather the live ranges of all variables and add them to the Unhandled set. | 99 // Gather the live ranges of all variables and add them to the Unhandled set. |
100 for (Variable *Var : Vars) { | 100 for (Variable *Var : Vars) { |
101 // Explicitly don't consider zero-weight variables, which are meant to be | 101 // Explicitly don't consider zero-weight variables, which are meant to be |
102 // spill slots. | 102 // spill slots. |
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
255 std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges); | 255 std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges); |
256 std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(), | 256 std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(), |
257 CompareRanges); | 257 CompareRanges); |
258 | 258 |
259 Handled.reserve(Unhandled.size()); | 259 Handled.reserve(Unhandled.size()); |
260 Inactive.reserve(Unhandled.size()); | 260 Inactive.reserve(Unhandled.size()); |
261 Active.reserve(Unhandled.size()); | 261 Active.reserve(Unhandled.size()); |
262 } | 262 } |
263 | 263 |
264 // This is called when Cur must be allocated a register but no registers are | 264 // This is called when Cur must be allocated a register but no registers are |
265 // available across Cur's live range. To handle this, we find a register that | 265 // available across Cur's live range. To handle this, we find a register that |
266 // is not explicitly used during Cur's live range, spill that register to a | 266 // is not explicitly used during Cur's live range, spill that register to a |
267 // stack location right before Cur's live range begins, and fill (reload) the | 267 // stack location right before Cur's live range begins, and fill (reload) the |
268 // register from the stack location right after Cur's live range ends. | 268 // register from the stack location right after Cur's live range ends. |
269 void LinearScan::addSpillFill(IterationState &Iter) { | 269 void LinearScan::addSpillFill(IterationState &Iter) { |
270 // Identify the actual instructions that begin and end Iter.Cur's live range. | 270 // Identify the actual instructions that begin and end Iter.Cur's live range. |
271 // Iterate through Iter.Cur's node's instruction list until we find the actual | 271 // Iterate through Iter.Cur's node's instruction list until we find the actual |
272 // instructions with instruction numbers corresponding to Iter.Cur's recorded | 272 // instructions with instruction numbers corresponding to Iter.Cur's recorded |
273 // live range endpoints. This sounds inefficient but shouldn't be a problem | 273 // live range endpoints. This sounds inefficient but shouldn't be a problem |
274 // in practice because: | 274 // in practice because: |
275 // (1) This function is almost never called in practice. | 275 // (1) This function is almost never called in practice. |
(...skipping 14 matching lines...) Expand all Loading... |
290 InstList::iterator SpillPoint = Insts.end(); | 290 InstList::iterator SpillPoint = Insts.end(); |
291 InstList::iterator FillPoint = Insts.end(); | 291 InstList::iterator FillPoint = Insts.end(); |
292 // Stop searching after we have found both the SpillPoint and the FillPoint. | 292 // Stop searching after we have found both the SpillPoint and the FillPoint. |
293 for (auto I = Insts.begin(), E = Insts.end(); | 293 for (auto I = Insts.begin(), E = Insts.end(); |
294 I != E && (SpillPoint == E || FillPoint == E); ++I) { | 294 I != E && (SpillPoint == E || FillPoint == E); ++I) { |
295 if (I->getNumber() == Start) | 295 if (I->getNumber() == Start) |
296 SpillPoint = I; | 296 SpillPoint = I; |
297 if (I->getNumber() == End) | 297 if (I->getNumber() == End) |
298 FillPoint = I; | 298 FillPoint = I; |
299 if (SpillPoint != E) { | 299 if (SpillPoint != E) { |
300 // Remove from RegMask any physical registers referenced during Cur's live | 300 // Remove from RegMask any physical registers referenced during Cur's |
301 // range. Start looking after SpillPoint gets set, i.e. once Cur's live | 301 // live range. Start looking after SpillPoint gets set, i.e. once Cur's |
302 // range begins. | 302 // live range begins. |
303 FOREACH_VAR_IN_INST(Var, *I) { | 303 FOREACH_VAR_IN_INST(Var, *I) { |
304 if (!Var->hasRegTmp()) | 304 if (!Var->hasRegTmp()) |
305 continue; | 305 continue; |
306 const llvm::SmallBitVector &Aliases = *RegAliases[Var->getRegNumTmp()]; | 306 const llvm::SmallBitVector &Aliases = *RegAliases[Var->getRegNumTmp()]; |
307 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; | 307 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; |
308 RegAlias = Aliases.find_next(RegAlias)) { | 308 RegAlias = Aliases.find_next(RegAlias)) { |
309 Iter.RegMask[RegAlias] = false; | 309 Iter.RegMask[RegAlias] = false; |
310 } | 310 } |
311 } | 311 } |
312 } | 312 } |
313 } | 313 } |
314 assert(SpillPoint != Insts.end()); | 314 assert(SpillPoint != Insts.end()); |
315 assert(FillPoint != Insts.end()); | 315 assert(FillPoint != Insts.end()); |
316 ++FillPoint; | 316 ++FillPoint; |
317 // TODO(stichnot): Randomize instead of find_first(). | 317 // TODO(stichnot): Randomize instead of find_first(). |
318 int32_t RegNum = Iter.RegMask.find_first(); | 318 int32_t RegNum = Iter.RegMask.find_first(); |
319 assert(RegNum != -1); | 319 assert(RegNum != -1); |
320 Iter.Cur->setRegNumTmp(RegNum); | 320 Iter.Cur->setRegNumTmp(RegNum); |
321 Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType()); | 321 Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType()); |
322 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc | 322 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that |
323 // is correctly identified as !isMultiBlock(), reducing stack frame size. | 323 // SpillLoc is correctly identified as !isMultiBlock(), reducing stack frame |
| 324 // size. |
324 Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType()); | 325 Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType()); |
325 // Add "reg=FakeDef;spill=reg" before SpillPoint | 326 // Add "reg=FakeDef;spill=reg" before SpillPoint |
326 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); | 327 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); |
327 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg)); | 328 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg)); |
328 // add "reg=spill;FakeUse(reg)" before FillPoint | 329 // add "reg=spill;FakeUse(reg)" before FillPoint |
329 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc)); | 330 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc)); |
330 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg)); | 331 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg)); |
331 } | 332 } |
332 | 333 |
333 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) { | 334 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) { |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
406 bool IsAssign = DefInst->isSimpleAssign(); | 407 bool IsAssign = DefInst->isSimpleAssign(); |
407 bool IsSingleDef = !VMetadata->isMultiDef(Iter.Cur); | 408 bool IsSingleDef = !VMetadata->isMultiDef(Iter.Cur); |
408 for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) { | 409 for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) { |
409 // TODO(stichnot): Iterate through the actual Variables of the | 410 // TODO(stichnot): Iterate through the actual Variables of the |
410 // instruction, not just the source operands. This could capture Load | 411 // instruction, not just the source operands. This could capture Load |
411 // instructions, including address mode optimization, for Prefer (but | 412 // instructions, including address mode optimization, for Prefer (but |
412 // not for AllowOverlap). | 413 // not for AllowOverlap). |
413 if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) { | 414 if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) { |
414 int32_t SrcReg = SrcVar->getRegNumTmp(); | 415 int32_t SrcReg = SrcVar->getRegNumTmp(); |
415 // Only consider source variables that have (so far) been assigned a | 416 // Only consider source variables that have (so far) been assigned a |
416 // register. That register must be one in the RegMask set, e.g. | 417 // register. That register must be one in the RegMask set, e.g. don't |
417 // don't try to prefer the stack pointer as a result of the stacksave | 418 // try to prefer the stack pointer as a result of the stacksave |
418 // intrinsic. | 419 // intrinsic. |
419 if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) { | 420 if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) { |
420 if (FindOverlap && !Iter.Free[SrcReg]) { | 421 if (FindOverlap && !Iter.Free[SrcReg]) { |
421 // Don't bother trying to enable AllowOverlap if the register is | 422 // Don't bother trying to enable AllowOverlap if the register is |
422 // already free. | 423 // already free. |
423 Iter.AllowOverlap = IsSingleDef && IsAssign && | 424 Iter.AllowOverlap = IsSingleDef && IsAssign && |
424 !overlapsDefs(Func, Iter.Cur, SrcVar); | 425 !overlapsDefs(Func, Iter.Cur, SrcVar); |
425 } | 426 } |
426 if (Iter.AllowOverlap || Iter.Free[SrcReg]) { | 427 if (Iter.AllowOverlap || Iter.Free[SrcReg]) { |
427 Iter.Prefer = SrcVar; | 428 Iter.Prefer = SrcVar; |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
462 RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) { | 463 RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) { |
463 Iter.AllowOverlap = false; | 464 Iter.AllowOverlap = false; |
464 dumpDisableOverlap(Func, Item, "Inactive"); | 465 dumpDisableOverlap(Func, Item, "Inactive"); |
465 } | 466 } |
466 } | 467 } |
467 } | 468 } |
468 } | 469 } |
469 | 470 |
470 // Remove registers from the Free[] list where an Unhandled pre-colored range | 471 // Remove registers from the Free[] list where an Unhandled pre-colored range |
471 // overlaps with the current range, and set those registers to infinite weight | 472 // overlaps with the current range, and set those registers to infinite weight |
472 // so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is | 473 // so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is |
473 // an early exit check that turns a guaranteed O(N^2) algorithm into expected | 474 // an early exit check that turns a guaranteed O(N^2) algorithm into expected |
474 // linear complexity. | 475 // linear complexity. |
475 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { | 476 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { |
476 for (Variable *Item : reverse_range(UnhandledPrecolored)) { | 477 for (Variable *Item : reverse_range(UnhandledPrecolored)) { |
477 assert(Item->hasReg()); | 478 assert(Item->hasReg()); |
478 if (Iter.Cur->rangeEndsBefore(Item)) | 479 if (Iter.Cur->rangeEndsBefore(Item)) |
479 break; | 480 break; |
480 if (Item->rangeOverlaps(Iter.Cur)) { | 481 if (Item->rangeOverlaps(Iter.Cur)) { |
481 const llvm::SmallBitVector &Aliases = | 482 const llvm::SmallBitVector &Aliases = |
482 *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp() | 483 *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp() |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
603 } | 604 } |
604 Item->setRegNumTmp(Variable::NoRegister); | 605 Item->setRegNumTmp(Variable::NoRegister); |
605 moveItem(Active, Index, Handled); | 606 moveItem(Active, Index, Handled); |
606 } | 607 } |
607 } | 608 } |
608 // Do the same for Inactive. | 609 // Do the same for Inactive. |
609 for (SizeT I = Inactive.size(); I > 0; --I) { | 610 for (SizeT I = Inactive.size(); I > 0; --I) { |
610 const SizeT Index = I - 1; | 611 const SizeT Index = I - 1; |
611 Variable *Item = Inactive[Index]; | 612 Variable *Item = Inactive[Index]; |
612 // Note: The Item->rangeOverlaps(Cur) clause is not part of the | 613 // Note: The Item->rangeOverlaps(Cur) clause is not part of the |
613 // description of AssignMemLoc() in the original paper. But there | 614 // description of AssignMemLoc() in the original paper. But there doesn't |
614 // doesn't seem to be any need to evict an inactive live range that | 615 // seem to be any need to evict an inactive live range that doesn't |
615 // doesn't overlap with the live range currently being considered. It's | 616 // overlap with the live range currently being considered. It's |
616 // especially bad if we would end up evicting an infinite-weight but | 617 // especially bad if we would end up evicting an infinite-weight but |
617 // currently-inactive live range. The most common situation for this | 618 // currently-inactive live range. The most common situation for this |
618 // would be a scratch register kill set for call instructions. | 619 // would be a scratch register kill set for call instructions. |
619 if (Item->getRegNumTmp() == MinWeightIndex && | 620 if (Item->getRegNumTmp() == MinWeightIndex && |
620 Item->rangeOverlaps(Iter.Cur)) { | 621 Item->rangeOverlaps(Iter.Cur)) { |
621 dumpLiveRangeTrace("Evicting ", Item); | 622 dumpLiveRangeTrace("Evicting ", Item); |
622 Item->setRegNumTmp(Variable::NoRegister); | 623 Item->setRegNumTmp(Variable::NoRegister); |
623 moveItem(Inactive, Index, Handled); | 624 moveItem(Inactive, Index, Handled); |
624 } | 625 } |
625 } | 626 } |
(...skipping 11 matching lines...) Expand all Loading... |
637 } | 638 } |
638 | 639 |
639 void LinearScan::assignFinalRegisters( | 640 void LinearScan::assignFinalRegisters( |
640 const llvm::SmallBitVector &RegMaskFull, | 641 const llvm::SmallBitVector &RegMaskFull, |
641 const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) { | 642 const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) { |
642 const size_t NumRegisters = RegMaskFull.size(); | 643 const size_t NumRegisters = RegMaskFull.size(); |
643 llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters); | 644 llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters); |
644 if (Randomized) { | 645 if (Randomized) { |
645 // Create a random number generator for regalloc randomization. Merge | 646 // Create a random number generator for regalloc randomization. Merge |
646 // function's sequence and Kind value as the Salt. Because regAlloc() is | 647 // function's sequence and Kind value as the Salt. Because regAlloc() is |
647 // called twice under O2, the second time with RAK_Phi, we check | 648 // called twice under O2, the second time with RAK_Phi, we check Kind == |
648 // Kind == RAK_Phi to determine the lowest-order bit to make sure the Salt | 649 // RAK_Phi to determine the lowest-order bit to make sure the Salt is |
649 // is different. | 650 // different. |
650 uint64_t Salt = | 651 uint64_t Salt = |
651 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u); | 652 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u); |
652 Target->makeRandomRegisterPermutation( | 653 Target->makeRandomRegisterPermutation( |
653 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt); | 654 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt); |
654 } | 655 } |
655 | 656 |
656 // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof) | 657 // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof) |
657 // for each Variable. | 658 // for each Variable. |
658 for (Variable *Item : Handled) { | 659 for (Variable *Item : Handled) { |
659 int32_t RegNum = Item->getRegNumTmp(); | 660 int32_t RegNum = Item->getRegNumTmp(); |
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
882 Str << "\n"; | 883 Str << "\n"; |
883 } | 884 } |
884 Str << "++++++ Inactive:\n"; | 885 Str << "++++++ Inactive:\n"; |
885 for (const Variable *Item : Inactive) { | 886 for (const Variable *Item : Inactive) { |
886 dumpLiveRange(Item, Func); | 887 dumpLiveRange(Item, Func); |
887 Str << "\n"; | 888 Str << "\n"; |
888 } | 889 } |
889 } | 890 } |
890 | 891 |
891 } // end of namespace Ice | 892 } // end of namespace Ice |
OLD | NEW |