| OLD | NEW |
| 1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===// | 1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 75 Var->dump(Func); | 75 Var->dump(Func); |
| 76 Str << " Range=" << Var->getLiveRange(); | 76 Str << " Range=" << Var->getLiveRange(); |
| 77 } | 77 } |
| 78 | 78 |
| 79 } // end of anonymous namespace | 79 } // end of anonymous namespace |
| 80 | 80 |
| 81 LinearScan::LinearScan(Cfg *Func) | 81 LinearScan::LinearScan(Cfg *Func) |
| 82 : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()), | 82 : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()), |
| 83 Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {} | 83 Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {} |
| 84 | 84 |
| 85 // Prepare for full register allocation of all variables. We depend on | 85 // Prepare for full register allocation of all variables. We depend on liveness |
| 86 // liveness analysis to have calculated live ranges. | 86 // analysis to have calculated live ranges. |
| 87 void LinearScan::initForGlobal() { | 87 void LinearScan::initForGlobal() { |
| 88 TimerMarker T(TimerStack::TT_initUnhandled, Func); | 88 TimerMarker T(TimerStack::TT_initUnhandled, Func); |
| 89 FindPreference = true; | 89 FindPreference = true; |
| 90 // For full register allocation, normally we want to enable FindOverlap | 90 // For full register allocation, normally we want to enable FindOverlap |
| 91 // (meaning we look for opportunities for two overlapping live ranges to | 91 // (meaning we look for opportunities for two overlapping live ranges to |
| 92 // safely share the same register). However, we disable it for phi-lowering | 92 // safely share the same register). However, we disable it for phi-lowering |
| 93 // register allocation since no overlap opportunities should be available and | 93 // register allocation since no overlap opportunities should be available and |
| 94 // it's more expensive to look for opportunities. | 94 // it's more expensive to look for opportunities. |
| 95 FindOverlap = (Kind != RAK_Phi); | 95 FindOverlap = (Kind != RAK_Phi); |
| 96 const VarList &Vars = Func->getVariables(); | 96 const VarList &Vars = Func->getVariables(); |
| 97 Unhandled.reserve(Vars.size()); | 97 Unhandled.reserve(Vars.size()); |
| 98 UnhandledPrecolored.reserve(Vars.size()); | 98 UnhandledPrecolored.reserve(Vars.size()); |
| 99 // Gather the live ranges of all variables and add them to the Unhandled set. | 99 // Gather the live ranges of all variables and add them to the Unhandled set. |
| 100 for (Variable *Var : Vars) { | 100 for (Variable *Var : Vars) { |
| 101 // Explicitly don't consider zero-weight variables, which are meant to be | 101 // Explicitly don't consider zero-weight variables, which are meant to be |
| 102 // spill slots. | 102 // spill slots. |
| (...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 255 std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges); | 255 std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges); |
| 256 std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(), | 256 std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(), |
| 257 CompareRanges); | 257 CompareRanges); |
| 258 | 258 |
| 259 Handled.reserve(Unhandled.size()); | 259 Handled.reserve(Unhandled.size()); |
| 260 Inactive.reserve(Unhandled.size()); | 260 Inactive.reserve(Unhandled.size()); |
| 261 Active.reserve(Unhandled.size()); | 261 Active.reserve(Unhandled.size()); |
| 262 } | 262 } |
| 263 | 263 |
| 264 // This is called when Cur must be allocated a register but no registers are | 264 // This is called when Cur must be allocated a register but no registers are |
| 265 // available across Cur's live range. To handle this, we find a register that | 265 // available across Cur's live range. To handle this, we find a register that |
| 266 // is not explicitly used during Cur's live range, spill that register to a | 266 // is not explicitly used during Cur's live range, spill that register to a |
| 267 // stack location right before Cur's live range begins, and fill (reload) the | 267 // stack location right before Cur's live range begins, and fill (reload) the |
| 268 // register from the stack location right after Cur's live range ends. | 268 // register from the stack location right after Cur's live range ends. |
| 269 void LinearScan::addSpillFill(IterationState &Iter) { | 269 void LinearScan::addSpillFill(IterationState &Iter) { |
| 270 // Identify the actual instructions that begin and end Iter.Cur's live range. | 270 // Identify the actual instructions that begin and end Iter.Cur's live range. |
| 271 // Iterate through Iter.Cur's node's instruction list until we find the actual | 271 // Iterate through Iter.Cur's node's instruction list until we find the actual |
| 272 // instructions with instruction numbers corresponding to Iter.Cur's recorded | 272 // instructions with instruction numbers corresponding to Iter.Cur's recorded |
| 273 // live range endpoints. This sounds inefficient but shouldn't be a problem | 273 // live range endpoints. This sounds inefficient but shouldn't be a problem |
| 274 // in practice because: | 274 // in practice because: |
| 275 // (1) This function is almost never called in practice. | 275 // (1) This function is almost never called in practice. |
| (...skipping 14 matching lines...) Expand all Loading... |
| 290 InstList::iterator SpillPoint = Insts.end(); | 290 InstList::iterator SpillPoint = Insts.end(); |
| 291 InstList::iterator FillPoint = Insts.end(); | 291 InstList::iterator FillPoint = Insts.end(); |
| 292 // Stop searching after we have found both the SpillPoint and the FillPoint. | 292 // Stop searching after we have found both the SpillPoint and the FillPoint. |
| 293 for (auto I = Insts.begin(), E = Insts.end(); | 293 for (auto I = Insts.begin(), E = Insts.end(); |
| 294 I != E && (SpillPoint == E || FillPoint == E); ++I) { | 294 I != E && (SpillPoint == E || FillPoint == E); ++I) { |
| 295 if (I->getNumber() == Start) | 295 if (I->getNumber() == Start) |
| 296 SpillPoint = I; | 296 SpillPoint = I; |
| 297 if (I->getNumber() == End) | 297 if (I->getNumber() == End) |
| 298 FillPoint = I; | 298 FillPoint = I; |
| 299 if (SpillPoint != E) { | 299 if (SpillPoint != E) { |
| 300 // Remove from RegMask any physical registers referenced during Cur's live | 300 // Remove from RegMask any physical registers referenced during Cur's |
| 301 // range. Start looking after SpillPoint gets set, i.e. once Cur's live | 301 // live range. Start looking after SpillPoint gets set, i.e. once Cur's |
| 302 // range begins. | 302 // live range begins. |
| 303 FOREACH_VAR_IN_INST(Var, *I) { | 303 FOREACH_VAR_IN_INST(Var, *I) { |
| 304 if (!Var->hasRegTmp()) | 304 if (!Var->hasRegTmp()) |
| 305 continue; | 305 continue; |
| 306 const llvm::SmallBitVector &Aliases = *RegAliases[Var->getRegNumTmp()]; | 306 const llvm::SmallBitVector &Aliases = *RegAliases[Var->getRegNumTmp()]; |
| 307 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; | 307 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; |
| 308 RegAlias = Aliases.find_next(RegAlias)) { | 308 RegAlias = Aliases.find_next(RegAlias)) { |
| 309 Iter.RegMask[RegAlias] = false; | 309 Iter.RegMask[RegAlias] = false; |
| 310 } | 310 } |
| 311 } | 311 } |
| 312 } | 312 } |
| 313 } | 313 } |
| 314 assert(SpillPoint != Insts.end()); | 314 assert(SpillPoint != Insts.end()); |
| 315 assert(FillPoint != Insts.end()); | 315 assert(FillPoint != Insts.end()); |
| 316 ++FillPoint; | 316 ++FillPoint; |
| 317 // TODO(stichnot): Randomize instead of find_first(). | 317 // TODO(stichnot): Randomize instead of find_first(). |
| 318 int32_t RegNum = Iter.RegMask.find_first(); | 318 int32_t RegNum = Iter.RegMask.find_first(); |
| 319 assert(RegNum != -1); | 319 assert(RegNum != -1); |
| 320 Iter.Cur->setRegNumTmp(RegNum); | 320 Iter.Cur->setRegNumTmp(RegNum); |
| 321 Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType()); | 321 Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType()); |
| 322 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc | 322 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that |
| 323 // is correctly identified as !isMultiBlock(), reducing stack frame size. | 323 // SpillLoc is correctly identified as !isMultiBlock(), reducing stack frame |
| 324 // size. |
| 324 Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType()); | 325 Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType()); |
| 325 // Add "reg=FakeDef;spill=reg" before SpillPoint | 326 // Add "reg=FakeDef;spill=reg" before SpillPoint |
| 326 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); | 327 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); |
| 327 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg)); | 328 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg)); |
| 328 // add "reg=spill;FakeUse(reg)" before FillPoint | 329 // add "reg=spill;FakeUse(reg)" before FillPoint |
| 329 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc)); | 330 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc)); |
| 330 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg)); | 331 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg)); |
| 331 } | 332 } |
| 332 | 333 |
| 333 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) { | 334 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) { |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 406 bool IsAssign = DefInst->isSimpleAssign(); | 407 bool IsAssign = DefInst->isSimpleAssign(); |
| 407 bool IsSingleDef = !VMetadata->isMultiDef(Iter.Cur); | 408 bool IsSingleDef = !VMetadata->isMultiDef(Iter.Cur); |
| 408 for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) { | 409 for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) { |
| 409 // TODO(stichnot): Iterate through the actual Variables of the | 410 // TODO(stichnot): Iterate through the actual Variables of the |
| 410 // instruction, not just the source operands. This could capture Load | 411 // instruction, not just the source operands. This could capture Load |
| 411 // instructions, including address mode optimization, for Prefer (but | 412 // instructions, including address mode optimization, for Prefer (but |
| 412 // not for AllowOverlap). | 413 // not for AllowOverlap). |
| 413 if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) { | 414 if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) { |
| 414 int32_t SrcReg = SrcVar->getRegNumTmp(); | 415 int32_t SrcReg = SrcVar->getRegNumTmp(); |
| 415 // Only consider source variables that have (so far) been assigned a | 416 // Only consider source variables that have (so far) been assigned a |
| 416 // register. That register must be one in the RegMask set, e.g. | 417 // register. That register must be one in the RegMask set, e.g. don't |
| 417 // don't try to prefer the stack pointer as a result of the stacksave | 418 // try to prefer the stack pointer as a result of the stacksave |
| 418 // intrinsic. | 419 // intrinsic. |
| 419 if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) { | 420 if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) { |
| 420 if (FindOverlap && !Iter.Free[SrcReg]) { | 421 if (FindOverlap && !Iter.Free[SrcReg]) { |
| 421 // Don't bother trying to enable AllowOverlap if the register is | 422 // Don't bother trying to enable AllowOverlap if the register is |
| 422 // already free. | 423 // already free. |
| 423 Iter.AllowOverlap = IsSingleDef && IsAssign && | 424 Iter.AllowOverlap = IsSingleDef && IsAssign && |
| 424 !overlapsDefs(Func, Iter.Cur, SrcVar); | 425 !overlapsDefs(Func, Iter.Cur, SrcVar); |
| 425 } | 426 } |
| 426 if (Iter.AllowOverlap || Iter.Free[SrcReg]) { | 427 if (Iter.AllowOverlap || Iter.Free[SrcReg]) { |
| 427 Iter.Prefer = SrcVar; | 428 Iter.Prefer = SrcVar; |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 462 RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) { | 463 RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) { |
| 463 Iter.AllowOverlap = false; | 464 Iter.AllowOverlap = false; |
| 464 dumpDisableOverlap(Func, Item, "Inactive"); | 465 dumpDisableOverlap(Func, Item, "Inactive"); |
| 465 } | 466 } |
| 466 } | 467 } |
| 467 } | 468 } |
| 468 } | 469 } |
| 469 | 470 |
| 470 // Remove registers from the Free[] list where an Unhandled pre-colored range | 471 // Remove registers from the Free[] list where an Unhandled pre-colored range |
| 471 // overlaps with the current range, and set those registers to infinite weight | 472 // overlaps with the current range, and set those registers to infinite weight |
| 472 // so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is | 473 // so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is |
| 473 // an early exit check that turns a guaranteed O(N^2) algorithm into expected | 474 // an early exit check that turns a guaranteed O(N^2) algorithm into expected |
| 474 // linear complexity. | 475 // linear complexity. |
| 475 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { | 476 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { |
| 476 for (Variable *Item : reverse_range(UnhandledPrecolored)) { | 477 for (Variable *Item : reverse_range(UnhandledPrecolored)) { |
| 477 assert(Item->hasReg()); | 478 assert(Item->hasReg()); |
| 478 if (Iter.Cur->rangeEndsBefore(Item)) | 479 if (Iter.Cur->rangeEndsBefore(Item)) |
| 479 break; | 480 break; |
| 480 if (Item->rangeOverlaps(Iter.Cur)) { | 481 if (Item->rangeOverlaps(Iter.Cur)) { |
| 481 const llvm::SmallBitVector &Aliases = | 482 const llvm::SmallBitVector &Aliases = |
| 482 *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp() | 483 *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp() |
| (...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 603 } | 604 } |
| 604 Item->setRegNumTmp(Variable::NoRegister); | 605 Item->setRegNumTmp(Variable::NoRegister); |
| 605 moveItem(Active, Index, Handled); | 606 moveItem(Active, Index, Handled); |
| 606 } | 607 } |
| 607 } | 608 } |
| 608 // Do the same for Inactive. | 609 // Do the same for Inactive. |
| 609 for (SizeT I = Inactive.size(); I > 0; --I) { | 610 for (SizeT I = Inactive.size(); I > 0; --I) { |
| 610 const SizeT Index = I - 1; | 611 const SizeT Index = I - 1; |
| 611 Variable *Item = Inactive[Index]; | 612 Variable *Item = Inactive[Index]; |
| 612 // Note: The Item->rangeOverlaps(Cur) clause is not part of the | 613 // Note: The Item->rangeOverlaps(Cur) clause is not part of the |
| 613 // description of AssignMemLoc() in the original paper. But there | 614 // description of AssignMemLoc() in the original paper. But there doesn't |
| 614 // doesn't seem to be any need to evict an inactive live range that | 615 // seem to be any need to evict an inactive live range that doesn't |
| 615 // doesn't overlap with the live range currently being considered. It's | 616 // overlap with the live range currently being considered. It's |
| 616 // especially bad if we would end up evicting an infinite-weight but | 617 // especially bad if we would end up evicting an infinite-weight but |
| 617 // currently-inactive live range. The most common situation for this | 618 // currently-inactive live range. The most common situation for this |
| 618 // would be a scratch register kill set for call instructions. | 619 // would be a scratch register kill set for call instructions. |
| 619 if (Item->getRegNumTmp() == MinWeightIndex && | 620 if (Item->getRegNumTmp() == MinWeightIndex && |
| 620 Item->rangeOverlaps(Iter.Cur)) { | 621 Item->rangeOverlaps(Iter.Cur)) { |
| 621 dumpLiveRangeTrace("Evicting ", Item); | 622 dumpLiveRangeTrace("Evicting ", Item); |
| 622 Item->setRegNumTmp(Variable::NoRegister); | 623 Item->setRegNumTmp(Variable::NoRegister); |
| 623 moveItem(Inactive, Index, Handled); | 624 moveItem(Inactive, Index, Handled); |
| 624 } | 625 } |
| 625 } | 626 } |
| (...skipping 11 matching lines...) Expand all Loading... |
| 637 } | 638 } |
| 638 | 639 |
| 639 void LinearScan::assignFinalRegisters( | 640 void LinearScan::assignFinalRegisters( |
| 640 const llvm::SmallBitVector &RegMaskFull, | 641 const llvm::SmallBitVector &RegMaskFull, |
| 641 const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) { | 642 const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) { |
| 642 const size_t NumRegisters = RegMaskFull.size(); | 643 const size_t NumRegisters = RegMaskFull.size(); |
| 643 llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters); | 644 llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters); |
| 644 if (Randomized) { | 645 if (Randomized) { |
| 645 // Create a random number generator for regalloc randomization. Merge | 646 // Create a random number generator for regalloc randomization. Merge |
| 646 // function's sequence and Kind value as the Salt. Because regAlloc() is | 647 // function's sequence and Kind value as the Salt. Because regAlloc() is |
| 647 // called twice under O2, the second time with RAK_Phi, we check | 648 // called twice under O2, the second time with RAK_Phi, we check Kind == |
| 648 // Kind == RAK_Phi to determine the lowest-order bit to make sure the Salt | 649 // RAK_Phi to determine the lowest-order bit to make sure the Salt is |
| 649 // is different. | 650 // different. |
| 650 uint64_t Salt = | 651 uint64_t Salt = |
| 651 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u); | 652 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u); |
| 652 Target->makeRandomRegisterPermutation( | 653 Target->makeRandomRegisterPermutation( |
| 653 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt); | 654 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt); |
| 654 } | 655 } |
| 655 | 656 |
| 656 // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof) | 657 // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof) |
| 657 // for each Variable. | 658 // for each Variable. |
| 658 for (Variable *Item : Handled) { | 659 for (Variable *Item : Handled) { |
| 659 int32_t RegNum = Item->getRegNumTmp(); | 660 int32_t RegNum = Item->getRegNumTmp(); |
| (...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 882 Str << "\n"; | 883 Str << "\n"; |
| 883 } | 884 } |
| 884 Str << "++++++ Inactive:\n"; | 885 Str << "++++++ Inactive:\n"; |
| 885 for (const Variable *Item : Inactive) { | 886 for (const Variable *Item : Inactive) { |
| 886 dumpLiveRange(Item, Func); | 887 dumpLiveRange(Item, Func); |
| 887 Str << "\n"; | 888 Str << "\n"; |
| 888 } | 889 } |
| 889 } | 890 } |
| 890 | 891 |
| 891 } // end of namespace Ice | 892 } // end of namespace Ice |
| OLD | NEW |