Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 194 addCallToProfileSummary(); | 194 addCallToProfileSummary(); |
| 195 } | 195 } |
| 196 dump("Profiled CFG"); | 196 dump("Profiled CFG"); |
| 197 } | 197 } |
| 198 | 198 |
| 199 // Create the Hi and Lo variables where a split was needed | 199 // Create the Hi and Lo variables where a split was needed |
| 200 for (Variable *Var : Variables) | 200 for (Variable *Var : Variables) |
| 201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) | 201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) |
| 202 Var64On32->initHiLo(this); | 202 Var64On32->initHiLo(this); |
| 203 | 203 |
| 204 processAllocas(); | 204 // Figure out which alloca instructions result in storage at known stack frame |
| 205 // offsets. If this is true for all alloca instructions, then a stack pointer | |
| 206 // can still be used instead of a frame pointer, freeing up the frame pointer | |
| 207 // for normal register allocation. Additionally, for each such alloca, its | |
| 208 // address could be rematerialized at each use in terms of the stack/frame | |
| 209 // pointer, saving a stack slot and a load from that stack slot. | |
|
Jim Stichnoth
2015/11/11 17:39:44
And extra register pressure from having to load fr
sehr
2015/11/11 22:14:10
Done.
| |
| 210 // | |
| 211 // This simple implementation is limited to alloca instructions at the start | |
|
Jim Stichnoth
2015/11/11 17:39:44
Didn't you find an example where a non-alloca stat
sehr
2015/11/11 22:14:10
The example is tests_lit/llvm2ice_tests/alloc.ll,
| |
| 212 // of the entry node. | |
| 213 for (Inst &Instr : getEntryNode()->getInsts()) { | |
| 214 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { | |
| 215 if (llvm::isa<Constant>(Alloca->getSizeInBytes())) { | |
| 216 Alloca->setKnownFrameOffset(); | |
| 217 continue; | |
| 218 } | |
| 219 } | |
| 220 // The first instruction that is not an alloca with a constant size stops | |
| 221 // the search. | |
| 222 break; | |
| 223 } | |
| 205 | 224 |
| 206 // The set of translation passes and their order are determined by the | 225 // The set of translation passes and their order are determined by the |
| 207 // target. | 226 // target. |
| 208 getTarget()->translate(); | 227 getTarget()->translate(); |
| 209 | 228 |
| 210 dump("Final output"); | 229 dump("Final output"); |
| 211 if (getFocusedTiming()) | 230 if (getFocusedTiming()) |
| 212 getContext()->dumpTimers(); | 231 getContext()->dumpTimers(); |
| 213 } | 232 } |
| 214 | 233 |
| (...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 444 swapNodes(Shuffled); | 463 swapNodes(Shuffled); |
| 445 | 464 |
| 446 dump("After basic block shuffling"); | 465 dump("After basic block shuffling"); |
| 447 } | 466 } |
| 448 | 467 |
| 449 void Cfg::doArgLowering() { | 468 void Cfg::doArgLowering() { |
| 450 TimerMarker T(TimerStack::TT_doArgLowering, this); | 469 TimerMarker T(TimerStack::TT_doArgLowering, this); |
| 451 getTarget()->lowerArguments(); | 470 getTarget()->lowerArguments(); |
| 452 } | 471 } |
| 453 | 472 |
| 454 void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts, | 473 void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, |
| 455 bool IsKnownFrameOffset) { | 474 uint32_t CombinedAlignment, InstList &Insts, |
| 475 AllocaBaseVariableType BaseVariableType) { | |
| 456 if (Allocas.empty()) | 476 if (Allocas.empty()) |
| 457 return; | 477 return; |
| 458 // Sort by decreasing alignment. This does not really matter at the moment, | 478 // Sort by decreasing alignment. |
| 459 // but will allow compacting stack allocation when we fuse to one alloca. | |
| 460 std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) { | 479 std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) { |
| 461 auto *A1 = llvm::dyn_cast<InstAlloca>(I1); | 480 auto *A1 = llvm::dyn_cast<InstAlloca>(I1); |
| 462 auto *A2 = llvm::dyn_cast<InstAlloca>(I2); | 481 auto *A2 = llvm::dyn_cast<InstAlloca>(I2); |
| 463 return A1->getAlignInBytes() > A2->getAlignInBytes(); | 482 return A1->getAlignInBytes() > A2->getAlignInBytes(); |
| 464 }); | 483 }); |
| 484 // Process the allocas in order of decreasing stack alignment. This allows | |
| 485 // us to pack less-aligned pieces after more-aligned ones, resulting in less | |
| 486 // stack growth. It also allows there to be at most one stack alignment "and" | |
| 487 // instruction for a whole list of allocas. | |
| 488 uint32_t CurrentOffset = 0; | |
| 489 CfgVector<int32_t> Offsets; | |
| 465 for (Inst *Instr : Allocas) { | 490 for (Inst *Instr : Allocas) { |
| 466 auto *Alloca = llvm::cast<InstAlloca>(Instr); | 491 auto *Alloca = llvm::cast<InstAlloca>(Instr); |
| 467 // Move the alloca to its sorted position. | 492 // Adjust the size of the allocation up to the next multiple of the |
| 468 InstAlloca *NewAlloca = | 493 // object's alignment. |
| 469 InstAlloca::create(this, Alloca->getSizeInBytes(), | 494 uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u); |
| 470 Alloca->getAlignInBytes(), Alloca->getDest()); | 495 auto *ConstSize = |
| 471 if (IsKnownFrameOffset) | 496 llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes()); |
| 472 NewAlloca->setKnownFrameOffset(); | 497 uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment); |
| 473 Insts.push_front(NewAlloca); | 498 if (BaseVariableType == FramePointer) { |
| 499 // Addressing is relative to the frame pointer. Subtract the offset after | |
| 500 // adding the size of the alloca, because it grows downwards from the | |
| 501 // frame pointer. | |
| 502 Offsets.push_back(-(CurrentOffset + Size)); | |
| 503 } else { | |
| 504 // Addressing is relative to the stack pointer or to a user pointer. Add | |
| 505 // the offset before adding the size of the object, because it grows | |
| 506 // upwards from the stack pointer. | |
| 507 Offsets.push_back(CurrentOffset); | |
| 508 } | |
| 509 // Update the running offset of the fused alloca region. | |
| 510 CurrentOffset += Size; | |
| 511 } | |
| 512 // Round the offset up to the alignment granularity to use as the size. | |
| 513 uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment); | |
| 514 // Ensure every alloca was assigned an offset. | |
| 515 assert(Allocas.size() == Offsets.size()); | |
| 516 Variable *BaseVariable = makeVariable(IceType_i32); | |
| 517 Variable *AllocaDest = BaseVariable; | |
| 518 // Emit one addition for each alloca after the first. | |
| 519 for (size_t i = 0; i < Allocas.size(); ++i) { | |
| 520 auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]); | |
| 521 switch (BaseVariableType) { | |
| 522 case FramePointer: | |
| 523 case UserPointer: { | |
| 524 // Emit a new addition operation to replace the alloca. | |
| 525 Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]); | |
| 526 InstArithmetic *Add = | |
| 527 InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(), | |
| 528 BaseVariable, AllocaOffset); | |
| 529 Insts.push_front(Add); | |
| 530 } break; | |
| 531 case StackPointer: { | |
| 532 // Emit a fake definition of the rematerializable variable. | |
| 533 Variable *Dest = Alloca->getDest(); | |
| 534 InstFakeDef *Def = InstFakeDef::create(this, Dest); | |
| 535 Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]); | |
| 536 Insts.push_front(Def); | |
| 537 } break; | |
| 538 } | |
| 474 Alloca->setDeleted(); | 539 Alloca->setDeleted(); |
| 475 } | 540 } |
| 541 Operand *AllocaSize = Ctx->getConstantInt32(TotalSize); | |
| 542 switch (BaseVariableType) { | |
| 543 case FramePointer: { | |
| 544 // Adjust the return of the alloca to the top of the returned region. | |
| 545 AllocaDest = makeVariable(IceType_i32); | |
| 546 InstArithmetic *Add = InstArithmetic::create( | |
| 547 this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize); | |
| 548 Insts.push_front(Add); | |
| 549 } break; | |
| 550 case StackPointer: { | |
| 551 // Emit a fake use to keep the Alloca live. | |
| 552 InstFakeUse *Use = InstFakeUse::create(this, AllocaDest); | |
| 553 Insts.push_front(Use); | |
| 554 } break; | |
| 555 case UserPointer: | |
| 556 break; | |
| 557 } | |
| 558 // And insert the fused alloca. | |
| 559 InstAlloca *CombinedAlloca = | |
| 560 InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest); | |
| 561 CombinedAlloca->setKnownFrameOffset(); | |
| 562 Insts.push_front(CombinedAlloca); | |
| 476 } | 563 } |
| 477 | 564 |
| 478 void Cfg::processAllocas() { | 565 void Cfg::processAllocas() { |
| 479 const uint32_t StackAlignment = getTarget()->getStackAlignment(); | 566 const uint32_t StackAlignment = getTarget()->getStackAlignment(); |
| 480 CfgNode *EntryNode = getEntryNode(); | 567 CfgNode *EntryNode = getEntryNode(); |
| 481 // Allocas in the entry block that have constant size and alignment less | 568 // Allocas in the entry block that have constant size and alignment less |
| 482 // than or equal to the function's stack alignment. | 569 // than or equal to the function's stack alignment. |
| 483 CfgVector<Inst *> FixedAllocas; | 570 CfgVector<Inst *> FixedAllocas; |
| 484 // Allocas in the entry block that have constant size and alignment greater | 571 // Allocas in the entry block that have constant size and alignment greater |
| 485 // than the function's stack alignment. | 572 // than the function's stack alignment. |
| 486 CfgVector<Inst *> AlignedAllocas; | 573 CfgVector<Inst *> AlignedAllocas; |
| 487 // LLVM enforces power of 2 alignment. | 574 // LLVM enforces power of 2 alignment. |
| 488 assert(llvm::isPowerOf2_32(StackAlignment)); | 575 assert(llvm::isPowerOf2_32(StackAlignment)); |
| 489 // Collect the Allocas into the two vectors. | 576 // Determine if there are large alignment allocations in the entry block or |
| 490 bool RequiresFramePointer = false; | 577 // dynamic allocations (variable size in the entry block). |
| 578 bool HasLargeAlignment = false; | |
| 579 bool HasDynamicAllocation = false; | |
| 491 for (Inst &Instr : EntryNode->getInsts()) { | 580 for (Inst &Instr : EntryNode->getInsts()) { |
| 492 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { | 581 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
| 493 if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) { | |
| 494 // Variable-sized allocations require a frame pointer. | |
| 495 RequiresFramePointer = true; | |
| 496 continue; | |
| 497 } | |
| 498 uint32_t AlignmentParam = Alloca->getAlignInBytes(); | 582 uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
| 499 // For default align=0, set it to the real value 1, to avoid any | 583 if (AlignmentParam > StackAlignment) |
| 500 // bit-manipulation problems below. | 584 HasLargeAlignment = true; |
| 501 AlignmentParam = std::max(AlignmentParam, 1u); | 585 if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) |
| 502 assert(llvm::isPowerOf2_32(AlignmentParam)); | 586 HasDynamicAllocation = true; |
| 503 if (AlignmentParam > StackAlignment) { | |
| 504 // Allocations aligned more than the stack require a frame pointer. | |
| 505 RequiresFramePointer = true; | |
| 506 AlignedAllocas.push_back(Alloca); | |
| 507 } else | |
| 508 FixedAllocas.push_back(Alloca); | |
| 509 } | 587 } |
| 510 } | 588 } |
| 511 // Look for alloca instructions in other blocks | 589 // Any alloca outside the entry block is a dynamic allocation. |
| 512 for (CfgNode *Node : Nodes) { | 590 for (CfgNode *Node : Nodes) { |
| 513 if (Node == EntryNode) | 591 if (Node == EntryNode) |
| 514 continue; | 592 continue; |
| 515 for (Inst &Instr : Node->getInsts()) { | 593 for (Inst &Instr : Node->getInsts()) { |
| 516 if (llvm::isa<InstAlloca>(&Instr)) { | 594 if (llvm::isa<InstAlloca>(&Instr)) { |
| 517 // Allocations outside the entry block require a frame pointer. | 595 // Allocations outside the entry block require a frame pointer. |
| 518 RequiresFramePointer = true; | 596 HasDynamicAllocation = true; |
| 519 break; | 597 break; |
| 520 } | 598 } |
| 521 } | 599 } |
| 522 if (RequiresFramePointer) | 600 if (HasDynamicAllocation) |
| 523 break; | 601 break; |
| 524 } | 602 } |
| 525 // Mark the target as requiring a frame pointer. | 603 // Mark the target as requiring a frame pointer. |
| 526 if (RequiresFramePointer) | 604 if (HasLargeAlignment || HasDynamicAllocation) |
| 527 getTarget()->setHasFramePointer(); | 605 getTarget()->setHasFramePointer(); |
| 606 // Collect the Allocas into the two vectors. | |
| 607 // Maximum alignment used for the dynamic/aligned allocas. | |
| 608 uint32_t MaxAlignment = StackAlignment; | |
| 609 for (Inst &Instr : EntryNode->getInsts()) { | |
| 610 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { | |
| 611 if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) | |
| 612 continue; | |
| 613 uint32_t AlignmentParam = Alloca->getAlignInBytes(); | |
| 614 // For default align=0, set it to the real value 1, to avoid any | |
| 615 // bit-manipulation problems below. | |
| 616 AlignmentParam = std::max(AlignmentParam, 1u); | |
| 617 assert(llvm::isPowerOf2_32(AlignmentParam)); | |
| 618 if (HasDynamicAllocation && AlignmentParam > StackAlignment) { | |
| 619 // If we have both dynamic allocations and large stack alignments, | |
| 620 // high-alignment allocations are pulled out with their own base. | |
| 621 AlignedAllocas.push_back(Alloca); | |
| 622 } else | |
|
Jim Stichnoth
2015/11/11 17:39:44
} else {
(for consistency)
sehr
2015/11/11 22:14:10
Done.
| |
| 623 FixedAllocas.push_back(Alloca); | |
| 624 MaxAlignment = std::max(AlignmentParam, MaxAlignment); | |
| 625 } | |
| 626 } | |
| 528 // Add instructions to the head of the entry block in reverse order. | 627 // Add instructions to the head of the entry block in reverse order. |
| 529 InstList &Insts = getEntryNode()->getInsts(); | 628 InstList &Insts = getEntryNode()->getInsts(); |
| 530 // Fixed, large alignment alloca addresses do not have known offset. | 629 if (HasDynamicAllocation && HasLargeAlignment) { |
| 531 sortAllocas(AlignedAllocas, Insts, false); | 630 // We are using a frame pointer, but fixed large-alignment alloca addresses, |
| 532 // Fixed, small alignment alloca addresses have known offset. | 631 // do not have a known offset from either the stack or frame pointer. |
| 533 sortAllocas(FixedAllocas, Insts, true); | 632 // They grow up from a user pointer from an alloca. |
| 633 sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, UserPointer); | |
| 634 } | |
| 635 // Otherwise, fixed size allocas are always addressed relative to the stack | |
| 636 // unless there are dynamic allocas. | |
| 637 // TODO(sehr): re-enable frame pointer and decrementing addressing. | |
| 638 AllocaBaseVariableType BasePointerType = | |
| 639 (HasDynamicAllocation ? UserPointer : StackPointer); | |
| 640 sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType); | |
| 534 } | 641 } |
| 535 | 642 |
| 536 void Cfg::doAddressOpt() { | 643 void Cfg::doAddressOpt() { |
| 537 TimerMarker T(TimerStack::TT_doAddressOpt, this); | 644 TimerMarker T(TimerStack::TT_doAddressOpt, this); |
| 538 for (CfgNode *Node : Nodes) | 645 for (CfgNode *Node : Nodes) |
| 539 Node->doAddressOpt(); | 646 Node->doAddressOpt(); |
| 540 } | 647 } |
| 541 | 648 |
| 542 void Cfg::doNopInsertion() { | 649 void Cfg::doNopInsertion() { |
| 543 if (!Ctx->getFlags().shouldDoNopInsertion()) | 650 if (!Ctx->getFlags().shouldDoNopInsertion()) |
| (...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 895 } | 1002 } |
| 896 } | 1003 } |
| 897 // Print each basic block | 1004 // Print each basic block |
| 898 for (CfgNode *Node : Nodes) | 1005 for (CfgNode *Node : Nodes) |
| 899 Node->dump(this); | 1006 Node->dump(this); |
| 900 if (isVerbose(IceV_Instructions)) | 1007 if (isVerbose(IceV_Instructions)) |
| 901 Str << "}\n"; | 1008 Str << "}\n"; |
| 902 } | 1009 } |
| 903 | 1010 |
| 904 } // end of namespace Ice | 1011 } // end of namespace Ice |
| OLD | NEW |