OLD | NEW |
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
194 addCallToProfileSummary(); | 194 addCallToProfileSummary(); |
195 } | 195 } |
196 dump("Profiled CFG"); | 196 dump("Profiled CFG"); |
197 } | 197 } |
198 | 198 |
199 // Create the Hi and Lo variables where a split was needed | 199 // Create the Hi and Lo variables where a split was needed |
200 for (Variable *Var : Variables) | 200 for (Variable *Var : Variables) |
201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) | 201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) |
202 Var64On32->initHiLo(this); | 202 Var64On32->initHiLo(this); |
203 | 203 |
204 processAllocas(); | |
205 | |
206 // The set of translation passes and their order are determined by the | 204 // The set of translation passes and their order are determined by the |
207 // target. | 205 // target. |
208 getTarget()->translate(); | 206 getTarget()->translate(); |
209 | 207 |
210 dump("Final output"); | 208 dump("Final output"); |
211 if (getFocusedTiming()) | 209 if (getFocusedTiming()) |
212 getContext()->dumpTimers(); | 210 getContext()->dumpTimers(); |
213 } | 211 } |
214 | 212 |
215 void Cfg::computeInOutEdges() { | 213 void Cfg::computeInOutEdges() { |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
444 swapNodes(Shuffled); | 442 swapNodes(Shuffled); |
445 | 443 |
446 dump("After basic block shuffling"); | 444 dump("After basic block shuffling"); |
447 } | 445 } |
448 | 446 |
449 void Cfg::doArgLowering() { | 447 void Cfg::doArgLowering() { |
450 TimerMarker T(TimerStack::TT_doArgLowering, this); | 448 TimerMarker T(TimerStack::TT_doArgLowering, this); |
451 getTarget()->lowerArguments(); | 449 getTarget()->lowerArguments(); |
452 } | 450 } |
453 | 451 |
454 void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts, | 452 void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, |
455 bool IsKnownFrameOffset) { | 453 uint32_t CombinedAlignment, InstList &Insts, |
| 454 AllocaBaseVariableType BaseVariableType) { |
456 if (Allocas.empty()) | 455 if (Allocas.empty()) |
457 return; | 456 return; |
458 // Sort by decreasing alignment. This does not really matter at the moment, | 457 // Sort by decreasing alignment. |
459 // but will allow compacting stack allocation when we fuse to one alloca. | |
460 std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) { | 458 std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) { |
461 auto *A1 = llvm::dyn_cast<InstAlloca>(I1); | 459 auto *A1 = llvm::dyn_cast<InstAlloca>(I1); |
462 auto *A2 = llvm::dyn_cast<InstAlloca>(I2); | 460 auto *A2 = llvm::dyn_cast<InstAlloca>(I2); |
463 return A1->getAlignInBytes() > A2->getAlignInBytes(); | 461 return A1->getAlignInBytes() > A2->getAlignInBytes(); |
464 }); | 462 }); |
| 463 // Process the allocas in order of decreasing stack alignment. This allows |
| 464 // us to pack less-aligned pieces after more-aligned ones, resulting in less |
| 465 // stack growth. It also allows there to be at most one stack alignment "and" |
| 466 // instruction for a whole list of allocas. |
| 467 uint32_t CurrentOffset = 0; |
| 468 CfgVector<int32_t> Offsets; |
465 for (Inst *Instr : Allocas) { | 469 for (Inst *Instr : Allocas) { |
466 auto *Alloca = llvm::cast<InstAlloca>(Instr); | 470 auto *Alloca = llvm::cast<InstAlloca>(Instr); |
467 // Move the alloca to its sorted position. | 471 // Adjust the size of the allocation up to the next multiple of the |
468 InstAlloca *NewAlloca = | 472 // object's alignment. |
469 InstAlloca::create(this, Alloca->getSizeInBytes(), | 473 uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u); |
470 Alloca->getAlignInBytes(), Alloca->getDest()); | 474 auto *ConstSize = |
471 if (IsKnownFrameOffset) | 475 llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes()); |
472 NewAlloca->setKnownFrameOffset(); | 476 uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment); |
473 Insts.push_front(NewAlloca); | 477 if (BaseVariableType == BVT_FramePointer) { |
| 478 // Addressing is relative to the frame pointer. Subtract the offset after |
| 479 // adding the size of the alloca, because it grows downwards from the |
| 480 // frame pointer. |
| 481 Offsets.push_back(-(CurrentOffset + Size)); |
| 482 } else { |
| 483 // Addressing is relative to the stack pointer or to a user pointer. Add |
| 484 // the offset before adding the size of the object, because it grows |
| 485 // upwards from the stack pointer. |
| 486 Offsets.push_back(CurrentOffset); |
| 487 } |
| 488 // Update the running offset of the fused alloca region. |
| 489 CurrentOffset += Size; |
| 490 } |
| 491 // Round the offset up to the alignment granularity to use as the size. |
| 492 uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment); |
| 493 // Ensure every alloca was assigned an offset. |
| 494 assert(Allocas.size() == Offsets.size()); |
| 495 Variable *BaseVariable = makeVariable(IceType_i32); |
| 496 Variable *AllocaDest = BaseVariable; |
| 497 // Emit one addition for each alloca after the first. |
| 498 for (size_t i = 0; i < Allocas.size(); ++i) { |
| 499 auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]); |
| 500 switch (BaseVariableType) { |
| 501 case BVT_FramePointer: |
| 502 case BVT_UserPointer: { |
| 503 // Emit a new addition operation to replace the alloca. |
| 504 Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]); |
| 505 InstArithmetic *Add = |
| 506 InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(), |
| 507 BaseVariable, AllocaOffset); |
| 508 Insts.push_front(Add); |
| 509 } break; |
| 510 case BVT_StackPointer: { |
| 511 // Emit a fake definition of the rematerializable variable. |
| 512 Variable *Dest = Alloca->getDest(); |
| 513 InstFakeDef *Def = InstFakeDef::create(this, Dest); |
| 514 Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]); |
| 515 Insts.push_front(Def); |
| 516 } break; |
| 517 } |
474 Alloca->setDeleted(); | 518 Alloca->setDeleted(); |
475 } | 519 } |
| 520 Operand *AllocaSize = Ctx->getConstantInt32(TotalSize); |
| 521 switch (BaseVariableType) { |
| 522 case BVT_FramePointer: { |
| 523 // Adjust the return of the alloca to the top of the returned region. |
| 524 AllocaDest = makeVariable(IceType_i32); |
| 525 InstArithmetic *Add = InstArithmetic::create( |
| 526 this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize); |
| 527 Insts.push_front(Add); |
| 528 } break; |
| 529 case BVT_StackPointer: { |
| 530 // Emit a fake use to keep the Alloca live. |
| 531 InstFakeUse *Use = InstFakeUse::create(this, AllocaDest); |
| 532 Insts.push_front(Use); |
| 533 } break; |
| 534 case BVT_UserPointer: |
| 535 break; |
| 536 } |
| 537 // And insert the fused alloca. |
| 538 InstAlloca *CombinedAlloca = |
| 539 InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest); |
| 540 CombinedAlloca->setKnownFrameOffset(); |
| 541 Insts.push_front(CombinedAlloca); |
476 } | 542 } |
477 | 543 |
478 void Cfg::processAllocas() { | 544 void Cfg::processAllocas(bool SortAndCombine) { |
479 const uint32_t StackAlignment = getTarget()->getStackAlignment(); | 545 const uint32_t StackAlignment = getTarget()->getStackAlignment(); |
480 CfgNode *EntryNode = getEntryNode(); | 546 CfgNode *EntryNode = getEntryNode(); |
| 547 // LLVM enforces power of 2 alignment. |
| 548 assert(llvm::isPowerOf2_32(StackAlignment)); |
| 549 // Determine if there are large alignment allocations in the entry block or |
| 550 // dynamic allocations (variable size in the entry block). |
| 551 bool HasLargeAlignment = false; |
| 552 bool HasDynamicAllocation = false; |
| 553 for (Inst &Instr : EntryNode->getInsts()) { |
| 554 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
| 555 uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
| 556 if (AlignmentParam > StackAlignment) |
| 557 HasLargeAlignment = true; |
| 558 if (llvm::isa<Constant>(Alloca->getSizeInBytes())) |
| 559 Alloca->setKnownFrameOffset(); |
| 560 else |
| 561 HasDynamicAllocation = true; |
| 562 } |
| 563 } |
| 564 // Don't do the heavyweight sorting and layout for low optimization levels. |
| 565 if (!SortAndCombine) |
| 566 return; |
| 567 // Any alloca outside the entry block is a dynamic allocation. |
| 568 for (CfgNode *Node : Nodes) { |
| 569 if (Node == EntryNode) |
| 570 continue; |
| 571 for (Inst &Instr : Node->getInsts()) { |
| 572 if (llvm::isa<InstAlloca>(&Instr)) { |
| 573 // Allocations outside the entry block require a frame pointer. |
| 574 HasDynamicAllocation = true; |
| 575 break; |
| 576 } |
| 577 } |
| 578 if (HasDynamicAllocation) |
| 579 break; |
| 580 } |
| 581 // Mark the target as requiring a frame pointer. |
| 582 if (HasLargeAlignment || HasDynamicAllocation) |
| 583 getTarget()->setHasFramePointer(); |
| 584 // Collect the Allocas into the two vectors. |
481 // Allocas in the entry block that have constant size and alignment less | 585 // Allocas in the entry block that have constant size and alignment less |
482 // than or equal to the function's stack alignment. | 586 // than or equal to the function's stack alignment. |
483 CfgVector<Inst *> FixedAllocas; | 587 CfgVector<Inst *> FixedAllocas; |
484 // Allocas in the entry block that have constant size and alignment greater | 588 // Allocas in the entry block that have constant size and alignment greater |
485 // than the function's stack alignment. | 589 // than the function's stack alignment. |
486 CfgVector<Inst *> AlignedAllocas; | 590 CfgVector<Inst *> AlignedAllocas; |
487 // LLVM enforces power of 2 alignment. | 591 // Maximum alignment used for the dynamic/aligned allocas. |
488 assert(llvm::isPowerOf2_32(StackAlignment)); | 592 uint32_t MaxAlignment = StackAlignment; |
489 // Collect the Allocas into the two vectors. | |
490 bool RequiresFramePointer = false; | |
491 for (Inst &Instr : EntryNode->getInsts()) { | 593 for (Inst &Instr : EntryNode->getInsts()) { |
492 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { | 594 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { |
493 if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) { | 595 if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) |
494 // Variable-sized allocations require a frame pointer. | |
495 RequiresFramePointer = true; | |
496 continue; | 596 continue; |
497 } | |
498 uint32_t AlignmentParam = Alloca->getAlignInBytes(); | 597 uint32_t AlignmentParam = Alloca->getAlignInBytes(); |
499 // For default align=0, set it to the real value 1, to avoid any | 598 // For default align=0, set it to the real value 1, to avoid any |
500 // bit-manipulation problems below. | 599 // bit-manipulation problems below. |
501 AlignmentParam = std::max(AlignmentParam, 1u); | 600 AlignmentParam = std::max(AlignmentParam, 1u); |
502 assert(llvm::isPowerOf2_32(AlignmentParam)); | 601 assert(llvm::isPowerOf2_32(AlignmentParam)); |
503 if (AlignmentParam > StackAlignment) { | 602 if (HasDynamicAllocation && AlignmentParam > StackAlignment) { |
504 // Allocations aligned more than the stack require a frame pointer. | 603 // If we have both dynamic allocations and large stack alignments, |
505 RequiresFramePointer = true; | 604 // high-alignment allocations are pulled out with their own base. |
506 AlignedAllocas.push_back(Alloca); | 605 AlignedAllocas.push_back(Alloca); |
507 } else | 606 } else { |
508 FixedAllocas.push_back(Alloca); | 607 FixedAllocas.push_back(Alloca); |
| 608 } |
| 609 MaxAlignment = std::max(AlignmentParam, MaxAlignment); |
509 } | 610 } |
510 } | 611 } |
511 // Look for alloca instructions in other blocks | |
512 for (CfgNode *Node : Nodes) { | |
513 if (Node == EntryNode) | |
514 continue; | |
515 for (Inst &Instr : Node->getInsts()) { | |
516 if (llvm::isa<InstAlloca>(&Instr)) { | |
517 // Allocations outside the entry block require a frame pointer. | |
518 RequiresFramePointer = true; | |
519 break; | |
520 } | |
521 } | |
522 if (RequiresFramePointer) | |
523 break; | |
524 } | |
525 // Mark the target as requiring a frame pointer. | |
526 if (RequiresFramePointer) | |
527 getTarget()->setHasFramePointer(); | |
528 // Add instructions to the head of the entry block in reverse order. | 612 // Add instructions to the head of the entry block in reverse order. |
529 InstList &Insts = getEntryNode()->getInsts(); | 613 InstList &Insts = getEntryNode()->getInsts(); |
530 // Fixed, large alignment alloca addresses do not have known offset. | 614 if (HasDynamicAllocation && HasLargeAlignment) { |
531 sortAllocas(AlignedAllocas, Insts, false); | 615 // We are using a frame pointer, but fixed large-alignment alloca addresses, |
532 // Fixed, small alignment alloca addresses have known offset. | 616 // do not have a known offset from either the stack or frame pointer. |
533 sortAllocas(FixedAllocas, Insts, true); | 617 // They grow up from a user pointer from an alloca. |
| 618 sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer); |
| 619 } |
| 620 // Otherwise, fixed size allocas are always addressed relative to the stack |
| 621 // unless there are dynamic allocas. |
| 622 // TODO(sehr): re-enable frame pointer and decrementing addressing. |
| 623 AllocaBaseVariableType BasePointerType = |
| 624 (HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer); |
| 625 sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType); |
534 } | 626 } |
535 | 627 |
536 void Cfg::doAddressOpt() { | 628 void Cfg::doAddressOpt() { |
537 TimerMarker T(TimerStack::TT_doAddressOpt, this); | 629 TimerMarker T(TimerStack::TT_doAddressOpt, this); |
538 for (CfgNode *Node : Nodes) | 630 for (CfgNode *Node : Nodes) |
539 Node->doAddressOpt(); | 631 Node->doAddressOpt(); |
540 } | 632 } |
541 | 633 |
542 void Cfg::doNopInsertion() { | 634 void Cfg::doNopInsertion() { |
543 if (!Ctx->getFlags().shouldDoNopInsertion()) | 635 if (!Ctx->getFlags().shouldDoNopInsertion()) |
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
895 } | 987 } |
896 } | 988 } |
897 // Print each basic block | 989 // Print each basic block |
898 for (CfgNode *Node : Nodes) | 990 for (CfgNode *Node : Nodes) |
899 Node->dump(this); | 991 Node->dump(this); |
900 if (isVerbose(IceV_Instructions)) | 992 if (isVerbose(IceV_Instructions)) |
901 Str << "}\n"; | 993 Str << "}\n"; |
902 } | 994 } |
903 | 995 |
904 } // end of namespace Ice | 996 } // end of namespace Ice |
OLD | NEW |