Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(369)

Side by Side Diff: src/IceCfg.cpp

Issue 1411583007: Combine allocas (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix a typo. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 addCallToProfileSummary(); 194 addCallToProfileSummary();
195 } 195 }
196 dump("Profiled CFG"); 196 dump("Profiled CFG");
197 } 197 }
198 198
199 // Create the Hi and Lo variables where a split was needed 199 // Create the Hi and Lo variables where a split was needed
200 for (Variable *Var : Variables) 200 for (Variable *Var : Variables)
201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) 201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
202 Var64On32->initHiLo(this); 202 Var64On32->initHiLo(this);
203 203
204 processAllocas(); 204 // Figure out which alloca instructions result in storage at known stack frame
205 // offsets. If this is true for all alloca instructions, then a stack pointer
206 // can still be used instead of a frame pointer, freeing up the frame pointer
207 // for normal register allocation. Additionally, for each such alloca, its
208 // address could be rematerialized at each use in terms of the stack/frame
209 // pointer, saving a stack slot and a load from that stack slot.
Jim Stichnoth 2015/11/11 17:39:44 And extra register pressure from having to load fr
sehr 2015/11/11 22:14:10 Done.
210 //
211 // This simple implementation is limited to alloca instructions at the start
Jim Stichnoth 2015/11/11 17:39:44 Didn't you find an example where a non-alloca stat
sehr 2015/11/11 22:14:10 The example is tests_lit/llvm2ice_tests/alloc.ll,
212 // of the entry node.
213 for (Inst &Instr : getEntryNode()->getInsts()) {
214 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
215 if (llvm::isa<Constant>(Alloca->getSizeInBytes())) {
216 Alloca->setKnownFrameOffset();
217 continue;
218 }
219 }
220 // The first instruction that is not an alloca with a constant size stops
221 // the search.
222 break;
223 }
205 224
206 // The set of translation passes and their order are determined by the 225 // The set of translation passes and their order are determined by the
207 // target. 226 // target.
208 getTarget()->translate(); 227 getTarget()->translate();
209 228
210 dump("Final output"); 229 dump("Final output");
211 if (getFocusedTiming()) 230 if (getFocusedTiming())
212 getContext()->dumpTimers(); 231 getContext()->dumpTimers();
213 } 232 }
214 233
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after
444 swapNodes(Shuffled); 463 swapNodes(Shuffled);
445 464
446 dump("After basic block shuffling"); 465 dump("After basic block shuffling");
447 } 466 }
448 467
449 void Cfg::doArgLowering() { 468 void Cfg::doArgLowering() {
450 TimerMarker T(TimerStack::TT_doArgLowering, this); 469 TimerMarker T(TimerStack::TT_doArgLowering, this);
451 getTarget()->lowerArguments(); 470 getTarget()->lowerArguments();
452 } 471 }
453 472
454 void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts, 473 void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
455 bool IsKnownFrameOffset) { 474 uint32_t CombinedAlignment, InstList &Insts,
475 AllocaBaseVariableType BaseVariableType) {
456 if (Allocas.empty()) 476 if (Allocas.empty())
457 return; 477 return;
458 // Sort by decreasing alignment. This does not really matter at the moment, 478 // Sort by decreasing alignment.
459 // but will allow compacting stack allocation when we fuse to one alloca.
460 std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) { 479 std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) {
461 auto *A1 = llvm::dyn_cast<InstAlloca>(I1); 480 auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
462 auto *A2 = llvm::dyn_cast<InstAlloca>(I2); 481 auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
463 return A1->getAlignInBytes() > A2->getAlignInBytes(); 482 return A1->getAlignInBytes() > A2->getAlignInBytes();
464 }); 483 });
484 // Process the allocas in order of decreasing stack alignment. This allows
485 // us to pack less-aligned pieces after more-aligned ones, resulting in less
486 // stack growth. It also allows there to be at most one stack alignment "and"
487 // instruction for a whole list of allocas.
488 uint32_t CurrentOffset = 0;
489 CfgVector<int32_t> Offsets;
465 for (Inst *Instr : Allocas) { 490 for (Inst *Instr : Allocas) {
466 auto *Alloca = llvm::cast<InstAlloca>(Instr); 491 auto *Alloca = llvm::cast<InstAlloca>(Instr);
467 // Move the alloca to its sorted position. 492 // Adjust the size of the allocation up to the next multiple of the
468 InstAlloca *NewAlloca = 493 // object's alignment.
469 InstAlloca::create(this, Alloca->getSizeInBytes(), 494 uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u);
470 Alloca->getAlignInBytes(), Alloca->getDest()); 495 auto *ConstSize =
471 if (IsKnownFrameOffset) 496 llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes());
472 NewAlloca->setKnownFrameOffset(); 497 uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment);
473 Insts.push_front(NewAlloca); 498 if (BaseVariableType == FramePointer) {
499 // Addressing is relative to the frame pointer. Subtract the offset after
500 // adding the size of the alloca, because it grows downwards from the
501 // frame pointer.
502 Offsets.push_back(-(CurrentOffset + Size));
503 } else {
504 // Addressing is relative to the stack pointer or to a user pointer. Add
505 // the offset before adding the size of the object, because it grows
506 // upwards from the stack pointer.
507 Offsets.push_back(CurrentOffset);
508 }
509 // Update the running offset of the fused alloca region.
510 CurrentOffset += Size;
511 }
512 // Round the offset up to the alignment granularity to use as the size.
513 uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
514 // Ensure every alloca was assigned an offset.
515 assert(Allocas.size() == Offsets.size());
516 Variable *BaseVariable = makeVariable(IceType_i32);
517 Variable *AllocaDest = BaseVariable;
518 // Emit one addition for each alloca after the first.
519 for (size_t i = 0; i < Allocas.size(); ++i) {
520 auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
521 switch (BaseVariableType) {
522 case FramePointer:
523 case UserPointer: {
524 // Emit a new addition operation to replace the alloca.
525 Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
526 InstArithmetic *Add =
527 InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
528 BaseVariable, AllocaOffset);
529 Insts.push_front(Add);
530 } break;
531 case StackPointer: {
532 // Emit a fake definition of the rematerializable variable.
533 Variable *Dest = Alloca->getDest();
534 InstFakeDef *Def = InstFakeDef::create(this, Dest);
535 Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
536 Insts.push_front(Def);
537 } break;
538 }
474 Alloca->setDeleted(); 539 Alloca->setDeleted();
475 } 540 }
541 Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
542 switch (BaseVariableType) {
543 case FramePointer: {
544 // Adjust the return of the alloca to the top of the returned region.
545 AllocaDest = makeVariable(IceType_i32);
546 InstArithmetic *Add = InstArithmetic::create(
547 this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize);
548 Insts.push_front(Add);
549 } break;
550 case StackPointer: {
551 // Emit a fake use to keep the Alloca live.
552 InstFakeUse *Use = InstFakeUse::create(this, AllocaDest);
553 Insts.push_front(Use);
554 } break;
555 case UserPointer:
556 break;
557 }
558 // And insert the fused alloca.
559 InstAlloca *CombinedAlloca =
560 InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest);
561 CombinedAlloca->setKnownFrameOffset();
562 Insts.push_front(CombinedAlloca);
476 } 563 }
477 564
478 void Cfg::processAllocas() { 565 void Cfg::processAllocas() {
479 const uint32_t StackAlignment = getTarget()->getStackAlignment(); 566 const uint32_t StackAlignment = getTarget()->getStackAlignment();
480 CfgNode *EntryNode = getEntryNode(); 567 CfgNode *EntryNode = getEntryNode();
481 // Allocas in the entry block that have constant size and alignment less 568 // Allocas in the entry block that have constant size and alignment less
482 // than or equal to the function's stack alignment. 569 // than or equal to the function's stack alignment.
483 CfgVector<Inst *> FixedAllocas; 570 CfgVector<Inst *> FixedAllocas;
484 // Allocas in the entry block that have constant size and alignment greater 571 // Allocas in the entry block that have constant size and alignment greater
485 // than the function's stack alignment. 572 // than the function's stack alignment.
486 CfgVector<Inst *> AlignedAllocas; 573 CfgVector<Inst *> AlignedAllocas;
487 // LLVM enforces power of 2 alignment. 574 // LLVM enforces power of 2 alignment.
488 assert(llvm::isPowerOf2_32(StackAlignment)); 575 assert(llvm::isPowerOf2_32(StackAlignment));
489 // Collect the Allocas into the two vectors. 576 // Determine if there are large alignment allocations in the entry block or
490 bool RequiresFramePointer = false; 577 // dynamic allocations (variable size in the entry block).
578 bool HasLargeAlignment = false;
579 bool HasDynamicAllocation = false;
491 for (Inst &Instr : EntryNode->getInsts()) { 580 for (Inst &Instr : EntryNode->getInsts()) {
492 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { 581 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
493 if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) {
494 // Variable-sized allocations require a frame pointer.
495 RequiresFramePointer = true;
496 continue;
497 }
498 uint32_t AlignmentParam = Alloca->getAlignInBytes(); 582 uint32_t AlignmentParam = Alloca->getAlignInBytes();
499 // For default align=0, set it to the real value 1, to avoid any 583 if (AlignmentParam > StackAlignment)
500 // bit-manipulation problems below. 584 HasLargeAlignment = true;
501 AlignmentParam = std::max(AlignmentParam, 1u); 585 if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
502 assert(llvm::isPowerOf2_32(AlignmentParam)); 586 HasDynamicAllocation = true;
503 if (AlignmentParam > StackAlignment) {
504 // Allocations aligned more than the stack require a frame pointer.
505 RequiresFramePointer = true;
506 AlignedAllocas.push_back(Alloca);
507 } else
508 FixedAllocas.push_back(Alloca);
509 } 587 }
510 } 588 }
511 // Look for alloca instructions in other blocks 589 // Any alloca outside the entry block is a dynamic allocation.
512 for (CfgNode *Node : Nodes) { 590 for (CfgNode *Node : Nodes) {
513 if (Node == EntryNode) 591 if (Node == EntryNode)
514 continue; 592 continue;
515 for (Inst &Instr : Node->getInsts()) { 593 for (Inst &Instr : Node->getInsts()) {
516 if (llvm::isa<InstAlloca>(&Instr)) { 594 if (llvm::isa<InstAlloca>(&Instr)) {
517 // Allocations outside the entry block require a frame pointer. 595 // Allocations outside the entry block require a frame pointer.
518 RequiresFramePointer = true; 596 HasDynamicAllocation = true;
519 break; 597 break;
520 } 598 }
521 } 599 }
522 if (RequiresFramePointer) 600 if (HasDynamicAllocation)
523 break; 601 break;
524 } 602 }
525 // Mark the target as requiring a frame pointer. 603 // Mark the target as requiring a frame pointer.
526 if (RequiresFramePointer) 604 if (HasLargeAlignment || HasDynamicAllocation)
527 getTarget()->setHasFramePointer(); 605 getTarget()->setHasFramePointer();
606 // Collect the Allocas into the two vectors.
607 // Maximum alignment used for the dynamic/aligned allocas.
608 uint32_t MaxAlignment = StackAlignment;
609 for (Inst &Instr : EntryNode->getInsts()) {
610 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
611 if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
612 continue;
613 uint32_t AlignmentParam = Alloca->getAlignInBytes();
614 // For default align=0, set it to the real value 1, to avoid any
615 // bit-manipulation problems below.
616 AlignmentParam = std::max(AlignmentParam, 1u);
617 assert(llvm::isPowerOf2_32(AlignmentParam));
618 if (HasDynamicAllocation && AlignmentParam > StackAlignment) {
619 // If we have both dynamic allocations and large stack alignments,
620 // high-alignment allocations are pulled out with their own base.
621 AlignedAllocas.push_back(Alloca);
622 } else
Jim Stichnoth 2015/11/11 17:39:44 } else { (for consistency)
sehr 2015/11/11 22:14:10 Done.
623 FixedAllocas.push_back(Alloca);
624 MaxAlignment = std::max(AlignmentParam, MaxAlignment);
625 }
626 }
528 // Add instructions to the head of the entry block in reverse order. 627 // Add instructions to the head of the entry block in reverse order.
529 InstList &Insts = getEntryNode()->getInsts(); 628 InstList &Insts = getEntryNode()->getInsts();
530 // Fixed, large alignment alloca addresses do not have known offset. 629 if (HasDynamicAllocation && HasLargeAlignment) {
531 sortAllocas(AlignedAllocas, Insts, false); 630 // We are using a frame pointer, but fixed large-alignment alloca addresses,
532 // Fixed, small alignment alloca addresses have known offset. 631 // do not have a known offset from either the stack or frame pointer.
533 sortAllocas(FixedAllocas, Insts, true); 632 // They grow up from a user pointer from an alloca.
633 sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, UserPointer);
634 }
635 // Otherwise, fixed size allocas are always addressed relative to the stack
636 // unless there are dynamic allocas.
637 // TODO(sehr): re-enable frame pointer and decrementing addressing.
638 AllocaBaseVariableType BasePointerType =
639 (HasDynamicAllocation ? UserPointer : StackPointer);
640 sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
534 } 641 }
535 642
536 void Cfg::doAddressOpt() { 643 void Cfg::doAddressOpt() {
537 TimerMarker T(TimerStack::TT_doAddressOpt, this); 644 TimerMarker T(TimerStack::TT_doAddressOpt, this);
538 for (CfgNode *Node : Nodes) 645 for (CfgNode *Node : Nodes)
539 Node->doAddressOpt(); 646 Node->doAddressOpt();
540 } 647 }
541 648
542 void Cfg::doNopInsertion() { 649 void Cfg::doNopInsertion() {
543 if (!Ctx->getFlags().shouldDoNopInsertion()) 650 if (!Ctx->getFlags().shouldDoNopInsertion())
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after
895 } 1002 }
896 } 1003 }
897 // Print each basic block 1004 // Print each basic block
898 for (CfgNode *Node : Nodes) 1005 for (CfgNode *Node : Nodes)
899 Node->dump(this); 1006 Node->dump(this);
900 if (isVerbose(IceV_Instructions)) 1007 if (isVerbose(IceV_Instructions))
901 Str << "}\n"; 1008 Str << "}\n";
902 } 1009 }
903 1010
904 } // end of namespace Ice 1011 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698