Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(147)

Side by Side Diff: src/IceCfg.cpp

Issue 1411583007: Combine allocas (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Finish fast path. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceCfg.h ('k') | src/IceInstX8632.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 addCallToProfileSummary(); 194 addCallToProfileSummary();
195 } 195 }
196 dump("Profiled CFG"); 196 dump("Profiled CFG");
197 } 197 }
198 198
199 // Create the Hi and Lo variables where a split was needed 199 // Create the Hi and Lo variables where a split was needed
200 for (Variable *Var : Variables) 200 for (Variable *Var : Variables)
201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) 201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
202 Var64On32->initHiLo(this); 202 Var64On32->initHiLo(this);
203 203
204 processAllocas();
205
206 // The set of translation passes and their order are determined by the 204 // The set of translation passes and their order are determined by the
207 // target. 205 // target.
208 getTarget()->translate(); 206 getTarget()->translate();
209 207
210 dump("Final output"); 208 dump("Final output");
211 if (getFocusedTiming()) 209 if (getFocusedTiming())
212 getContext()->dumpTimers(); 210 getContext()->dumpTimers();
213 } 211 }
214 212
215 void Cfg::computeInOutEdges() { 213 void Cfg::computeInOutEdges() {
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
444 swapNodes(Shuffled); 442 swapNodes(Shuffled);
445 443
446 dump("After basic block shuffling"); 444 dump("After basic block shuffling");
447 } 445 }
448 446
449 void Cfg::doArgLowering() { 447 void Cfg::doArgLowering() {
450 TimerMarker T(TimerStack::TT_doArgLowering, this); 448 TimerMarker T(TimerStack::TT_doArgLowering, this);
451 getTarget()->lowerArguments(); 449 getTarget()->lowerArguments();
452 } 450 }
453 451
454 void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts, 452 void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
455 bool IsKnownFrameOffset) { 453 uint32_t CombinedAlignment, InstList &Insts,
454 AllocaBaseVariableType BaseVariableType) {
456 if (Allocas.empty()) 455 if (Allocas.empty())
457 return; 456 return;
458 // Sort by decreasing alignment. This does not really matter at the moment, 457 // Sort by decreasing alignment.
459 // but will allow compacting stack allocation when we fuse to one alloca.
460 std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) { 458 std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) {
461 auto *A1 = llvm::dyn_cast<InstAlloca>(I1); 459 auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
462 auto *A2 = llvm::dyn_cast<InstAlloca>(I2); 460 auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
463 return A1->getAlignInBytes() > A2->getAlignInBytes(); 461 return A1->getAlignInBytes() > A2->getAlignInBytes();
464 }); 462 });
463 // Process the allocas in order of decreasing stack alignment. This allows
464 // us to pack less-aligned pieces after more-aligned ones, resulting in less
465 // stack growth. It also allows there to be at most one stack alignment "and"
466 // instruction for a whole list of allocas.
467 uint32_t CurrentOffset = 0;
468 CfgVector<int32_t> Offsets;
465 for (Inst *Instr : Allocas) { 469 for (Inst *Instr : Allocas) {
466 auto *Alloca = llvm::cast<InstAlloca>(Instr); 470 auto *Alloca = llvm::cast<InstAlloca>(Instr);
467 // Move the alloca to its sorted position. 471 // Adjust the size of the allocation up to the next multiple of the
468 InstAlloca *NewAlloca = 472 // object's alignment.
469 InstAlloca::create(this, Alloca->getSizeInBytes(), 473 uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u);
470 Alloca->getAlignInBytes(), Alloca->getDest()); 474 auto *ConstSize =
471 if (IsKnownFrameOffset) 475 llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes());
472 NewAlloca->setKnownFrameOffset(); 476 uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment);
473 Insts.push_front(NewAlloca); 477 if (BaseVariableType == BVT_FramePointer) {
478 // Addressing is relative to the frame pointer. Subtract the offset after
479 // adding the size of the alloca, because it grows downwards from the
480 // frame pointer.
481 Offsets.push_back(-(CurrentOffset + Size));
482 } else {
483 // Addressing is relative to the stack pointer or to a user pointer. Add
484 // the offset before adding the size of the object, because it grows
485 // upwards from the stack pointer.
486 Offsets.push_back(CurrentOffset);
487 }
488 // Update the running offset of the fused alloca region.
489 CurrentOffset += Size;
490 }
491 // Round the offset up to the alignment granularity to use as the size.
492 uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
493 // Ensure every alloca was assigned an offset.
494 assert(Allocas.size() == Offsets.size());
495 Variable *BaseVariable = makeVariable(IceType_i32);
496 Variable *AllocaDest = BaseVariable;
497 // Emit one addition for each alloca after the first.
498 for (size_t i = 0; i < Allocas.size(); ++i) {
499 auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
500 switch (BaseVariableType) {
501 case BVT_FramePointer:
502 case BVT_UserPointer: {
503 // Emit a new addition operation to replace the alloca.
504 Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
505 InstArithmetic *Add =
506 InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
507 BaseVariable, AllocaOffset);
508 Insts.push_front(Add);
509 } break;
510 case BVT_StackPointer: {
511 // Emit a fake definition of the rematerializable variable.
512 Variable *Dest = Alloca->getDest();
513 InstFakeDef *Def = InstFakeDef::create(this, Dest);
514 Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
515 Insts.push_front(Def);
516 } break;
517 }
474 Alloca->setDeleted(); 518 Alloca->setDeleted();
475 } 519 }
520 Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
521 switch (BaseVariableType) {
522 case BVT_FramePointer: {
523 // Adjust the return of the alloca to the top of the returned region.
524 AllocaDest = makeVariable(IceType_i32);
525 InstArithmetic *Add = InstArithmetic::create(
526 this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize);
527 Insts.push_front(Add);
528 } break;
529 case BVT_StackPointer: {
530 // Emit a fake use to keep the Alloca live.
531 InstFakeUse *Use = InstFakeUse::create(this, AllocaDest);
532 Insts.push_front(Use);
533 } break;
534 case BVT_UserPointer:
535 break;
536 }
537 // And insert the fused alloca.
538 InstAlloca *CombinedAlloca =
539 InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest);
540 CombinedAlloca->setKnownFrameOffset();
541 Insts.push_front(CombinedAlloca);
476 } 542 }
477 543
478 void Cfg::processAllocas() { 544 void Cfg::processAllocas(bool SortAndCombine) {
479 const uint32_t StackAlignment = getTarget()->getStackAlignment(); 545 const uint32_t StackAlignment = getTarget()->getStackAlignment();
480 CfgNode *EntryNode = getEntryNode(); 546 CfgNode *EntryNode = getEntryNode();
547 // LLVM enforces power of 2 alignment.
548 assert(llvm::isPowerOf2_32(StackAlignment));
549 // Determine if there are large alignment allocations in the entry block or
550 // dynamic allocations (variable size in the entry block).
551 bool HasLargeAlignment = false;
552 bool HasDynamicAllocation = false;
553 for (Inst &Instr : EntryNode->getInsts()) {
554 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
555 uint32_t AlignmentParam = Alloca->getAlignInBytes();
556 if (AlignmentParam > StackAlignment)
557 HasLargeAlignment = true;
558 if (llvm::isa<Constant>(Alloca->getSizeInBytes()))
559 Alloca->setKnownFrameOffset();
560 else
561 HasDynamicAllocation = true;
562 }
563 }
564 // Don't do the heavyweight sorting and layout for low optimization levels.
565 if (!SortAndCombine)
566 return;
567 // Any alloca outside the entry block is a dynamic allocation.
568 for (CfgNode *Node : Nodes) {
569 if (Node == EntryNode)
570 continue;
571 for (Inst &Instr : Node->getInsts()) {
572 if (llvm::isa<InstAlloca>(&Instr)) {
573 // Allocations outside the entry block require a frame pointer.
574 HasDynamicAllocation = true;
575 break;
576 }
577 }
578 if (HasDynamicAllocation)
579 break;
580 }
581 // Mark the target as requiring a frame pointer.
582 if (HasLargeAlignment || HasDynamicAllocation)
583 getTarget()->setHasFramePointer();
584 // Collect the Allocas into the two vectors.
481 // Allocas in the entry block that have constant size and alignment less 585 // Allocas in the entry block that have constant size and alignment less
482 // than or equal to the function's stack alignment. 586 // than or equal to the function's stack alignment.
483 CfgVector<Inst *> FixedAllocas; 587 CfgVector<Inst *> FixedAllocas;
484 // Allocas in the entry block that have constant size and alignment greater 588 // Allocas in the entry block that have constant size and alignment greater
485 // than the function's stack alignment. 589 // than the function's stack alignment.
486 CfgVector<Inst *> AlignedAllocas; 590 CfgVector<Inst *> AlignedAllocas;
487 // LLVM enforces power of 2 alignment. 591 // Maximum alignment used for the dynamic/aligned allocas.
488 assert(llvm::isPowerOf2_32(StackAlignment)); 592 uint32_t MaxAlignment = StackAlignment;
489 // Collect the Allocas into the two vectors.
490 bool RequiresFramePointer = false;
491 for (Inst &Instr : EntryNode->getInsts()) { 593 for (Inst &Instr : EntryNode->getInsts()) {
492 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) { 594 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
493 if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) { 595 if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
494 // Variable-sized allocations require a frame pointer.
495 RequiresFramePointer = true;
496 continue; 596 continue;
497 }
498 uint32_t AlignmentParam = Alloca->getAlignInBytes(); 597 uint32_t AlignmentParam = Alloca->getAlignInBytes();
499 // For default align=0, set it to the real value 1, to avoid any 598 // For default align=0, set it to the real value 1, to avoid any
500 // bit-manipulation problems below. 599 // bit-manipulation problems below.
501 AlignmentParam = std::max(AlignmentParam, 1u); 600 AlignmentParam = std::max(AlignmentParam, 1u);
502 assert(llvm::isPowerOf2_32(AlignmentParam)); 601 assert(llvm::isPowerOf2_32(AlignmentParam));
503 if (AlignmentParam > StackAlignment) { 602 if (HasDynamicAllocation && AlignmentParam > StackAlignment) {
504 // Allocations aligned more than the stack require a frame pointer. 603 // If we have both dynamic allocations and large stack alignments,
505 RequiresFramePointer = true; 604 // high-alignment allocations are pulled out with their own base.
506 AlignedAllocas.push_back(Alloca); 605 AlignedAllocas.push_back(Alloca);
507 } else 606 } else {
508 FixedAllocas.push_back(Alloca); 607 FixedAllocas.push_back(Alloca);
608 }
609 MaxAlignment = std::max(AlignmentParam, MaxAlignment);
509 } 610 }
510 } 611 }
511 // Look for alloca instructions in other blocks
512 for (CfgNode *Node : Nodes) {
513 if (Node == EntryNode)
514 continue;
515 for (Inst &Instr : Node->getInsts()) {
516 if (llvm::isa<InstAlloca>(&Instr)) {
517 // Allocations outside the entry block require a frame pointer.
518 RequiresFramePointer = true;
519 break;
520 }
521 }
522 if (RequiresFramePointer)
523 break;
524 }
525 // Mark the target as requiring a frame pointer.
526 if (RequiresFramePointer)
527 getTarget()->setHasFramePointer();
528 // Add instructions to the head of the entry block in reverse order. 612 // Add instructions to the head of the entry block in reverse order.
529 InstList &Insts = getEntryNode()->getInsts(); 613 InstList &Insts = getEntryNode()->getInsts();
530 // Fixed, large alignment alloca addresses do not have known offset. 614 if (HasDynamicAllocation && HasLargeAlignment) {
531 sortAllocas(AlignedAllocas, Insts, false); 615 // We are using a frame pointer, but fixed large-alignment alloca addresses,
532 // Fixed, small alignment alloca addresses have known offset. 616 // do not have a known offset from either the stack or frame pointer.
533 sortAllocas(FixedAllocas, Insts, true); 617 // They grow up from a user pointer from an alloca.
618 sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
619 }
620 // Otherwise, fixed size allocas are always addressed relative to the stack
621 // unless there are dynamic allocas.
622 // TODO(sehr): re-enable frame pointer and decrementing addressing.
623 AllocaBaseVariableType BasePointerType =
624 (HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer);
625 sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
534 } 626 }
535 627
536 void Cfg::doAddressOpt() { 628 void Cfg::doAddressOpt() {
537 TimerMarker T(TimerStack::TT_doAddressOpt, this); 629 TimerMarker T(TimerStack::TT_doAddressOpt, this);
538 for (CfgNode *Node : Nodes) 630 for (CfgNode *Node : Nodes)
539 Node->doAddressOpt(); 631 Node->doAddressOpt();
540 } 632 }
541 633
542 void Cfg::doNopInsertion() { 634 void Cfg::doNopInsertion() {
543 if (!Ctx->getFlags().shouldDoNopInsertion()) 635 if (!Ctx->getFlags().shouldDoNopInsertion())
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after
895 } 987 }
896 } 988 }
897 // Print each basic block 989 // Print each basic block
898 for (CfgNode *Node : Nodes) 990 for (CfgNode *Node : Nodes)
899 Node->dump(this); 991 Node->dump(this);
900 if (isVerbose(IceV_Instructions)) 992 if (isVerbose(IceV_Instructions))
901 Str << "}\n"; 993 Str << "}\n";
902 } 994 }
903 995
904 } // end of namespace Ice 996 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceCfg.h ('k') | src/IceInstX8632.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698