Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(537)

Side by Side Diff: src/IceCfg.cpp

Issue 1414343010: Sort allocas, compute frame pointer in Cfg pass (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Cut over to sorted alloca for computing frame pointer Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceCfg.h ('k') | src/IceTargetLowering.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 addCallToProfileSummary(); 194 addCallToProfileSummary();
195 } 195 }
196 dump("Profiled CFG"); 196 dump("Profiled CFG");
197 } 197 }
198 198
199 // Create the Hi and Lo variables where a split was needed 199 // Create the Hi and Lo variables where a split was needed
200 for (Variable *Var : Variables) 200 for (Variable *Var : Variables)
201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) 201 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
202 Var64On32->initHiLo(this); 202 Var64On32->initHiLo(this);
203 203
204 // Figure out which alloca instructions result in storage at known stack frame 204 processAllocas();
205 // offsets. If this is true for all alloca instructions, then a stack pointer
206 // can still be used instead of a frame pointer, freeing up the frame pointer
207 // for normal register allocation. Additionally, for each such alloca, its
208 // address could be rematerialized at each use in terms of the stack/frame
209 // pointer, saving a stack slot and a load from that stack slot.
210 //
211 // This simple implementation is limited to alloca instructions at the start
212 // of the entry node.
213 for (Inst &Instr : getEntryNode()->getInsts()) {
214 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
215 if (llvm::isa<Constant>(Alloca->getSizeInBytes())) {
216 Alloca->setKnownFrameOffset();
217 continue;
218 }
219 }
220 // The first instruction that is not an alloca with a constant size stops
221 // the search.
222 break;
223 }
224 205
225 // The set of translation passes and their order are determined by the 206 // The set of translation passes and their order are determined by the
226 // target. 207 // target.
227 getTarget()->translate(); 208 getTarget()->translate();
228 209
229 dump("Final output"); 210 dump("Final output");
230 if (getFocusedTiming()) 211 if (getFocusedTiming())
231 getContext()->dumpTimers(); 212 getContext()->dumpTimers();
232 } 213 }
233 214
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after
463 swapNodes(Shuffled); 444 swapNodes(Shuffled);
464 445
465 dump("After basic block shuffling"); 446 dump("After basic block shuffling");
466 } 447 }
467 448
468 void Cfg::doArgLowering() { 449 void Cfg::doArgLowering() {
469 TimerMarker T(TimerStack::TT_doArgLowering, this); 450 TimerMarker T(TimerStack::TT_doArgLowering, this);
470 getTarget()->lowerArguments(); 451 getTarget()->lowerArguments();
471 } 452 }
472 453
454 void Cfg::sortAllocas(std::vector<Inst *> &Allocas, InstList &Insts,
Jim Stichnoth 2015/11/06 18:44:11 Use CfgVector instead of std::vector here and belo
sehr 2015/11/06 19:01:15 Done.
455 bool IsKnownFrameOffset) {
456 if (Allocas.size() == 0)
Jim Stichnoth 2015/11/06 18:44:11 .empty()
sehr 2015/11/06 19:01:15 Done.
457 return;
458 // Sort by decreasing alignment.
Jim Stichnoth 2015/11/06 18:45:27 This is so that they get prepended in order of inc
sehr 2015/11/06 19:01:15 It's sort of arbitrary in the general sense here,
459 std::sort(Allocas.begin(), Allocas.end(),
460 [](Inst *I1, Inst *I2) {
461 auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
462 auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
463 return A1->getAlignInBytes() > A2->getAlignInBytes();
464 });
465 for (Inst *Instr: Allocas) {
466 auto *Alloca = llvm::cast<InstAlloca>(Instr);
467 // Move the alloca to its sorted position.
468 InstAlloca *NewAlloca = InstAlloca::create(this,
469 Alloca->getSizeInBytes(),
470 Alloca->getAlignInBytes(),
471 Alloca->getDest());
472 if (IsKnownFrameOffset)
473 NewAlloca->setKnownFrameOffset();
474 Insts.push_front(NewAlloca);
475 Alloca->setDeleted();
476 }
477 }
478
479 void Cfg::processAllocas() {
480 const uint32_t StackAlignment = getTarget()->getStackAlignment();
481 CfgNode *EntryNode = getEntryNode();
482 // Allocas in the entry block that have constant size and alignment less
483 // than or equal to the function's stack alignment.
484 std::vector<Inst *> FixedAllocas;
485 // Allocas in the entry block that have constant size and alignment greater
486 // than the function's stack alignment.
487 std::vector<Inst *> AlignedAllocas;
488 // LLVM enforces power of 2 alignment.
489 assert(llvm::isPowerOf2_32(StackAlignment));
490 // Collect the Allocas into the two vectors.
491 bool RequiresFramePointer = false;
492 for (Inst &Instr : EntryNode->getInsts()) {
493 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
494 if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) {
495 // Variable-sized allocations require a frame pointer.
496 RequiresFramePointer = true;
497 continue;
498 }
499 uint32_t AlignmentParam = Alloca->getAlignInBytes();
500 // For default align=0, set it to the real value 1, to avoid any
501 // bit-manipulation problems below.
502 AlignmentParam = std::max(AlignmentParam, 1u);
503 assert(llvm::isPowerOf2_32(AlignmentParam));
504 if (AlignmentParam > StackAlignment) {
505 // Allocations aligned more than the stack require a frame pointer.
506 RequiresFramePointer = true;
507 AlignedAllocas.push_back(Alloca);
508 }
509 else
510 FixedAllocas.push_back(Alloca);
511 }
512 }
513 // Look for alloca instructions in other blocks
514 for (CfgNode *Node : Nodes) {
515 if (Node == EntryNode)
516 continue;
517 for (Inst &Instr : Node->getInsts()) {
518 if (llvm::isa<InstAlloca>(&Instr)) {
519 // Allocations outside the entry block require a frame pointer.
520 RequiresFramePointer = true;
521 break;
522 }
523 }
524 if (RequiresFramePointer)
525 break;
526 }
527 // Mark the target as requiring a frame pointer.
528 if (RequiresFramePointer)
529 getTarget()->setHasFramePointer();
530 // Add instructions to the head of the entry block in reverse order.
531 InstList &Insts = getEntryNode()->getInsts();
532 // Fixed, large alignment alloca addresses do not have known offset.
533 sortAllocas(AlignedAllocas, Insts, false);
534 // Fixed, small alignment alloca addresses have known offset.
535 sortAllocas(FixedAllocas, Insts, true);
536 }
537
473 void Cfg::doAddressOpt() { 538 void Cfg::doAddressOpt() {
474 TimerMarker T(TimerStack::TT_doAddressOpt, this); 539 TimerMarker T(TimerStack::TT_doAddressOpt, this);
475 for (CfgNode *Node : Nodes) 540 for (CfgNode *Node : Nodes)
476 Node->doAddressOpt(); 541 Node->doAddressOpt();
477 } 542 }
478 543
479 void Cfg::doNopInsertion() { 544 void Cfg::doNopInsertion() {
480 if (!Ctx->getFlags().shouldDoNopInsertion()) 545 if (!Ctx->getFlags().shouldDoNopInsertion())
481 return; 546 return;
482 TimerMarker T(TimerStack::TT_doNopInsertion, this); 547 TimerMarker T(TimerStack::TT_doNopInsertion, this);
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after
832 } 897 }
833 } 898 }
834 // Print each basic block 899 // Print each basic block
835 for (CfgNode *Node : Nodes) 900 for (CfgNode *Node : Nodes)
836 Node->dump(this); 901 Node->dump(this);
837 if (isVerbose(IceV_Instructions)) 902 if (isVerbose(IceV_Instructions))
838 Str << "}\n"; 903 Str << "}\n";
839 } 904 }
840 905
841 } // end of namespace Ice 906 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceCfg.h ('k') | src/IceTargetLowering.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698