Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Side by Side Diff: src/IceCfg.cpp

Issue 2185193002: Enable Local CSE by default (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Address Comments Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 496 matching lines...) Expand 10 before | Expand all | Expand 10 after
507 for (CfgNode *Node : reverse_range(ReversedReachable)) 507 for (CfgNode *Node : reverse_range(ReversedReachable))
508 Shuffled.push_back(Node); 508 Shuffled.push_back(Node);
509 for (CfgNode *Node : Unreachable) 509 for (CfgNode *Node : Unreachable)
510 Shuffled.push_back(Node); 510 Shuffled.push_back(Node);
511 assert(Nodes.size() == Shuffled.size()); 511 assert(Nodes.size() == Shuffled.size());
512 swapNodes(Shuffled); 512 swapNodes(Shuffled);
513 513
514 dump("After basic block shuffling"); 514 dump("After basic block shuffling");
515 } 515 }
516 516
517 void Cfg::localCSE() { 517 void Cfg::localCSE(bool AssumeSSA) {
518 // Performs basic-block local common-subexpression elimination 518 // Performs basic-block local common-subexpression elimination
519 // If we have 519 // If we have
520 // t1 = op b c 520 // t1 = op b c
521 // t2 = op b c 521 // t2 = op b c
522 // This pass will replace future references to t2 in a basic block by t1 522 // This pass will replace future references to t2 in a basic block by t1
523 // Points to note: 523 // Points to note:
524 // 1. Does not assume SSA, but not tested on non-SSA input yet as it is run 524 // 1. Assumes SSA by default. To change this, use -lcse=no-ssa
525 // at the beginning. 525 // This is needed if this pass is moved to a point later in the pipeline.
526 // If variables have a single definition (in the node), CSE can work just
527 // on the basis of an equality compare on instructions (sans Dest). When
528 // variables can be updated (hence, non-SSA) the result of a previous
529 // instruction which used that variable as an operand can not be reused.
526 // 2. Leaves removal of instructions to DCE. 530 // 2. Leaves removal of instructions to DCE.
527 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected 531 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected
528 // to take care of cases not arising from GEP simplification. 532 // to take care of cases not arising from GEP simplification.
529 // 4. By default, two passes are made over each basic block. Control this 533 // 4. By default, a single pass is made over each basic block. Control this
530 // with -lcse-max-iters=N 534 // with -lcse-max-iters=N
531 535
532 TimerMarker T(TimerStack::TT_localCse, this); 536 TimerMarker T(TimerStack::TT_localCse, this);
533 struct VariableHash { 537 struct VariableHash {
534 size_t operator()(const Variable *Var) const { return Var->hashValue(); } 538 size_t operator()(const Variable *Var) const { return Var->hashValue(); }
535 }; 539 };
536 540
537 struct InstHash { 541 struct InstHash {
538 size_t operator()(const Inst *Instr) const { 542 size_t operator()(const Inst *Instr) const {
539 auto Kind = Instr->getKind(); 543 auto Kind = Instr->getKind();
(...skipping 28 matching lines...) Expand all
568 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) { 572 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) {
569 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i))) 573 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i)))
570 return false; 574 return false;
571 } 575 }
572 return true; 576 return true;
573 } 577 }
574 }; 578 };
575 579
576 for (CfgNode *Node : getNodes()) { 580 for (CfgNode *Node : getNodes()) {
577 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen; 581 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen;
582 // Stores currently available instructions.
578 583
579 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements; 584 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements;
580 // Combining the above two into a single data structure might consume less 585 // Combining the above two into a single data structure might consume less
581 // memory but will be slower i.e map of Instruction -> Set of Variables 586 // memory but will be slower i.e map of Instruction -> Set of Variables
582 587
583 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency; 588 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency;
584 // Not necessary for SSA, still keeping it in case this pass is not run at 589 // Maps a variable to the Instructions that depend on it.
585 // the beginning. Remove to improve performace. 590 // a = op1 b c
591 // x = op2 c d
592 // Will result in the map : b -> {a}, c -> {a, x}, d -> {x}
593 // Not necessary for SSA as dependencies will never be invalidated, and the
594 // container will use minimal memory when left unused.
586 595
587 int IterCount = getFlags().getLocalCseMaxIterations(); 596 auto IterCount = getFlags().getLocalCseMaxIterations();
588 597
589 while (IterCount--) { 598 for (SizeT i = 0; i < IterCount; ++i) {
Jim Stichnoth 2016/08/01 20:25:36 I would declare this as uint32_t instead of SizeT,
590 // TODO : Stats on IterCount -> performance 599 // TODO(manasijm): Stats on IterCount -> performance
591 for (Inst &Instr : Node->getInsts()) { 600 for (Inst &Instr : Node->getInsts()) {
592 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr)) 601 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr))
593 continue; 602 continue;
603 if (!AssumeSSA) {
604 // Invalidate replacements
605 auto Iter = Replacements.find(Instr.getDest());
606 if (Iter != Replacements.end()) {
607 Replacements.erase(Iter);
608 }
594 609
595 // Invalidate replacements 610 // Invalidate 'seen' instructions whose operands were just updated.
596 auto Iter = Replacements.find(Instr.getDest()); 611 auto DepIter = Dependency.find(Instr.getDest());
597 if (Iter != Replacements.end()) { 612 if (DepIter != Dependency.end()) {
598 Replacements.erase(Iter); 613 for (auto *DepInst : DepIter->second) {
614 Seen.erase(DepInst);
615 }
616 }
599 } 617 }
600 618
601 // Invalidate 'seen' instructions whose operands were just updated.
602 auto DepIter = Dependency.find(Instr.getDest());
603 if (DepIter != Dependency.end()) {
604 for (auto DepInst : DepIter->second) {
605 Seen.erase(DepInst);
606 }
607 }
608 // The above two can be removed if SSA is assumed.
609
610 // Replace - doing this before checking for repetitions might enable 619 // Replace - doing this before checking for repetitions might enable
611 // more 620 // more optimizations
612 // optimizations
613 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { 621 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
614 auto *Opnd = Instr.getSrc(i); 622 auto *Opnd = Instr.getSrc(i);
615 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { 623 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
616 if (Replacements.find(Var) != Replacements.end()) { 624 if (Replacements.find(Var) != Replacements.end()) {
617 Instr.replaceSource(i, Replacements[Var]); 625 Instr.replaceSource(i, Replacements[Var]);
618 } 626 }
619 } 627 }
620 } 628 }
621 629
622 // Check for repetitions 630 // Check for repetitions
623 auto SeenIter = Seen.find(&Instr); 631 auto SeenIter = Seen.find(&Instr);
624 if (SeenIter != Seen.end()) { // seen before 632 if (SeenIter != Seen.end()) { // seen before
625 const Inst *Found = *SeenIter; 633 const Inst *Found = *SeenIter;
626 Replacements[Instr.getDest()] = Found->getDest(); 634 Replacements[Instr.getDest()] = Found->getDest();
627 } else { // new 635 } else { // new
628 Seen.insert(&Instr); 636 Seen.insert(&Instr);
629 637
630 // Update dependencies 638 if (!AssumeSSA) {
631 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { 639 // Update dependencies
632 auto *Opnd = Instr.getSrc(i); 640 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
633 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { 641 auto *Opnd = Instr.getSrc(i);
634 Dependency[Var].push_back(&Instr); 642 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
643 Dependency[Var].push_back(&Instr);
644 }
635 } 645 }
636 } 646 }
637 } 647 }
638 } 648 }
639 } 649 }
640 } 650 }
641 } 651 }
642 652
643 void Cfg::loopInvariantCodeMotion() { 653 void Cfg::loopInvariantCodeMotion() {
644 TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this); 654 TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this);
(...skipping 1122 matching lines...) Expand 10 before | Expand all | Expand 10 after
1767 } 1777 }
1768 } 1778 }
1769 // Print each basic block 1779 // Print each basic block
1770 for (CfgNode *Node : Nodes) 1780 for (CfgNode *Node : Nodes)
1771 Node->dump(this); 1781 Node->dump(this);
1772 if (isVerbose(IceV_Instructions)) 1782 if (isVerbose(IceV_Instructions))
1773 Str << "}\n"; 1783 Str << "}\n";
1774 } 1784 }
1775 1785
1776 } // end of namespace Ice 1786 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceCfg.h ('k') | src/IceClFlags.def » ('j') | src/IceClFlags.def » ('J')

Powered by Google App Engine
This is Rietveld 408576698