Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 496 matching lines...) Expand 10 before / Expand all / Expand 10 after Loading... | |
| 507 for (CfgNode *Node : reverse_range(ReversedReachable)) | 507 for (CfgNode *Node : reverse_range(ReversedReachable)) |
| 508 Shuffled.push_back(Node); | 508 Shuffled.push_back(Node); |
| 509 for (CfgNode *Node : Unreachable) | 509 for (CfgNode *Node : Unreachable) |
| 510 Shuffled.push_back(Node); | 510 Shuffled.push_back(Node); |
| 511 assert(Nodes.size() == Shuffled.size()); | 511 assert(Nodes.size() == Shuffled.size()); |
| 512 swapNodes(Shuffled); | 512 swapNodes(Shuffled); |
| 513 | 513 |
| 514 dump("After basic block shuffling"); | 514 dump("After basic block shuffling"); |
| 515 } | 515 } |
| 516 | 516 |
| 517 void Cfg::localCSE() { | 517 void Cfg::localCSE(bool AssumeSSA) { |
| 518 // Performs basic-block local common-subexpression elimination | 518 // Performs basic-block local common-subexpression elimination |
| 519 // If we have | 519 // If we have |
| 520 // t1 = op b c | 520 // t1 = op b c |
| 521 // t2 = op b c | 521 // t2 = op b c |
| 522 // This pass will replace future references to t2 in a basic block by t1 | 522 // This pass will replace future references to t2 in a basic block by t1 |
| 523 // Points to note: | 523 // Points to note: |
| 524 // 1. Does not assume SSA, but not tested on non-SSA input yet as it is run | 524 // 1. Assumes SSA by default. To change this, use -lcse=no-ssa |
| 525 // at the beginning. | 525 // This is needed if this pass is moved to a point later in the pipeline. |
| 526 // If variables have a single definition (in the node), CSE can work just | |
| 527 // on the basis of an equality compare on instructions (sans Dest). When | |
| 528 // variables can be updated (hence, non-SSA) the result of a previous | |
| 529 // instruction which used that variable as an operand can not be reused. | |
| 526 // 2. Leaves removal of instructions to DCE. | 530 // 2. Leaves removal of instructions to DCE. |
| 527 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected | 531 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected |
| 528 // to take care of cases not arising from GEP simplification. | 532 // to take care of cases not arising from GEP simplification. |
| 529 // 4. By default, two passes are made over each basic block. Control this | 533 // 4. By default, a single pass is made over each basic block. Control this |
| 530 // with -lcse-max-iters=N | 534 // with -lcse-max-iters=N |
| 531 | 535 |
| 532 TimerMarker T(TimerStack::TT_localCse, this); | 536 TimerMarker T(TimerStack::TT_localCse, this); |
| 533 struct VariableHash { | 537 struct VariableHash { |
| 534 size_t operator()(const Variable *Var) const { return Var->hashValue(); } | 538 size_t operator()(const Variable *Var) const { return Var->hashValue(); } |
| 535 }; | 539 }; |
| 536 | 540 |
| 537 struct InstHash { | 541 struct InstHash { |
| 538 size_t operator()(const Inst *Instr) const { | 542 size_t operator()(const Inst *Instr) const { |
| 539 auto Kind = Instr->getKind(); | 543 auto Kind = Instr->getKind(); |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 568 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) { | 572 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) { |
| 569 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i))) | 573 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i))) |
| 570 return false; | 574 return false; |
| 571 } | 575 } |
| 572 return true; | 576 return true; |
| 573 } | 577 } |
| 574 }; | 578 }; |
| 575 | 579 |
| 576 for (CfgNode *Node : getNodes()) { | 580 for (CfgNode *Node : getNodes()) { |
| 577 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen; | 581 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen; |
| 582 // Stores currently available instructions. | |
| 578 | 583 |
| 579 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements; | 584 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements; |
| 580 // Combining the above two into a single data structure might consume less | 585 // Combining the above two into a single data structure might consume less |
| 581 // memory but will be slower i.e map of Instruction -> Set of Variables | 586 // memory but will be slower i.e map of Instruction -> Set of Variables |
| 582 | 587 |
| 583 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency; | 588 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency; |
| 584 // Not necessary for SSA, still keeping it in case this pass is not run at | 589 // Maps a variable to the Instructions that depend on it. |
| 585 // the beginning. Remove to improve performace. | 590 // a = op1 b c |
| 591 // x = op2 c d | |
| 592 // Will result in the map : b -> {a}, c -> {a, x}, d -> {x} | |
| 593 // Not necessary for SSA as dependencies will never be invalidated, and the | |
| 594 // container will use minimal memory when left unused. | |
| 586 | 595 |
| 587 int IterCount = getFlags().getLocalCseMaxIterations(); | 596 auto IterCount = getFlags().getLocalCseMaxIterations(); |
| 588 | 597 |
| 589 while (IterCount--) { | 598 for (SizeT i = 0; i < IterCount; ++i) { |
| | **Review comment (Jim Stichnoth, 2016/08/01 20:25:36):** I would declare this as uint32_t instead of SizeT, … [remainder of comment not captured] |
| 590 // TODO : Stats on IterCount -> performance | 599 // TODO(manasijm): Stats on IterCount -> performance |
| 591 for (Inst &Instr : Node->getInsts()) { | 600 for (Inst &Instr : Node->getInsts()) { |
| 592 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr)) | 601 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr)) |
| 593 continue; | 602 continue; |
| 603 if (!AssumeSSA) { | |
| 604 // Invalidate replacements | |
| 605 auto Iter = Replacements.find(Instr.getDest()); | |
| 606 if (Iter != Replacements.end()) { | |
| 607 Replacements.erase(Iter); | |
| 608 } | |
| 594 | 609 |
| 595 // Invalidate replacements | 610 // Invalidate 'seen' instructions whose operands were just updated. |
| 596 auto Iter = Replacements.find(Instr.getDest()); | 611 auto DepIter = Dependency.find(Instr.getDest()); |
| 597 if (Iter != Replacements.end()) { | 612 if (DepIter != Dependency.end()) { |
| 598 Replacements.erase(Iter); | 613 for (auto *DepInst : DepIter->second) { |
| 614 Seen.erase(DepInst); | |
| 615 } | |
| 616 } | |
| 599 } | 617 } |
| 600 | 618 |
| 601 // Invalidate 'seen' instructions whose operands were just updated. | |
| 602 auto DepIter = Dependency.find(Instr.getDest()); | |
| 603 if (DepIter != Dependency.end()) { | |
| 604 for (auto DepInst : DepIter->second) { | |
| 605 Seen.erase(DepInst); | |
| 606 } | |
| 607 } | |
| 608 // The above two can be removed if SSA is assumed. | |
| 609 | |
| 610 // Replace - doing this before checking for repetitions might enable | 619 // Replace - doing this before checking for repetitions might enable |
| 611 // more | 620 // more optimizations |
| 612 // optimizations | |
| 613 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { | 621 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { |
| 614 auto *Opnd = Instr.getSrc(i); | 622 auto *Opnd = Instr.getSrc(i); |
| 615 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { | 623 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { |
| 616 if (Replacements.find(Var) != Replacements.end()) { | 624 if (Replacements.find(Var) != Replacements.end()) { |
| 617 Instr.replaceSource(i, Replacements[Var]); | 625 Instr.replaceSource(i, Replacements[Var]); |
| 618 } | 626 } |
| 619 } | 627 } |
| 620 } | 628 } |
| 621 | 629 |
| 622 // Check for repetitions | 630 // Check for repetitions |
| 623 auto SeenIter = Seen.find(&Instr); | 631 auto SeenIter = Seen.find(&Instr); |
| 624 if (SeenIter != Seen.end()) { // seen before | 632 if (SeenIter != Seen.end()) { // seen before |
| 625 const Inst *Found = *SeenIter; | 633 const Inst *Found = *SeenIter; |
| 626 Replacements[Instr.getDest()] = Found->getDest(); | 634 Replacements[Instr.getDest()] = Found->getDest(); |
| 627 } else { // new | 635 } else { // new |
| 628 Seen.insert(&Instr); | 636 Seen.insert(&Instr); |
| 629 | 637 |
| 630 // Update dependencies | 638 if (!AssumeSSA) { |
| 631 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { | 639 // Update dependencies |
| 632 auto *Opnd = Instr.getSrc(i); | 640 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { |
| 633 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { | 641 auto *Opnd = Instr.getSrc(i); |
| 634 Dependency[Var].push_back(&Instr); | 642 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { |
| 643 Dependency[Var].push_back(&Instr); | |
| 644 } | |
| 635 } | 645 } |
| 636 } | 646 } |
| 637 } | 647 } |
| 638 } | 648 } |
| 639 } | 649 } |
| 640 } | 650 } |
| 641 } | 651 } |
| 642 | 652 |
| 643 void Cfg::loopInvariantCodeMotion() { | 653 void Cfg::loopInvariantCodeMotion() { |
| 644 TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this); | 654 TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this); |
| (...skipping 1122 matching lines...) Expand 10 before / Expand all / Expand 10 after Loading... | |
| 1767 } | 1777 } |
| 1768 } | 1778 } |
| 1769 // Print each basic block | 1779 // Print each basic block |
| 1770 for (CfgNode *Node : Nodes) | 1780 for (CfgNode *Node : Nodes) |
| 1771 Node->dump(this); | 1781 Node->dump(this); |
| 1772 if (isVerbose(IceV_Instructions)) | 1782 if (isVerbose(IceV_Instructions)) |
| 1773 Str << "}\n"; | 1783 Str << "}\n"; |
| 1774 } | 1784 } |
| 1775 | 1785 |
| 1776 } // end of namespace Ice | 1786 } // end of namespace Ice |
| OLD | NEW |