OLD | NEW |
---|---|
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 496 matching lines...) | |
507 for (CfgNode *Node : reverse_range(ReversedReachable)) | 507 for (CfgNode *Node : reverse_range(ReversedReachable)) |
508 Shuffled.push_back(Node); | 508 Shuffled.push_back(Node); |
509 for (CfgNode *Node : Unreachable) | 509 for (CfgNode *Node : Unreachable) |
510 Shuffled.push_back(Node); | 510 Shuffled.push_back(Node); |
511 assert(Nodes.size() == Shuffled.size()); | 511 assert(Nodes.size() == Shuffled.size()); |
512 swapNodes(Shuffled); | 512 swapNodes(Shuffled); |
513 | 513 |
514 dump("After basic block shuffling"); | 514 dump("After basic block shuffling"); |
515 } | 515 } |
516 | 516 |
517 void Cfg::localCSE() { | 517 void Cfg::localCSE(bool AssumeSSA) { |
518 // Performs basic-block local common-subexpression elimination | 518 // Performs basic-block local common-subexpression elimination |
519 // If we have | 519 // If we have |
520 // t1 = op b c | 520 // t1 = op b c |
521 // t2 = op b c | 521 // t2 = op b c |
522 // This pass will replace future references to t2 in a basic block by t1 | 522 // This pass will replace future references to t2 in a basic block by t1 |
523 // Points to note: | 523 // Points to note: |
524 // 1. Does not assume SSA, but not tested on non-SSA input yet as it is run | 524 // 1. Assumes SSA by default. To change this, use -lcse=no-ssa |
525 // at the beginning. | 525 // This is needed if this pass is moved to a point later in the pipeline. |
526 // If variables have a single definition (in the node), CSE can work just | |
527 // on the basis of an equality compare on instructions (sans Dest). When | |
528 // variables can be updated (hence, non-SSA) the result of a previous | |
529 // instruction which used that variable as an operand can not be reused. | |
526 // 2. Leaves removal of instructions to DCE. | 530 // 2. Leaves removal of instructions to DCE. |
527 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected | 531 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected |
528 // to take care of cases not arising from GEP simplification. | 532 // to take care of cases not arising from GEP simplification. |
529 // 4. By default, two passes are made over each basic block. Control this | 533 // 4. By default, a single pass is made over each basic block. Control this |
530 // with -lcse-max-iters=N | 534 // with -lcse-max-iters=N |
531 | 535 |
532 TimerMarker T(TimerStack::TT_localCse, this); | 536 TimerMarker T(TimerStack::TT_localCse, this); |
533 struct VariableHash { | 537 struct VariableHash { |
534 size_t operator()(const Variable *Var) const { return Var->hashValue(); } | 538 size_t operator()(const Variable *Var) const { return Var->hashValue(); } |
535 }; | 539 }; |
536 | 540 |
537 struct InstHash { | 541 struct InstHash { |
538 size_t operator()(const Inst *Instr) const { | 542 size_t operator()(const Inst *Instr) const { |
539 auto Kind = Instr->getKind(); | 543 auto Kind = Instr->getKind(); |
(...skipping 28 matching lines...) | |
568 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) { | 572 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) { |
569 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i))) | 573 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i))) |
570 return false; | 574 return false; |
571 } | 575 } |
572 return true; | 576 return true; |
573 } | 577 } |
574 }; | 578 }; |
575 | 579 |
576 for (CfgNode *Node : getNodes()) { | 580 for (CfgNode *Node : getNodes()) { |
577 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen; | 581 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen; |
582 // Stores currently available instructions. | |
578 | 583 |
579 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements; | 584 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements; |
580 // Combining the above two into a single data structure might consume less | 585 // Combining the above two into a single data structure might consume less |
581 // memory but will be slower i.e map of Instruction -> Set of Variables | 586 // memory but will be slower i.e map of Instruction -> Set of Variables |
582 | 587 |
583 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency; | 588 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency; |
584 // Not necessary for SSA, still keeping it in case this pass is not run at | 589 // Maps a variable to the Instructions that depend on it. |
585 // the beginning. Remove to improve performance. | 590 // a = op1 b c |
591 // x = op2 c d | |
592 // Will result in the map : b -> {a}, c -> {a, x}, d -> {x} | |
593 // Not necessary for SSA as dependencies will never be invalidated, and the | |
594 // container will use minimal memory when left unused. | |
586 | 595 |
587 int IterCount = getFlags().getLocalCseMaxIterations(); | 596 auto IterCount = getFlags().getLocalCseMaxIterations(); |
588 | 597 |
589 while (IterCount--) { | 598 for (SizeT i = 0; i < IterCount; ++i) { |
[Review comment by Jim Stichnoth, 2016/08/01 20:25:36, on new line 598] I would declare this as uint32_t instead of SizeT, [remainder of comment truncated in the source]
| |
590 // TODO : Stats on IterCount -> performance | 599 // TODO(manasijm): Stats on IterCount -> performance |
591 for (Inst &Instr : Node->getInsts()) { | 600 for (Inst &Instr : Node->getInsts()) { |
592 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr)) | 601 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr)) |
593 continue; | 602 continue; |
603 if (!AssumeSSA) { | |
604 // Invalidate replacements | |
605 auto Iter = Replacements.find(Instr.getDest()); | |
606 if (Iter != Replacements.end()) { | |
607 Replacements.erase(Iter); | |
608 } | |
594 | 609 |
595 // Invalidate replacements | 610 // Invalidate 'seen' instructions whose operands were just updated. |
596 auto Iter = Replacements.find(Instr.getDest()); | 611 auto DepIter = Dependency.find(Instr.getDest()); |
597 if (Iter != Replacements.end()) { | 612 if (DepIter != Dependency.end()) { |
598 Replacements.erase(Iter); | 613 for (auto *DepInst : DepIter->second) { |
614 Seen.erase(DepInst); | |
615 } | |
616 } | |
599 } | 617 } |
600 | 618 |
601 // Invalidate 'seen' instructions whose operands were just updated. | |
602 auto DepIter = Dependency.find(Instr.getDest()); | |
603 if (DepIter != Dependency.end()) { | |
604 for (auto DepInst : DepIter->second) { | |
605 Seen.erase(DepInst); | |
606 } | |
607 } | |
608 // The above two can be removed if SSA is assumed. | |
609 | |
610 // Replace - doing this before checking for repetitions might enable | 619 // Replace - doing this before checking for repetitions might enable |
611 // more | 620 // more optimizations |
612 // optimizations | |
613 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { | 621 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { |
614 auto *Opnd = Instr.getSrc(i); | 622 auto *Opnd = Instr.getSrc(i); |
615 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { | 623 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { |
616 if (Replacements.find(Var) != Replacements.end()) { | 624 if (Replacements.find(Var) != Replacements.end()) { |
617 Instr.replaceSource(i, Replacements[Var]); | 625 Instr.replaceSource(i, Replacements[Var]); |
618 } | 626 } |
619 } | 627 } |
620 } | 628 } |
621 | 629 |
622 // Check for repetitions | 630 // Check for repetitions |
623 auto SeenIter = Seen.find(&Instr); | 631 auto SeenIter = Seen.find(&Instr); |
624 if (SeenIter != Seen.end()) { // seen before | 632 if (SeenIter != Seen.end()) { // seen before |
625 const Inst *Found = *SeenIter; | 633 const Inst *Found = *SeenIter; |
626 Replacements[Instr.getDest()] = Found->getDest(); | 634 Replacements[Instr.getDest()] = Found->getDest(); |
627 } else { // new | 635 } else { // new |
628 Seen.insert(&Instr); | 636 Seen.insert(&Instr); |
629 | 637 |
630 // Update dependencies | 638 if (!AssumeSSA) { |
631 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { | 639 // Update dependencies |
632 auto *Opnd = Instr.getSrc(i); | 640 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { |
633 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { | 641 auto *Opnd = Instr.getSrc(i); |
634 Dependency[Var].push_back(&Instr); | 642 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { |
643 Dependency[Var].push_back(&Instr); | |
644 } | |
635 } | 645 } |
636 } | 646 } |
637 } | 647 } |
638 } | 648 } |
639 } | 649 } |
640 } | 650 } |
641 } | 651 } |
642 | 652 |
643 void Cfg::loopInvariantCodeMotion() { | 653 void Cfg::loopInvariantCodeMotion() { |
644 TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this); | 654 TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this); |
(...skipping 1122 matching lines...) | |
1767 } | 1777 } |
1768 } | 1778 } |
1769 // Print each basic block | 1779 // Print each basic block |
1770 for (CfgNode *Node : Nodes) | 1780 for (CfgNode *Node : Nodes) |
1771 Node->dump(this); | 1781 Node->dump(this); |
1772 if (isVerbose(IceV_Instructions)) | 1782 if (isVerbose(IceV_Instructions)) |
1773 Str << "}\n"; | 1783 Str << "}\n"; |
1774 } | 1784 } |
1775 | 1785 |
1776 } // end of namespace Ice | 1786 } // end of namespace Ice |
OLD | NEW |