OLD | NEW |
---|---|
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
500 for (CfgNode *Node : reverse_range(ReversedReachable)) | 500 for (CfgNode *Node : reverse_range(ReversedReachable)) |
501 Shuffled.push_back(Node); | 501 Shuffled.push_back(Node); |
502 for (CfgNode *Node : Unreachable) | 502 for (CfgNode *Node : Unreachable) |
503 Shuffled.push_back(Node); | 503 Shuffled.push_back(Node); |
504 assert(Nodes.size() == Shuffled.size()); | 504 assert(Nodes.size() == Shuffled.size()); |
505 swapNodes(Shuffled); | 505 swapNodes(Shuffled); |
506 | 506 |
507 dump("After basic block shuffling"); | 507 dump("After basic block shuffling"); |
508 } | 508 } |
509 | 509 |
510 void Cfg::localCSE() { | |
511 // Performs basic-block local common-subexpression elimination | |
512 // If we have | |
513 // t1 = op b c | |
514 // t2 = op b c | |
515 // This pass will replace future references to t2 in a basic block by t1 | |
516 // Points to note: | |
517 // 1. Does not assume SSA, but not tested on non-SSA input yet as it is run | |
518 // at the beginning. | |
519 // 2. Leaves removal of instructions to DCE. | |
520 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected | |
521 // to take care of cases not arising from GEP simplification. | |
522 // 4. By default, two passes are made over each basic block. Control this | |
523 // with -lcse-max-iters=N | |
524 | |
525 TimerMarker T(TimerStack::TT_localCse, this); | |
526 struct VariableHash { | |
527 SizeT operator()(const Variable *Var) const { return Var->hashValue(); } | |
Jim Stichnoth
2016/05/24 04:58:11
For these hash functions, I think it's more standa
manasijm
2016/05/24 16:54:21
Done.
| |
528 }; | |
529 | |
530 struct InstHash { | |
531 SizeT operator()(const Inst *Instr) const { | |
532 auto Kind = Instr->getKind(); | |
533 auto Result = | |
534 std::hash<typename std::underlying_type<Inst::InstKind>::type>()( | |
535 Kind); | |
536 for (SizeT i = 0; i < Instr->getSrcSize(); ++i) { | |
537 Result ^= Instr->getSrc(i)->hashValue(); | |
538 } | |
539 return Result; | |
540 } | |
541 }; | |
542 struct InstEq { | |
543 bool srcEq(const Operand *A, const Operand *B) const { | |
544 if (llvm::isa<Variable>(A) || llvm::isa<Constant>(A)) | |
545 return (A == B); | |
546 return false; | |
547 } | |
548 bool operator()(const Inst *InstrA, const Inst *InstrB) const { | |
549 if ((InstrA->getKind() != InstrB->getKind()) || | |
550 (InstrA->getSrcSize() != InstrB->getSrcSize())) | |
551 return false; | |
552 | |
553 if (auto *A = llvm::dyn_cast<InstArithmetic>(InstrA)) { | |
554 auto *B = llvm::cast<InstArithmetic>(InstrB); | |
555 // A, B are guaranteed to be of the same 'kind' at this point | |
556 // So, dyn_cast is not needed | |
557 if (A->getOp() != B->getOp()) | |
558 return false; | |
559 } | |
560 // Does not enter loop if different kind or number of operands | |
561 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) { | |
562 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i))) | |
563 return false; | |
564 } | |
565 return true; | |
566 } | |
567 }; | |
568 | |
569 for (CfgNode *Node : getNodes()) { | |
570 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen; | |
571 | |
572 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements; | |
573 // Combining the above two into a single data structure might consume less | |
574 // memory but will be slower i.e map of Instruction -> Set of Variables | |
575 | |
576 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency; | |
577 // Not necessary for SSA, still keeping it in case this pass is not run at | |
578 // the beginning. Remove to improve performace. | |
579 | |
580 int IterCount = getFlags().getLocalCseMaxIterations(); | |
581 | |
582 while (IterCount--) { | |
583 // TODO : Stats on IterCount -> performance | |
584 for (Inst &Instr : Node->getInsts()) { | |
585 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr)) | |
586 continue; | |
587 | |
588 // Invalidate replacements | |
589 auto Iter = Replacements.find(Instr.getDest()); | |
590 if (Iter != Replacements.end()) { | |
591 Replacements.erase(Iter); | |
592 } | |
593 | |
594 // Invalidate 'seen' instructions whose operands were just updated. | |
595 auto DepIter = Dependency.find(Instr.getDest()); | |
596 if (DepIter != Dependency.end()) { | |
597 for (auto DepInst : DepIter->second) { | |
598 Seen.erase(DepInst); | |
599 } | |
600 } | |
601 // The above two can be removed if SSA is assumed. | |
602 | |
603 // Replace - doing this before checking for repetitions might enable | |
604 // more | |
605 // optimizations | |
606 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { | |
607 auto *Opnd = Instr.getSrc(i); | |
608 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { | |
609 if (Replacements.find(Var) != Replacements.end()) { | |
610 Instr.replaceSource(i, Replacements[Var]); | |
611 } | |
612 } | |
613 } | |
614 | |
615 // Check for repetitions | |
616 auto SeenIter = Seen.find(&Instr); | |
617 if (SeenIter != Seen.end()) { // seen before | |
618 const Inst *Found = *SeenIter; | |
619 Replacements[Instr.getDest()] = Found->getDest(); | |
620 } else { // new | |
621 Seen.insert(&Instr); | |
622 | |
623 // Update dependencies | |
624 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { | |
625 auto *Opnd = Instr.getSrc(i); | |
626 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { | |
627 Dependency[Var].push_back(&Instr); | |
628 } | |
629 } | |
630 } | |
631 } | |
632 } | |
633 } | |
634 } | |
635 | |
510 void Cfg::doArgLowering() { | 636 void Cfg::doArgLowering() { |
511 TimerMarker T(TimerStack::TT_doArgLowering, this); | 637 TimerMarker T(TimerStack::TT_doArgLowering, this); |
512 getTarget()->lowerArguments(); | 638 getTarget()->lowerArguments(); |
513 } | 639 } |
514 | 640 |
515 void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, | 641 void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, |
516 uint32_t CombinedAlignment, InstList &Insts, | 642 uint32_t CombinedAlignment, InstList &Insts, |
517 AllocaBaseVariableType BaseVariableType) { | 643 AllocaBaseVariableType BaseVariableType) { |
518 if (Allocas.empty()) | 644 if (Allocas.empty()) |
519 return; | 645 return; |
(...skipping 958 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1478 } | 1604 } |
1479 } | 1605 } |
1480 // Print each basic block | 1606 // Print each basic block |
1481 for (CfgNode *Node : Nodes) | 1607 for (CfgNode *Node : Nodes) |
1482 Node->dump(this); | 1608 Node->dump(this); |
1483 if (isVerbose(IceV_Instructions)) | 1609 if (isVerbose(IceV_Instructions)) |
1484 Str << "}\n"; | 1610 Str << "}\n"; |
1485 } | 1611 } |
1486 | 1612 |
1487 } // end of namespace Ice | 1613 } // end of namespace Ice |
OLD | NEW |