OLD | NEW |
1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// | 1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
500 for (CfgNode *Node : reverse_range(ReversedReachable)) | 500 for (CfgNode *Node : reverse_range(ReversedReachable)) |
501 Shuffled.push_back(Node); | 501 Shuffled.push_back(Node); |
502 for (CfgNode *Node : Unreachable) | 502 for (CfgNode *Node : Unreachable) |
503 Shuffled.push_back(Node); | 503 Shuffled.push_back(Node); |
504 assert(Nodes.size() == Shuffled.size()); | 504 assert(Nodes.size() == Shuffled.size()); |
505 swapNodes(Shuffled); | 505 swapNodes(Shuffled); |
506 | 506 |
507 dump("After basic block shuffling"); | 507 dump("After basic block shuffling"); |
508 } | 508 } |
509 | 509 |
| 510 void Cfg::localCSE() { |
| 511 // Performs basic-block local common-subexpression elimination |
| 512 // If we have |
| 513 // t1 = op b c |
| 514 // t2 = op b c |
| 515 // This pass will replace future references to t2 in a basic block by t1 |
| 516 // Points to note: |
| 517 // 1. Does not assume SSA, but not tested on non-SSA input yet as it is run |
| 518 // at the beginning. |
| 519 // 2. Leaves removal of instructions to DCE. |
| 520 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected |
| 521 // to take care of cases not arising from GEP simplification. |
| 522 // 4. By default, two passes are made over each basic block. Control this |
| 523 // with -lcse-max-iters=N |
| 524 |
| 525 TimerMarker T(TimerStack::TT_localCse, this); |
| 526 struct VariableHash { |
| 527 size_t operator()(const Variable *Var) const { return Var->hashValue(); } |
| 528 }; |
| 529 |
| 530 struct InstHash { |
| 531 size_t operator()(const Inst *Instr) const { |
| 532 auto Kind = Instr->getKind(); |
| 533 auto Result = |
| 534 std::hash<typename std::underlying_type<Inst::InstKind>::type>()( |
| 535 Kind); |
| 536 for (SizeT i = 0; i < Instr->getSrcSize(); ++i) { |
| 537 Result ^= Instr->getSrc(i)->hashValue(); |
| 538 } |
| 539 return Result; |
| 540 } |
| 541 }; |
| 542 struct InstEq { |
| 543 bool srcEq(const Operand *A, const Operand *B) const { |
| 544 if (llvm::isa<Variable>(A) || llvm::isa<Constant>(A)) |
| 545 return (A == B); |
| 546 return false; |
| 547 } |
| 548 bool operator()(const Inst *InstrA, const Inst *InstrB) const { |
| 549 if ((InstrA->getKind() != InstrB->getKind()) || |
| 550 (InstrA->getSrcSize() != InstrB->getSrcSize())) |
| 551 return false; |
| 552 |
| 553 if (auto *A = llvm::dyn_cast<InstArithmetic>(InstrA)) { |
| 554 auto *B = llvm::cast<InstArithmetic>(InstrB); |
| 555 // A, B are guaranteed to be of the same 'kind' at this point |
| 556 // So, dyn_cast is not needed |
| 557 if (A->getOp() != B->getOp()) |
| 558 return false; |
| 559 } |
| 560 // Does not enter loop if different kind or number of operands |
| 561 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) { |
| 562 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i))) |
| 563 return false; |
| 564 } |
| 565 return true; |
| 566 } |
| 567 }; |
| 568 |
| 569 for (CfgNode *Node : getNodes()) { |
| 570 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen; |
| 571 |
| 572 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements; |
| 573 // Combining the above two into a single data structure might consume less |
| 574 // memory but will be slower i.e map of Instruction -> Set of Variables |
| 575 |
| 576 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency; |
| 577 // Not necessary for SSA, still keeping it in case this pass is not run at |
| 578 // the beginning. Remove to improve performace. |
| 579 |
| 580 int IterCount = getFlags().getLocalCseMaxIterations(); |
| 581 |
| 582 while (IterCount--) { |
| 583 // TODO : Stats on IterCount -> performance |
| 584 for (Inst &Instr : Node->getInsts()) { |
| 585 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr)) |
| 586 continue; |
| 587 |
| 588 // Invalidate replacements |
| 589 auto Iter = Replacements.find(Instr.getDest()); |
| 590 if (Iter != Replacements.end()) { |
| 591 Replacements.erase(Iter); |
| 592 } |
| 593 |
| 594 // Invalidate 'seen' instructions whose operands were just updated. |
| 595 auto DepIter = Dependency.find(Instr.getDest()); |
| 596 if (DepIter != Dependency.end()) { |
| 597 for (auto DepInst : DepIter->second) { |
| 598 Seen.erase(DepInst); |
| 599 } |
| 600 } |
| 601 // The above two can be removed if SSA is assumed. |
| 602 |
| 603 // Replace - doing this before checking for repetitions might enable |
| 604 // more |
| 605 // optimizations |
| 606 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { |
| 607 auto *Opnd = Instr.getSrc(i); |
| 608 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { |
| 609 if (Replacements.find(Var) != Replacements.end()) { |
| 610 Instr.replaceSource(i, Replacements[Var]); |
| 611 } |
| 612 } |
| 613 } |
| 614 |
| 615 // Check for repetitions |
| 616 auto SeenIter = Seen.find(&Instr); |
| 617 if (SeenIter != Seen.end()) { // seen before |
| 618 const Inst *Found = *SeenIter; |
| 619 Replacements[Instr.getDest()] = Found->getDest(); |
| 620 } else { // new |
| 621 Seen.insert(&Instr); |
| 622 |
| 623 // Update dependencies |
| 624 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { |
| 625 auto *Opnd = Instr.getSrc(i); |
| 626 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) { |
| 627 Dependency[Var].push_back(&Instr); |
| 628 } |
| 629 } |
| 630 } |
| 631 } |
| 632 } |
| 633 } |
| 634 } |
| 635 |
510 void Cfg::doArgLowering() { | 636 void Cfg::doArgLowering() { |
511 TimerMarker T(TimerStack::TT_doArgLowering, this); | 637 TimerMarker T(TimerStack::TT_doArgLowering, this); |
512 getTarget()->lowerArguments(); | 638 getTarget()->lowerArguments(); |
513 } | 639 } |
514 | 640 |
515 void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, | 641 void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, |
516 uint32_t CombinedAlignment, InstList &Insts, | 642 uint32_t CombinedAlignment, InstList &Insts, |
517 AllocaBaseVariableType BaseVariableType) { | 643 AllocaBaseVariableType BaseVariableType) { |
518 if (Allocas.empty()) | 644 if (Allocas.empty()) |
519 return; | 645 return; |
(...skipping 958 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1478 } | 1604 } |
1479 } | 1605 } |
1480 // Print each basic block | 1606 // Print each basic block |
1481 for (CfgNode *Node : Nodes) | 1607 for (CfgNode *Node : Nodes) |
1482 Node->dump(this); | 1608 Node->dump(this); |
1483 if (isVerbose(IceV_Instructions)) | 1609 if (isVerbose(IceV_Instructions)) |
1484 Str << "}\n"; | 1610 Str << "}\n"; |
1485 } | 1611 } |
1486 | 1612 |
1487 } // end of namespace Ice | 1613 } // end of namespace Ice |
OLD | NEW |