Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(170)

Side by Side Diff: tools/pnacl-bccompress/pnacl-bccompress.cpp

Issue 154603002: Make pnacl-bccompress add abbreviations for obvious constants. (Closed) Base URL: http://git.chromium.org/native_client/pnacl-llvm.git@master
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/Bitcode/NaCl/Reader/NaClBitcodeDist.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===// 1 //===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===//
2 // 2 //
3 // The LLVM Compiler Infrastructure 3 // The LLVM Compiler Infrastructure
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This tool may be invoked in the following manner: 10 // This tool may be invoked in the following manner:
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 cl::desc("Show collected value distributions in bitcode records."), 93 cl::desc("Show collected value distributions in bitcode records."),
94 cl::init(false)); 94 cl::init(false));
95 95
96 /// Error - All bitcode analysis errors go through this function, 96 /// Error - All bitcode analysis errors go through this function,
97 /// making this a good place to breakpoint if debugging. 97 /// making this a good place to breakpoint if debugging.
98 static bool Error(const std::string &Err) { 98 static bool Error(const std::string &Err) {
99 errs() << Err << "\n"; 99 errs() << Err << "\n";
100 return true; 100 return true;
101 } 101 }
102 102
103 // For debugging. Prints out the abbreviation in readable form to errs(). 103 // Prints out the abbreviation in a readable form to the given stream.
jvoung (off chromium) 2014/02/05 00:24:16 document return bool value?
Karl 2014/02/06 19:46:28 Removed the need for this by adding method NumArgu
104 static void PrintAbbrev(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { 104 static bool PrintAbbrevOp(raw_ostream& Stream,
105 errs() << "Abbrev(block " << BlockID << "): ["; 105 const NaClBitCodeAbbrevOp &Op) {
106 // ContinuationCount>0 implies that the current operand is a 106 if (Op.isLiteral()) {
107 Stream << Op.getLiteralValue();
108 } else if (Op.isEncoding()) {
109 switch (Op.getEncoding()) {
110 case NaClBitCodeAbbrevOp::Fixed:
111 Stream << "Fixed(" << Op.getEncodingData() << ")";
112 break;
113 case NaClBitCodeAbbrevOp::VBR:
114 Stream << "VBR(" << Op.getEncodingData() << ")";
115 break;
116 case NaClBitCodeAbbrevOp::Array:
117 Stream << "Array:";
118 return true;
119 case NaClBitCodeAbbrevOp::Char6:
120 Stream << "Char6";
121 break;
122 case NaClBitCodeAbbrevOp::Blob:
123 Stream << "Blob";
124 break;
125 default:
126 Stream << "??";
127 break;
128 }
129 } else {
130 Stream << "??";
131 }
132 return false;
133 }
134
135 // Prints out the abbreviation in readable form to the given stream.
136 static void PrintAbbrev(raw_ostream &Stream, const NaClBitCodeAbbrev *Abbrev) {
137 Stream << "[";
138 // ContinuationCount > 0 implies that the current operand is a
107 // continuation of previous operand(s). 139 // continuation of previous operand(s).
108 unsigned ContinuationCount = 0; 140 unsigned ContinuationCount = 0;
109 for (unsigned i = 0; i < Abbrev->getNumOperandInfos(); ++i) { 141 for (unsigned i = 0; i < Abbrev->getNumOperandInfos(); ++i) {
110 if (i > 0 && ContinuationCount == 0) errs() << ", "; 142 if (i > 0 && ContinuationCount == 0) Stream << ", ";
111 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(i); 143 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(i);
112 if (Op.isLiteral()) { 144 if (PrintAbbrevOp(Stream, Op)) {
113 errs() << Op.getLiteralValue(); 145 ++ContinuationCount;
114 } else if (Op.isEncoding()) { 146 continue;
115 switch (Op.getEncoding()) {
116 case NaClBitCodeAbbrevOp::Fixed:
117 errs() << "Fixed(" << Op.getEncodingData() << ")";
118 break;
119 case NaClBitCodeAbbrevOp::VBR:
120 errs() << "VBR(" << Op.getEncodingData() << ")";
121 break;
122 case NaClBitCodeAbbrevOp::Array:
123 errs() << "Array:";
124 ++ContinuationCount;
125 continue;
126 case NaClBitCodeAbbrevOp::Char6:
127 errs() << "Char6";
128 break;
129 case NaClBitCodeAbbrevOp::Blob:
130 errs() << "Blob";
131 break;
132 default:
133 errs() << "??";
134 break;
135 }
136 } else {
137 errs() << "??";
138 } 147 }
139 if (ContinuationCount) --ContinuationCount; 148 if (ContinuationCount) --ContinuationCount;
140 } 149 }
141 errs() << "]\n"; 150 Stream << "]\n";
151 }
152
153 // Prints out the abbreviation in readable form to the given Stream.
154 static void PrintAbbrev(raw_ostream &Stream,
155 unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
156 Stream << "Abbrev(block " << BlockID << "): ";
157 PrintAbbrev(Stream, Abbrev);
142 } 158 }
143 159
144 // Reads the input file into the given buffer. 160 // Reads the input file into the given buffer.
145 static bool ReadAndBuffer(OwningPtr<MemoryBuffer> &MemBuf) { 161 static bool ReadAndBuffer(OwningPtr<MemoryBuffer> &MemBuf) {
146 if (error_code ec = 162 if (error_code ec =
147 MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), MemBuf)) { 163 MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), MemBuf)) {
148 return Error("Error reading '" + InputFilename + "': " + ec.message()); 164 return Error("Error reading '" + InputFilename + "': " + ec.message());
149 } 165 }
150 166
151 if (MemBuf->getBufferSize() % 4 != 0) 167 if (MemBuf->getBufferSize() % 4 != 0)
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
208 public: 224 public:
209 // Vector to hold the (ordered) list of abbreviations. 225 // Vector to hold the (ordered) list of abbreviations.
210 typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector; 226 typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector;
211 227
212 BlockAbbrevs(unsigned BlockID) 228 BlockAbbrevs(unsigned BlockID)
213 : BlockID(BlockID) { 229 : BlockID(BlockID) {
214 // backfill internal indices that don't correspond to bitstream 230 // backfill internal indices that don't correspond to bitstream
215 // application abbreviations, so that added abbreviations have 231 // application abbreviations, so that added abbreviations have
216 // valid abbreviation indices. 232 // valid abbreviation indices.
217 for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) { 233 for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) {
218 Abbrevs.push_back(new NaClBitCodeAbbrev()); 234 // Make all non-application abbreviations look like the default
235 // abbreviation.
jvoung (off chromium) 2014/02/05 00:24:16 what goes wrong if the abbrev isn't like the defau
Karl 2014/02/06 19:46:28 The main reason for doing this is so that we don't
236 NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
237 Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
238 Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6));
239 Abbrevs.push_back(Abbrev);
219 } 240 }
220 GlobalAbbrevBitstreamToInternalMap. 241 GlobalAbbrevBitstreamToInternalMap.
221 SetNextBitstreamAbbrevIndex(Abbrevs.size()); 242 SetNextBitstreamAbbrevIndex(Abbrevs.size());
222 } 243 }
223 244
224 ~BlockAbbrevs() { 245 ~BlockAbbrevs() {
225 for (AbbrevVector::const_iterator 246 for (AbbrevVector::const_iterator
226 Iter = Abbrevs.begin(), IterEnd = Abbrevs.end(); 247 Iter = Abbrevs.begin(), IterEnd = Abbrevs.end();
227 Iter != IterEnd; ++Iter) { 248 Iter != IterEnd; ++Iter) {
228 (*Iter)->dropRef(); 249 (*Iter)->dropRef();
(...skipping 27 matching lines...) Expand all
256 Abbrev->dropRef(); 277 Abbrev->dropRef();
257 return false; 278 return false;
258 } 279 }
259 280
260 // New abbreviation. Add. 281 // New abbreviation. Add.
261 Index = Abbrevs.size(); 282 Index = Abbrevs.size();
262 Abbrevs.push_back(Abbrev); 283 Abbrevs.push_back(Abbrev);
263 return true; 284 return true;
264 } 285 }
265 286
287 /// Adds the given abbreviation to the set of global abbreviations
288 /// defined for the block. Guarantees that duplicate abbreviations
289 /// are not added to the block. Note: Code takes ownership of
290 /// the given abbreviation. Returns true if new abbreviation.
291 /// Updates Index to the index where the abbreviation is located
jvoung (off chromium) 2014/02/05 00:24:16 The documentation for "Updates Index" doesn't seem
Karl 2014/02/06 19:46:28 Sorry, I really should be more careful when cuttin
292 /// in the set of abbreviations.
293 bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) {
294 int Index;
295 return AddAbbreviation(Abbrev, Index);
296 }
297
266 /// The block ID associated with the block. 298 /// The block ID associated with the block.
267 unsigned GetBlockID() const { 299 unsigned GetBlockID() const {
268 return BlockID; 300 return BlockID;
269 } 301 }
270 302
271 /// Returns the index of the frist application abbreviation for the 303 /// Returns the index of the frist application abbreviation for the
272 /// block. 304 /// block.
273 unsigned GetFirstApplicationAbbreviation() const { 305 unsigned GetFirstApplicationAbbreviation() const {
274 return naclbitc::FIRST_APPLICATION_ABBREV; 306 return naclbitc::FIRST_APPLICATION_ABBREV;
275 } 307 }
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
478 // Adds the abbreviation to the list of abbreviations for the given 510 // Adds the abbreviation to the list of abbreviations for the given
479 // block. 511 // block.
480 void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev, 512 void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev,
481 int &Index) { 513 int &Index) {
482 // Get block abbreviations. 514 // Get block abbreviations.
483 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID); 515 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID);
484 516
485 // Read abbreviation and add as a global abbreviation. 517 // Read abbreviation and add as a global abbreviation.
486 if (Abbrevs->AddAbbreviation(Abbrev, Index) 518 if (Abbrevs->AddAbbreviation(Abbrev, Index)
487 && TraceGeneratedAbbreviations) { 519 && TraceGeneratedAbbreviations) {
488 PrintAbbrev(BlockID, Abbrev); 520 PrintAbbrev(errs(), BlockID, Abbrev);
489 } 521 }
490 } 522 }
491 523
492 /// Finds the index to the corresponding internal block abbrevation 524 /// Finds the index to the corresponding internal block abbrevation
jvoung (off chromium) 2014/02/05 00:24:16 abb
Karl 2014/02/06 19:46:28 Fixed spelling of abbreviation.
493 /// for the given abbreviation. 525 /// for the given abbreviation.
494 int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { 526 int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
495 return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev); 527 return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev);
496 } 528 }
497 529
498 void Init() { 530 void Init() {
499 GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID()); 531 GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID());
500 LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex( 532 LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex(
501 GlobalBlockAbbrevs-> 533 GlobalBlockAbbrevs->
502 GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex()); 534 GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex());
503 } 535 }
504 }; 536 };
505 537
506 bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) { 538 bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) {
507 NaClBlockAnalyzeParser Parser(BlockID, this); 539 NaClBlockAnalyzeParser Parser(BlockID, this);
508 return Parser.ParseThisBlock(); 540 return Parser.ParseThisBlock();
509 } 541 }
510 542
543 // Models the unrolling of an abbreviation into its sequence of
544 // individual operators. That is, unrolling arrays to match the width
545 // of the abbreviation. Note: We unroll in the form that best matches
546 // the distribution map. Hence, the code is stored as a separate
547 // operator. We also keep the array abbreviation op, for untracked
548 // elements within the distribution maps.
549 class UnrolledAbbreviation {
550 public:
551 /// Unroll the given abbreviation, assuming it has the given size
552 /// (as specified in the distribution maps).
553 UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size)
554 : CodeOp(0),
555 HasMore(Size >= NaClValueIndexCutoff) {
556 unsigned NextOp = 0;
557 UnrollAbbrevOp(CodeOp, Abbrev, NextOp);
558 --Size;
559 for (unsigned i = 0; i < Size; ++i) {
560 // Create slot and then fill with appropriate operator.
561 AbbrevOps.push_back(CodeOp);
562 UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp);
563 }
564 }
565
566 /// Prints out the abbreviation modeled by the unrolled
567 /// abbreviation.
568 void Print(raw_ostream &Stream) const {
569 Stream << "[";
570 PrintAbbrevOp(Stream, CodeOp);
571 for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
572 Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end();
573 Iter != IterEnd; ++Iter) {
574 Stream << ", ";
575 PrintAbbrevOp(Stream, *Iter);
576 }
577 if (HasMore) {
578 Stream << ", ";
579 PrintAbbrevOp(Stream, ArrayOp);
580 Stream << ":";
581 PrintAbbrevOp(Stream, VbrOp);
582 }
583 Stream << "]\n";
584 }
585
586 /// Converts the unrolled abbreviation back into a regular
587 /// abbreviation.
588 NaClBitCodeAbbrev *roll() const {
589 NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
590 std::vector<NaClBitCodeAbbrevOp> Ops(AbbrevOps);
591
592 // If it ends with an array, move back over elements that
593 // don't make a difference.
594 if (HasMore) {
595 while (!Ops.empty() && Ops.back() == VbrOp) {
596 Ops.pop_back();
597 }
598 if (Ops.empty() && CodeOp == VbrOp) {
599 // Just use array.
600 Abbrev->Add(ArrayOp);
601 Abbrev->Add(VbrOp);
602 return Abbrev;
603 }
604 }
605
606 // Now add remaining abbreviation operators.
607 Abbrev->Add(CodeOp);
608 for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
609 Iter = Ops.begin(), IterEnd = Ops.end();
610 Iter != IterEnd; ++Iter) {
611 Abbrev->Add(*Iter);
612 }
613 if (HasMore) {
614 Abbrev->Add(ArrayOp);
615 Abbrev->Add(VbrOp);
616 }
617 return Abbrev;
618 }
619
620 // The abbreviation used for the record code.
621 NaClBitCodeAbbrevOp CodeOp;
622
623 // The abbreviations used for each tracked value index.
624 std::vector<NaClBitCodeAbbrevOp> AbbrevOps;
625
626 private:
627
628 // True if the distribution map has untracked values in
629 // the distribution map.
630 bool HasMore;
631
632 // The symbol abbreviation operators to add to capture untracked
633 // values in the distribution map.
634 static NaClBitCodeAbbrevOp ArrayOp;
635 static NaClBitCodeAbbrevOp VbrOp;
636
637 // Extracts out the next abbreviation operator from the abbreviation
638 // Abbrev, given the index NextOp, and assigns it to AbbrevOp
639 void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp,
640 NaClBitCodeAbbrev *Abbrev,
641 unsigned &NextOp) {
642 assert(NextOp < Abbrev->getNumOperandInfos());
643 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp);
644 if (!Op.isLiteral() && Op.getEncoding() == NaClBitCodeAbbrevOp::Array) {
645 AbbrevOp = Abbrev->getOperandInfo(NextOp+1);
jvoung (off chromium) 2014/02/05 00:24:16 How come NextOp isn't bumped for Array?
Karl 2014/02/06 19:46:28 When array appears in an abbreviation expression,
jvoung (off chromium) 2014/02/06 23:19:36 Okay, so this is to peel out of the array during u
Karl 2014/02/07 19:37:29 Ok. I've updated the comment with an example that
646 } else {
647 AbbrevOp = Op;
648 NextOp++;
649 }
650 }
651 };
652
653 NaClBitCodeAbbrevOp UnrolledAbbreviation::ArrayOp(NaClBitCodeAbbrevOp::Array);
654 NaClBitCodeAbbrevOp UnrolledAbbreviation::VbrOp(NaClBitCodeAbbrevOp::VBR, 6);
655
656 /// Models a candidate block abbreviation, which is a blockID, the
657 /// corresponding abbreviation to be considered for addition, and the
658 /// number of records the abbreviation applies to. Note: Constructors
659 /// and assignment take ownership of the abbreviation.
660 class CandBlockAbbrev {
Karl 2014/02/06 19:46:28 Realized that putting NumInstances in this class w
661 public:
662 CandBlockAbbrev(unsigned NumInstances, unsigned BlockID,
663 NaClBitCodeAbbrev *Abbrev)
664 : NumInstances(NumInstances), BlockID(BlockID), Abbrev(Abbrev) {
665 }
666
667 explicit CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev)
668 : NumInstances(BlockAbbrev.NumInstances),
669 BlockID(BlockAbbrev.BlockID),
670 Abbrev(BlockAbbrev.Abbrev) {
671 Abbrev->addRef();
672 }
673
674 void operator=(const CandBlockAbbrev &BlockAbbrev) {
675 Abbrev->dropRef();
676 NumInstances = BlockAbbrev.NumInstances;
677 BlockID = BlockAbbrev.BlockID;
678 Abbrev = BlockAbbrev.Abbrev;
679 Abbrev->addRef();
680 }
681
682 ~CandBlockAbbrev() {
683 Abbrev->dropRef();
684 }
685
686 /// Returns the number of instances associated with the candidate
687 /// abbreviation.
688 unsigned GetNumInstances() const {
689 return NumInstances;
690 }
691
692 /// The block ID of the candidate abbreviation.
693 unsigned GetBlockID() const {
694 return BlockID;
695 }
696
697 /// The abbreviation of the candidate abbreviation.
698 const NaClBitCodeAbbrev *GetAbbrev() const {
699 return Abbrev;
700 }
701
702 /// orders this against the candidate abbreviation.
703 int Compare(const CandBlockAbbrev &CandAbbrev) const {
704 if (NumInstances < CandAbbrev.NumInstances) return -1;
705 if (NumInstances > CandAbbrev.NumInstances) return 1;
706 unsigned diff = BlockID - CandAbbrev.BlockID;
707 if (diff) return diff;
708 return Abbrev->Compare(*CandAbbrev.Abbrev);
709 }
710
711 /// Prints the candidate abbreviation to the given stream.
712 void Print(raw_ostream &Stream) const {
713 Stream << format("%12u", NumInstances) << ": ";
714 PrintAbbrev(Stream, BlockID, Abbrev);
715 }
716
717 private:
718 // The number of records that use this abbreviation.
719 unsigned NumInstances;
720 // The block the abbreviation applies to.
721 unsigned BlockID;
722 // The candidate abbreviation.
723 NaClBitCodeAbbrev *Abbrev;
724 };
725
726 static inline bool operator<(const CandBlockAbbrev &A1,
727 const CandBlockAbbrev &A2) {
728 return A1.Compare(A2);
jvoung (off chromium) 2014/02/05 00:24:16 shouldn't this check if Compare returns -1 or some
Karl 2014/02/06 19:46:28 My bad. Fixing.
729 }
730
731 /// Models the minimum number of instances for a candidate abbreviation
732 /// before we will even consider it a potential candidate abbrevation.
jvoung (off chromium) 2014/02/05 00:24:16 abb
Karl 2014/02/06 19:46:28 Done.
733 static unsigned MinNumInstancesForNewAbbrevs = 100;
734
735 /// Models the set of candidate abbreviations being considered.
736 /// Filters based on the number of instances of each candidate
737 /// abbrevaition. This filter is done to only keep candidates that
jvoung (off chromium) 2014/02/05 00:24:16 abbreviation
Karl 2014/02/06 19:46:28 Done.
738 /// most likely will reduce bitcode size.
739 class CandidateAbbrevs {
Karl 2014/02/06 19:46:28 Modified this class. In particular, modified that
740 public:
741 typedef std::set<CandBlockAbbrev> Set;
742 typedef Set::const_iterator const_iterator;
743
744 /// Creates an empty set of candidate abbreviations, to be
745 /// (potentially) added to the given set of block abbreviations.
746 /// Argument is the (global) block abbreviations map, which is
747 /// used to determine if it already exists.
748 CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap)
749 : MaxInstances(0), BlockAbbrevsMap(BlockAbbrevsMap)
750 {}
751
752 ~CandidateAbbrevs() {
753 Abbrevs.clear();
754 }
755
756 /// Adds the given (unrolled) abbreviation as a candidate
757 /// abbreviation to the given block. NumInstances is the number of
758 /// instances expected to use this candidate abbreviation. Returns
759 /// true if the corresponding candidate abbreviation is added to this
760 /// set of candidate abbreviations. Note: depending on the value
761 /// of NumInstances, methods GetMaxInstances and GetMinInstances
762 /// may change to include NumInstances, if the candidate abbreviation
763 /// is added.
764 bool Add(unsigned BlockID,
765 UnrolledAbbreviation &UnrolledAbbrev,
766 unsigned NumInstances);
767
768 /// Returns the upper range (of number instances) that is currently
769 /// associated with this set of candidate abbreviations.
770 unsigned GetMaxInstances() const {
771 return MaxInstances;
772 }
773
774 /// Returns the lower range (of number instances) that is currently
775 /// associated with this set of candidate abbreviations.
jvoung (off chromium) 2014/02/05 00:24:16 This to adjust the lower bound set by "MinNumInsta
Karl 2014/02/06 19:46:28 Yes. We set the maximum based on the number of bit
776 unsigned GetMinInstances() const {
777 if (MaxInstances <= 1024) {
778 return 0;
779 } else
780 return MaxInstances >> 4;
781 }
782
783 /// Returns the list of candidate abbreviations in this set.
784 const Set &GetAbbrevs() const {
785 return Abbrevs;
786 }
787
788 /// Prints out the current contents of this set.
789 void Print(raw_ostream &Stream, const char *Title = "Candidates") const {
790 Stream << "-- " << Title << ": \n";
791 for (CandidateAbbrevs::const_iterator
792 Iter = Abbrevs.begin(), IterEnd = Abbrevs.end();
793 Iter != IterEnd; ++Iter) {
794 Iter->Print(Stream);
795 }
796 Stream << "--\n";
797 }
798
799 private:
800 // The upper bound of instances in this set.
801 unsigned MaxInstances;
802
803 // The set of abbreviations in this set.
804 Set Abbrevs;
805
806 // The map of (global) abbreviations already associated with each block.
807 BlockAbbrevsMapType &BlockAbbrevsMap;
808 };
809
810 bool CandidateAbbrevs::Add(unsigned BlockID,
811 UnrolledAbbreviation &UnrolledAbbrev,
812 unsigned NumInstances) {
813 // First see if we shouldn't add.
814 if (NumInstances < MinNumInstancesForNewAbbrevs) return false;
815 if (NumInstances < GetMinInstances()) return false;
816 NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.roll();
jvoung (off chromium) 2014/02/05 00:24:16 Is Unroll() and roll() and lossy because of the cu
Karl 2014/02/06 19:46:28 Fixing the code for class UnrolledAbbreviation so
817 if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) {
818 if (Abbrevs->FindAbbreviation(Abbrev) !=
819 BlockAbbrevs::NO_SUCH_ABBREVIATION) {
820 Abbrev->dropRef();
821 return false;
822 }
823 }
824
825 // If reached, we should add the given abbrevation to the set of
jvoung (off chromium) 2014/02/05 00:24:16 abbreviation
Karl 2014/02/06 19:46:28 Done.
826 // candidate abbreviations.
827 if (NumInstances > GetMaxInstances()) {
828 // New range of candidates found. Throw out candidate
829 // abbreviations that no longer apply, due to the adjustment of
830 // the min/max ranges.
831 Set OldAbbrevs;
832 swap(Abbrevs, OldAbbrevs);
833
834 unsigned Bits = NaClBitsNeededForValue(NumInstances);
835 if (Bits > 31) {
836 MaxInstances = ~0U; // i.e. max unsigned value.
837 } else {
838 MaxInstances = 1U << Bits;
839 }
840
841 for (Set::const_iterator
842 Iter = OldAbbrevs.begin(), IterEnd = OldAbbrevs.end();
843 Iter != IterEnd; ++Iter) {
844 if (Iter->GetNumInstances() >= GetMinInstances()) {
845 Abbrevs.insert(*Iter);
846 }
847 }
848 }
849 Abbrevs.insert(CandBlockAbbrev(NumInstances, BlockID, Abbrev));
850 return true;
851 }
852
853 // Look for new abbreviations in block BlockID, considering it was
854 // read with the given abbreviation Abbrev, and considering changing
855 // the abbreviation opererator for value Index. ValueDist is how
856 // values at Index are distributed. Any found abbreviations are added
857 // to the candidate abbreviations CandAbbrevs. Returns true only if we
858 // have added new candidate abbrevations to CandAbbrevs.
jvoung (off chromium) 2014/02/05 00:24:16 abbreviations
Karl 2014/02/06 19:46:28 Done.
859 static bool AddNewAbbreviations(unsigned BlockID,
860 UnrolledAbbreviation &Abbrev,
861 unsigned Index,
862 NaClBitcodeValueDist &ValueDist,
863 CandidateAbbrevs &CandAbbrevs) {
864 // TODO(kschimpf): Add code to try and find a better encoding for
865 // the values, based on the distribution.
866
867 // If this index is already a literal abbreviation, no improvements can
868 // be made.
869 const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index];
870 if (Op.isLiteral()) return false;
871
872 // Search based on sorted distribution, which sorts by number of
873 // instances. Start by trying to find possible constants to use.
874 const NaClBitcodeDist::Distribution *
875 Distribution = ValueDist.GetDistribution();
876 for (NaClBitcodeDist::Distribution::const_iterator
877 Iter = Distribution->begin(), IterEnd = Distribution->end();
878 Iter != IterEnd; ++Iter) {
879 NaClValueRangeType Range = GetNaClValueRange(Iter->second);
880 if (Range.first != Range.second) continue; // not a constant.
881
882 // Defines a constant. Try as new candidate range. In addition,
883 // don't try any more constant values, since this is the one with
884 // the greatest number of instances.
885 NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first);
886 Abbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first);
887 return CandAbbrevs.Add(BlockID, Abbrev, Elmt->GetNumInstances());
888 }
889 return false;
890 }
891
892 // Look for new abbreviations in block BlockID, considering it was
893 // read with the given abbreviation Abbrev. IndexDist is the
jvoung (off chromium) 2014/02/05 00:24:16 Any idea how IndexDist looks like? I would think
Karl 2014/02/06 19:46:28 Not sure what is being asked here. First, the (bcc
jvoung (off chromium) 2014/02/06 23:19:36 Sorry, just lost a bit of how the whole thing was
894 // corresponding distribution of value indices associated with the
895 // abbreviation. Any found abbreviations are added to the candidate
896 // abbreviations CandAbbrevs.
897 static void AddNewAbbreviations(unsigned BlockID,
898 UnrolledAbbreviation &Abbrev,
899 NaClBitcodeDist &IndexDist,
900 CandidateAbbrevs &CandAbbrevs) {
901 // Search based on sorted distribution, which sorts based on heuristic
902 // of best index to fix first.
903 const NaClBitcodeDist::Distribution *
904 IndexDistribution = IndexDist.GetDistribution();
905 for (NaClBitcodeDist::Distribution::const_iterator
906 IndexIter = IndexDistribution->begin(),
907 IndexIterEnd = IndexDistribution->end();
908 IndexIter != IndexIterEnd; ++IndexIter) {
909 unsigned Index = static_cast<unsigned>(IndexIter->second);
910 if (AddNewAbbreviations(
911 BlockID, Abbrev, Index,
912 cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index))
913 ->GetValueDist(),
914 CandAbbrevs)) {
915 return;
916 }
917 }
918 }
919
920 // Look for new abbrevaitions in block BlockID, considering it was
jvoung (off chromium) 2014/02/05 00:24:16 abbreviations
Karl 2014/02/06 19:46:28 Done.
921 // read with the given abbreviation Abbrev, and the given record Code.
922 // SizeDist is the corresponding distribution of sizes associated with
923 // the abbreviation. Any found abbreviations are added to the
924 // candidate abbreviations CandAbbrevs.
925 static void AddNewAbbreviations(unsigned BlockID,
926 NaClBitCodeAbbrev *Abbrev,
927 unsigned Code,
928 NaClBitcodeDist &SizeDist,
929 CandidateAbbrevs &CandAbbrevs) {
930 const NaClBitcodeDist::Distribution *
931 SizeDistribution = SizeDist.GetDistribution();
932 for (NaClBitcodeDist::Distribution::const_iterator
933 SizeIter = SizeDistribution->begin(),
934 SizeIterEnd = SizeDistribution->end();
935 SizeIter != SizeIterEnd; ++SizeIter) {
936 unsigned Size = static_cast<unsigned>(SizeIter->second);
937 UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1); // Add code!
938 if (!UnrolledAbbrev.CodeOp.isLiteral()) {
939 // Try making the code a literal.
940 UnrolledAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code);
941 CandAbbrevs.Add(BlockID, UnrolledAbbrev,
942 SizeDist.at(Size)->GetNumInstances());
943 }
944 // Now process value indices to find candidate abbreviations.
945 AddNewAbbreviations(
946 BlockID, UnrolledAbbrev,
947 cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size))
948 ->GetValueIndexDist(),
949 CandAbbrevs);
950 }
951 }
952
953 // Look for new abbreviations in block BlockID. Abbrevs is the map of
954 // read (globally defined) abbreviations. AbbrevDist is the
jvoung (off chromium) 2014/02/05 00:24:16 Abbrevs is already known to be associated with the
Karl 2014/02/06 19:46:28 yes. Added qualifier to comment.
955 // distribution map of abbreviations associated with BlockID. Any
956 // found abbreviations are added to the candidate abbreviations
957 // CandAbbrevs.
958 static void AddNewAbbreviations(unsigned BlockID,
959 BlockAbbrevs *Abbrevs,
960 NaClBitcodeDist &AbbrevDist,
961 CandidateAbbrevs &CandAbbrevs) {
962 const NaClBitcodeDist::Distribution *
963 AbbrevDistribution = AbbrevDist.GetDistribution();
964 for (NaClBitcodeDist::Distribution::const_iterator
965 AbbrevIter = AbbrevDistribution->begin(),
966 AbbrevIterEnd = AbbrevDistribution->end();
967 AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
968 NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second;
969 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex);
jvoung (off chromium) 2014/02/05 00:24:16 It seems like an *existing* Abbrev gets pulled out
Karl 2014/02/06 19:46:28 Yes. Unroll creates a copy of the abbreviation, an
970 NaClBitcodeAbbrevDistElement *AbbrevElmt =
971 cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex));
972 NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist();
973
974 const NaClBitcodeDist::Distribution *
975 CodeDistribution = CodeDist.GetDistribution();
976 for (NaClBitcodeDist::Distribution::const_iterator
977 CodeIter = CodeDistribution->begin(),
978 CodeIterEnd = CodeDistribution->end();
979 CodeIter != CodeIterEnd; ++CodeIter) {
980 unsigned Code = static_cast<unsigned>(CodeIter->second);
981 AddNewAbbreviations(
982 BlockID,
983 Abbrev,
984 Code,
985 cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second))
986 ->GetSizeDist(),
987 CandAbbrevs);
988 }
989 }
990 }
991
992 // Look for new abbreviations in the given block distribution map
993 // BlockDist. BlockAbbrevsMap contains the set of read global
994 // abbrevations. Any found abbreviations are added to the set of
jvoung (off chromium) 2014/02/05 00:24:16 abb.
Karl 2014/02/06 19:46:28 Done.
995 // candidate abbreviations CandAbbrevs.
996 static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist,
997 BlockAbbrevsMapType &BlockAbbrevsMap,
998 CandidateAbbrevs &CandAbbrevs) {
999 // Start by collecting candidate abbreviations.
1000 const NaClBitcodeDist::Distribution *
1001 Distribution = BlockDist.GetDistribution();
1002 for (NaClBitcodeDist::Distribution::const_iterator
1003 Iter = Distribution->begin(), IterEnd = Distribution->end();
1004 Iter != IterEnd; ++Iter) {
1005 unsigned BlockID = static_cast<unsigned>(Iter->second);
1006 AddNewAbbreviations(
1007 BlockID,
1008 BlockAbbrevsMap[BlockID],
1009 cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))
1010 ->GetAbbrevDist(),
1011 CandAbbrevs);
1012 }
1013 // Install candidate abbreviations.
Karl 2014/02/06 19:46:28 Modified this code to sort the abbreviations by th
jvoung (off chromium) 2014/02/06 23:19:36 What are some examples of when the two *different*
Karl 2014/02/07 19:37:29 It may be the case that value index 3 has 300 inst
1014 if (TraceGeneratedAbbreviations) {
1015 CandAbbrevs.Print(errs(), "New abbreviations");
1016 }
1017 for (CandidateAbbrevs::const_iterator
1018 Iter = CandAbbrevs.GetAbbrevs().begin(),
1019 IterEnd = CandAbbrevs.GetAbbrevs().end();
1020 Iter != IterEnd; ++Iter) {
1021 CandBlockAbbrev CandAbbrev(*Iter);
1022 BlockAbbrevs *Abbrevs = BlockAbbrevsMap[CandAbbrev.GetBlockID()];
1023 Abbrevs->AddAbbreviation(CandAbbrev.GetAbbrev()->Copy());
1024 }
1025 }
1026
511 // Read in bitcode, analyze data, and figure out set of abbreviations 1027 // Read in bitcode, analyze data, and figure out set of abbreviations
512 // to use, from memory buffer MemBuf containing the input bitcode file. 1028 // to use, from memory buffer MemBuf containing the input bitcode file.
513 static bool AnalyzeBitcode(OwningPtr<MemoryBuffer> &MemBuf, 1029 static bool AnalyzeBitcode(OwningPtr<MemoryBuffer> &MemBuf,
514 BlockAbbrevsMapType &BlockAbbrevsMap) { 1030 BlockAbbrevsMapType &BlockAbbrevsMap) {
515 // TODO(kschimpf): The current code only extracts abbreviations 1031 // TODO(kschimpf): The current code only extracts abbreviations
516 // defined in the bitcode file. This code needs to be updated to 1032 // defined in the bitcode file. This code needs to be updated to
517 // collect data distributions and figure out better (global) 1033 // collect data distributions and figure out better (global)
518 // abbreviations to use. 1034 // abbreviations to use.
519 1035
520 const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); 1036 const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
(...skipping 14 matching lines...) Expand all
535 if (Parser.Parse()) return true; 1051 if (Parser.Parse()) return true;
536 } 1052 }
537 1053
538 if (ShowValueDistributions) { 1054 if (ShowValueDistributions) {
539 // To make shell redirection of this trace easier, print it to 1055 // To make shell redirection of this trace easier, print it to
540 // stdout unless stdout is being used to contain the compressed 1056 // stdout unless stdout is being used to contain the compressed
541 // bitcode file. In the latter case, use stderr. 1057 // bitcode file. In the latter case, use stderr.
542 Parser.BlockDist.Print(OutputFilename == "-" ? errs() : outs()); 1058 Parser.BlockDist.Print(OutputFilename == "-" ? errs() : outs());
543 } 1059 }
544 1060
1061 CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap);
1062 AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap, CandAbbrevs);
Karl 2014/02/06 19:46:28 After reading the code more carefully, I realized
545 return false; 1063 return false;
546 } 1064 }
547 1065
548 /// Parses the input bitcode file and generates the corresponding 1066 /// Parses the input bitcode file and generates the corresponding
549 /// compressed bitcode file, by replacing abbreviations in the input 1067 /// compressed bitcode file, by replacing abbreviations in the input
550 /// file with the corresponding abbreviations defined in 1068 /// file with the corresponding abbreviations defined in
551 /// BlockAbbrevsMap. 1069 /// BlockAbbrevsMap.
552 class NaClBitcodeCopyParser : public NaClBitcodeParser { 1070 class NaClBitcodeCopyParser : public NaClBitcodeParser {
553 public: 1071 public:
554 // Top-level constructor to build the appropriate block parser 1072 // Top-level constructor to build the appropriate block parser
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
616 NaClBlockCopyParser *EnclosingParser) 1134 NaClBlockCopyParser *EnclosingParser)
617 : NaClBitcodeParser(BlockID, EnclosingParser), 1135 : NaClBitcodeParser(BlockID, EnclosingParser),
618 Context(EnclosingParser->Context) 1136 Context(EnclosingParser->Context)
619 {} 1137 {}
620 1138
621 virtual bool Error(const std::string &Message) { 1139 virtual bool Error(const std::string &Message) {
622 // Use local error routine so that all errors are treated uniformly. 1140 // Use local error routine so that all errors are treated uniformly.
623 return ::Error(Message); 1141 return ::Error(Message);
624 } 1142 }
625 1143
1144 /// Returns the set of (global) block abbreviations defined for the
1145 /// given block ID.
1146 BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {
1147 BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];
1148 if (Abbrevs == 0) {
1149 Abbrevs = new BlockAbbrevs(BlockID);
1150 Context->BlockAbbrevsMap[BlockID] = Abbrevs;
1151 }
1152 return Abbrevs;
1153 }
1154
626 virtual bool ParseBlock(unsigned BlockID) { 1155 virtual bool ParseBlock(unsigned BlockID) {
627 NaClBlockCopyParser Parser(BlockID, this); 1156 NaClBlockCopyParser Parser(BlockID, this);
628 return Parser.ParseThisBlock(); 1157 return Parser.ParseThisBlock();
629 } 1158 }
630 1159
631 virtual void EnterBlock(unsigned NumWords) { 1160 virtual void EnterBlock(unsigned NumWords) {
632 unsigned BlockID = GetBlockID(); 1161 unsigned BlockID = GetBlockID();
633 BlockAbbreviations = Context->BlockAbbrevsMap[BlockID]; 1162 BlockAbbreviations = GetGlobalAbbrevs(BlockID);
634 1163
635 // Enter the subblock. 1164 // Enter the subblock.
636 NaClBitcodeSelectorAbbrev 1165 NaClBitcodeSelectorAbbrev
637 Selector(BlockAbbreviations->GetNumberAbbreviations()-1); 1166 Selector(BlockAbbreviations->GetNumberAbbreviations()-1);
638 Context->Writer.EnterSubblock(BlockID, Selector); 1167 Context->Writer.EnterSubblock(BlockID, Selector);
1168
1169 // Note: We must dump module abbreviations as local
1170 // abbreviations, because they are in a yet to be
1171 // dumped BlockInfoBlock.
jvoung (off chromium) 2014/02/05 00:24:16 So the BlockInfoBlock will be skipped?
Karl 2014/02/06 19:46:28 No. There are generated by method ExitBlockInfo be
1172 if (BlockID == naclbitc::MODULE_BLOCK_ID) {
1173 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID);
1174 if (Abbrevs == 0) return;
1175 for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) {
1176 Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy());
1177 }
1178 }
639 } 1179 }
640 1180
641 virtual void ExitBlock() { 1181 virtual void ExitBlock() {
642 Context->Writer.ExitBlock(); 1182 Context->Writer.ExitBlock();
643 } 1183 }
644 1184
645 virtual void ExitBlockInfo() { 1185 virtual void ExitBlockInfo() {
646 assert(!Context->FoundFirstBlockInfo && 1186 assert(!Context->FoundFirstBlockInfo &&
647 "Input bitcode has more that one BlockInfoBlock"); 1187 "Input bitcode has more that one BlockInfoBlock");
648 Context->FoundFirstBlockInfo = true; 1188 Context->FoundFirstBlockInfo = true;
649 1189
650 // Generate global abbreviations within a blockinfo block. 1190 // Generate global abbreviations within a blockinfo block.
651 Context->Writer.EnterBlockInfoBlock(); 1191 Context->Writer.EnterBlockInfoBlock();
652 for (BlockAbbrevsMapType::const_iterator 1192 for (BlockAbbrevsMapType::const_iterator
653 Iter = Context->BlockAbbrevsMap.begin(), 1193 Iter = Context->BlockAbbrevsMap.begin(),
654 IterEnd = Context->BlockAbbrevsMap.end(); 1194 IterEnd = Context->BlockAbbrevsMap.end();
655 Iter != IterEnd; ++Iter) { 1195 Iter != IterEnd; ++Iter) {
656 unsigned BlockID = Iter->first; 1196 unsigned BlockID = Iter->first;
1197 // Don't emit module abbreviations, since they have been
1198 // emitted as local abbreviations.
1199 if (BlockID == naclbitc::MODULE_BLOCK_ID) continue;
1200
657 BlockAbbrevs *Abbrevs = Iter->second; 1201 BlockAbbrevs *Abbrevs = Iter->second;
658 if (Abbrevs == 0) continue; 1202 if (Abbrevs == 0) continue;
659 for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); 1203 for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation();
660 i < Abbrevs->GetNumberAbbreviations(); ++i) { 1204 i < Abbrevs->GetNumberAbbreviations(); ++i) {
661 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i); 1205 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i);
662 Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev); 1206 Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev);
663 } 1207 }
664 } 1208 }
665 Context->Writer.ExitBlock(); 1209 Context->Writer.ExitBlock();
666 } 1210 }
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after
948 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 1492 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
949 cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n"); 1493 cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n");
950 1494
951 OwningPtr<MemoryBuffer> MemBuf; 1495 OwningPtr<MemoryBuffer> MemBuf;
952 if (ReadAndBuffer(MemBuf)) return 1; 1496 if (ReadAndBuffer(MemBuf)) return 1;
953 BlockAbbrevsMapType BlockAbbrevsMap; 1497 BlockAbbrevsMapType BlockAbbrevsMap;
954 if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1; 1498 if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1;
955 if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1; 1499 if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1;
956 return 0; 1500 return 0;
957 } 1501 }
OLDNEW
« no previous file with comments | « lib/Bitcode/NaCl/Reader/NaClBitcodeDist.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698