Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: tools/pnacl-bccompress/pnacl-bccompress.cpp

Issue 154603002: Make pnacl-bccompress add abbreviations for obvious constants. (Closed) Base URL: http://git.chromium.org/native_client/pnacl-llvm.git@master
Patch Set: Fix issues raised in Patch Set 3. Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===// 1 //===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===//
2 // 2 //
3 // The LLVM Compiler Infrastructure 3 // The LLVM Compiler Infrastructure
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This tool may be invoked in the following manner: 10 // This tool may be invoked in the following manner:
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 cl::desc("Show collected value distributions in bitcode records."), 93 cl::desc("Show collected value distributions in bitcode records."),
94 cl::init(false)); 94 cl::init(false));
95 95
96 /// Error - All bitcode analysis errors go through this function, 96 /// Error - All bitcode analysis errors go through this function,
97 /// making this a good place to breakpoint if debugging. 97 /// making this a good place to breakpoint if debugging.
98 static bool Error(const std::string &Err) { 98 static bool Error(const std::string &Err) {
99 errs() << Err << "\n"; 99 errs() << Err << "\n";
100 return true; 100 return true;
101 } 101 }
102 102
103 // For debugging. Prints out the abbreviation in readable form to errs(). 103 // Prints out the abbreviation in readable form to the given Stream.
104 static void PrintAbbrev(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { 104 static void PrintAbbrev(raw_ostream &Stream,
105 errs() << "Abbrev(block " << BlockID << "): ["; 105 unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
106 // ContinuationCount>0 implies that the current operand is a 106 Stream << "Abbrev(block " << BlockID << "): ";
107 // continuation of previous operand(s). 107 Abbrev->Print(Stream);
108 unsigned ContinuationCount = 0;
109 for (unsigned i = 0; i < Abbrev->getNumOperandInfos(); ++i) {
110 if (i > 0 && ContinuationCount == 0) errs() << ", ";
111 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(i);
112 if (Op.isLiteral()) {
113 errs() << Op.getLiteralValue();
114 } else if (Op.isEncoding()) {
115 switch (Op.getEncoding()) {
116 case NaClBitCodeAbbrevOp::Fixed:
117 errs() << "Fixed(" << Op.getEncodingData() << ")";
118 break;
119 case NaClBitCodeAbbrevOp::VBR:
120 errs() << "VBR(" << Op.getEncodingData() << ")";
121 break;
122 case NaClBitCodeAbbrevOp::Array:
123 errs() << "Array:";
124 ++ContinuationCount;
125 continue;
126 case NaClBitCodeAbbrevOp::Char6:
127 errs() << "Char6";
128 break;
129 case NaClBitCodeAbbrevOp::Blob:
130 errs() << "Blob";
131 break;
132 default:
133 errs() << "??";
134 break;
135 }
136 } else {
137 errs() << "??";
138 }
139 if (ContinuationCount) --ContinuationCount;
140 }
141 errs() << "]\n";
142 } 108 }
143 109
144 // Reads the input file into the given buffer. 110 // Reads the input file into the given buffer.
145 static bool ReadAndBuffer(OwningPtr<MemoryBuffer> &MemBuf) { 111 static bool ReadAndBuffer(OwningPtr<MemoryBuffer> &MemBuf) {
146 if (error_code ec = 112 if (error_code ec =
147 MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), MemBuf)) { 113 MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), MemBuf)) {
148 return Error("Error reading '" + InputFilename + "': " + ec.message()); 114 return Error("Error reading '" + InputFilename + "': " + ec.message());
149 } 115 }
150 116
151 if (MemBuf->getBufferSize() % 4 != 0) 117 if (MemBuf->getBufferSize() % 4 != 0)
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
208 public: 174 public:
209 // Vector to hold the (ordered) list of abbreviations. 175 // Vector to hold the (ordered) list of abbreviations.
210 typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector; 176 typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector;
211 177
212 BlockAbbrevs(unsigned BlockID) 178 BlockAbbrevs(unsigned BlockID)
213 : BlockID(BlockID) { 179 : BlockID(BlockID) {
214 // backfill internal indices that don't correspond to bitstream 180 // backfill internal indices that don't correspond to bitstream
215 // application abbreviations, so that added abbreviations have 181 // application abbreviations, so that added abbreviations have
216 // valid abbreviation indices. 182 // valid abbreviation indices.
217 for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) { 183 for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) {
218 Abbrevs.push_back(new NaClBitCodeAbbrev()); 184 // Make all non-application abbreviations look like the default
185 // abbreviation.
186 NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
187 Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
188 Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6));
189 Abbrevs.push_back(Abbrev);
219 } 190 }
220 GlobalAbbrevBitstreamToInternalMap. 191 GlobalAbbrevBitstreamToInternalMap.
221 SetNextBitstreamAbbrevIndex(Abbrevs.size()); 192 SetNextBitstreamAbbrevIndex(Abbrevs.size());
222 } 193 }
223 194
224 ~BlockAbbrevs() { 195 ~BlockAbbrevs() {
225 for (AbbrevVector::const_iterator 196 for (AbbrevVector::const_iterator
226 Iter = Abbrevs.begin(), IterEnd = Abbrevs.end(); 197 Iter = Abbrevs.begin(), IterEnd = Abbrevs.end();
227 Iter != IterEnd; ++Iter) { 198 Iter != IterEnd; ++Iter) {
228 (*Iter)->dropRef(); 199 (*Iter)->dropRef();
(...skipping 27 matching lines...) Expand all
256 Abbrev->dropRef(); 227 Abbrev->dropRef();
257 return false; 228 return false;
258 } 229 }
259 230
260 // New abbreviation. Add. 231 // New abbreviation. Add.
261 Index = Abbrevs.size(); 232 Index = Abbrevs.size();
262 Abbrevs.push_back(Abbrev); 233 Abbrevs.push_back(Abbrev);
263 return true; 234 return true;
264 } 235 }
265 236
237 /// Adds the given abbreviation to the set of global abbreviations
238 /// defined for the block. Guarantees that duplicate abbreviations
239 /// are not added to the block. Note: Code takes ownership of
240 /// the given abbreviation. Returns true if new abbreviation.
241 bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) {
242 int Index;
243 return AddAbbreviation(Abbrev, Index);
244 }
245
266 /// The block ID associated with the block. 246 /// The block ID associated with the block.
267 unsigned GetBlockID() const { 247 unsigned GetBlockID() const {
268 return BlockID; 248 return BlockID;
269 } 249 }
270 250
271 /// Returns the index of the frist application abbreviation for the 251 /// Returns the index of the frist application abbreviation for the
272 /// block. 252 /// block.
273 unsigned GetFirstApplicationAbbreviation() const { 253 unsigned GetFirstApplicationAbbreviation() const {
274 return naclbitc::FIRST_APPLICATION_ABBREV; 254 return naclbitc::FIRST_APPLICATION_ABBREV;
275 } 255 }
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
410 cast<NaClCompressBlockDistElement>( 390 cast<NaClCompressBlockDistElement>(
411 Context->BlockDist.GetElement(Record.GetBlockID())) 391 Context->BlockDist.GetElement(Record.GetBlockID()))
412 ->GetAbbrevDist().AddRecord(Record); 392 ->GetAbbrevDist().AddRecord(Record);
413 } 393 }
414 394
415 virtual void ProcessRecordAbbrev() { 395 virtual void ProcessRecordAbbrev() {
416 // Convert the local abbreviation to a corresponding global 396 // Convert the local abbreviation to a corresponding global
417 // abbreviation. 397 // abbreviation.
418 const NaClBitCodeAbbrev *Abbrev = Record.GetCursor().GetNewestAbbrev(); 398 const NaClBitCodeAbbrev *Abbrev = Record.GetCursor().GetNewestAbbrev();
419 int Index; 399 int Index;
420 AddAbbreviation(GetBlockID(), Abbrev->Copy(), Index); 400 AddAbbreviation(GetBlockID(), Abbrev->Simplify(), Index);
421 LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index); 401 LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index);
422 } 402 }
423 403
424 virtual void ExitBlockInfo() { 404 virtual void ExitBlockInfo() {
425 // Now extract out global abbreviations and put into corresponding 405 // Now extract out global abbreviations and put into corresponding
426 // block abbreviations map, so that they will be used when the 406 // block abbreviations map, so that they will be used when the
427 // bitcode is compressed. 407 // bitcode is compressed.
428 NaClBitstreamReader &Reader = Record.GetReader(); 408 NaClBitstreamReader &Reader = Record.GetReader();
429 SmallVector<unsigned, 12> BlockIDs; 409 SmallVector<unsigned, 12> BlockIDs;
430 Reader.GetBlockInfoBlockIDs(BlockIDs); 410 Reader.GetBlockInfoBlockIDs(BlockIDs);
431 for (SmallVectorImpl<unsigned>::const_iterator 411 for (SmallVectorImpl<unsigned>::const_iterator
432 IDIter = BlockIDs.begin(), IDIterEnd = BlockIDs.end(); 412 IDIter = BlockIDs.begin(), IDIterEnd = BlockIDs.end();
433 IDIter != IDIterEnd; ++IDIter) { 413 IDIter != IDIterEnd; ++IDIter) {
434 unsigned BlockID = *IDIter; 414 unsigned BlockID = *IDIter;
435 BlockAbbrevs* BlkAbbrevs = GetGlobalAbbrevs(BlockID); 415 BlockAbbrevs* BlkAbbrevs = GetGlobalAbbrevs(BlockID);
436 if (const NaClBitstreamReader::BlockInfo *Info = 416 if (const NaClBitstreamReader::BlockInfo *Info =
437 Reader.getBlockInfo(BlockID)) { 417 Reader.getBlockInfo(BlockID)) {
438 for (std::vector<NaClBitCodeAbbrev*>::const_iterator 418 for (std::vector<NaClBitCodeAbbrev*>::const_iterator
439 AbbrevIter = Info->Abbrevs.begin(), 419 AbbrevIter = Info->Abbrevs.begin(),
440 AbbrevIterEnd = Info->Abbrevs.end(); 420 AbbrevIterEnd = Info->Abbrevs.end();
441 AbbrevIter != AbbrevIterEnd; 421 AbbrevIter != AbbrevIterEnd;
442 ++AbbrevIter) { 422 ++AbbrevIter) {
443 NaClBitCodeAbbrev *Abbrev = *AbbrevIter; 423 NaClBitCodeAbbrev *Abbrev = *AbbrevIter;
444 int Index; 424 int Index;
445 AddAbbreviation(BlockID, Abbrev->Copy(), Index); 425 AddAbbreviation(BlockID, Abbrev->Simplify(), Index);
446 BlkAbbrevs->GetGlobalAbbrevBitstreamToInternalMap(). 426 BlkAbbrevs->GetGlobalAbbrevBitstreamToInternalMap().
447 InstallNewBitstreamAbbrevIndex(Index); 427 InstallNewBitstreamAbbrevIndex(Index);
448 } 428 }
449 } 429 }
450 } 430 }
451 } 431 }
452 432
453 protected: 433 protected:
454 // The context (i.e. top-level) parser. 434 // The context (i.e. top-level) parser.
455 NaClAnalyzeParser *Context; 435 NaClAnalyzeParser *Context;
(...skipping 22 matching lines...) Expand all
478 // Adds the abbreviation to the list of abbreviations for the given 458 // Adds the abbreviation to the list of abbreviations for the given
479 // block. 459 // block.
480 void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev, 460 void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev,
481 int &Index) { 461 int &Index) {
482 // Get block abbreviations. 462 // Get block abbreviations.
483 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID); 463 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID);
484 464
485 // Read abbreviation and add as a global abbreviation. 465 // Read abbreviation and add as a global abbreviation.
486 if (Abbrevs->AddAbbreviation(Abbrev, Index) 466 if (Abbrevs->AddAbbreviation(Abbrev, Index)
487 && TraceGeneratedAbbreviations) { 467 && TraceGeneratedAbbreviations) {
488 PrintAbbrev(BlockID, Abbrev); 468 PrintAbbrev(errs(), BlockID, Abbrev);
489 } 469 }
490 } 470 }
491 471
492 /// Finds the index to the corresponding internal block abbrevation 472 /// Finds the index to the corresponding internal block abbreviation
493 /// for the given abbreviation. 473 /// for the given abbreviation.
494 int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { 474 int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
495 return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev); 475 return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev);
496 } 476 }
497 477
498 void Init() { 478 void Init() {
499 GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID()); 479 GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID());
500 LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex( 480 LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex(
501 GlobalBlockAbbrevs-> 481 GlobalBlockAbbrevs->
502 GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex()); 482 GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex());
503 } 483 }
504 }; 484 };
505 485
506 bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) { 486 bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) {
507 NaClBlockAnalyzeParser Parser(BlockID, this); 487 NaClBlockAnalyzeParser Parser(BlockID, this);
508 return Parser.ParseThisBlock(); 488 return Parser.ParseThisBlock();
509 } 489 }
510 490
491 /// Models the unrolling of an abbreviation into its sequence of
492 /// individual operators. That is, unrolling arrays to match the width
493 /// of the abbreviation.
494 ///
495 /// For example, consider the abbreviation [Array(VBR(6))]. If the
496 /// distribution map has data for records of size 3, and the
497 /// distribution map suggests that a constant 4 appears as the second
498 /// element in the record, it is nontrivial to figure out how to
499 /// encorporate this into this abbrevation. Hence, we unroll the array
500 /// (3 times) to get [VBR(6), VBR(6), VBR(6), Array(VBR(6))]. To
501 /// update the second element to match the literal 4, we only need to
502 /// replace the second element in the unrolled abbreviation resulting
503 /// in [VBR(6), Lit(4), VBR(6), Array(VBR(6))].
504 ///
505 /// After we have done appropriate substitutions, we can simplify the
506 /// unrolled abbreviation by calling method Restore.
507 ///
508 /// Note: We unroll in the form that best matches the distribution
509 /// map. Hence, the code is stored as a separate operator. We also
510 /// keep the array abbreviation op, for untracked elements within the
511 /// distribution maps.
512 class UnrolledAbbreviation {
513 void operator=(const UnrolledAbbreviation&) LLVM_DELETED_FUNCTION;
514 public:
515 /// Unroll the given abbreviation, assuming it has the given size
516 /// (as specified in the distribution maps).
517 ///
518 /// If argument CanBeBigger is true, then we do not assume that we
519 /// can remove the trailing array when expanding, because the
520 /// actual size of the corresponding record using this abbreviation
521 /// may be bigger.
522 UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size,
523 bool CanBeBigger = false)
524 : CodeOp(0) {
525 unsigned NextOp = 0;
526 UnrollAbbrevOp(CodeOp, Abbrev, NextOp);
527 --Size;
528 for (unsigned i = 0; i < Size; ++i) {
529 // Create slot and then fill with appropriate operator.
530 AbbrevOps.push_back(CodeOp);
531 UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp);
532 }
533 if (CanBeBigger) {
534 for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) {
535 MoreOps.push_back(Abbrev->getOperandInfo(NextOp));
536 }
537 } else if (NextOp < Abbrev->getNumOperandInfos() &&
538 !Abbrev->getOperandInfo(NextOp).isArrayOp()) {
539 errs() << (Size+1) << ": ";
540 Abbrev->Print(errs());
541 llvm_unreachable("Malformed abbreviation/size pair");
542 }
543 }
544
545 explicit UnrolledAbbreviation(const UnrolledAbbreviation &Abbrev)
546 : CodeOp(Abbrev.CodeOp),
547 AbbrevOps(Abbrev.AbbrevOps),
548 MoreOps(Abbrev.MoreOps) {
549 }
550
551 /// Prints out the abbreviation modeled by the unrolled
552 /// abbreviation.
553 void Print(raw_ostream &Stream) const {
554 NaClBitCodeAbbrev *Abbrev = Restore(false);
555 Abbrev->Print(Stream);
556 Abbrev->dropRef();
557 }
558
559 /// Converts the unrolled abbreviation back into a regular
560 /// abbreviation. If simplify is true, we simplify the
jvoung (off chromium) 2014/02/07 23:50:27 first simplify -> Simplify
Karl 2014/02/10 16:47:01 Done.
561 /// unrolled abbreviation as well.
562 NaClBitCodeAbbrev *Restore(bool Simplify=true) const {
563 NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
564 Abbrev->Add(CodeOp);
565 for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
566 Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end();
567 Iter != IterEnd; ++Iter) {
568 Abbrev->Add(*Iter);
569 }
570 for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
571 Iter = MoreOps.begin(), IterEnd = MoreOps.end();
572 Iter != IterEnd; ++Iter) {
573 Abbrev->Add(*Iter);
574 }
575 if (Simplify) {
576 NaClBitCodeAbbrev *SimpAbbrev = Abbrev->Simplify();
577 Abbrev->dropRef();
578 return SimpAbbrev;
579 } else {
580 return Abbrev;
581 }
582 }
583
584 // The abbreviation used for the record code.
585 NaClBitCodeAbbrevOp CodeOp;
586
587 // The abbreviations used for each tracked value index.
588 std::vector<NaClBitCodeAbbrevOp> AbbrevOps;
589
590 private:
591 // Any remaining abbreviation operators not part of the unrolling.
592 std::vector<NaClBitCodeAbbrevOp> MoreOps;
593
594 // Extracts out the next abbreviation operator from the abbreviation
595 // Abbrev, given the index NextOp, and assigns it to AbbrevOp
596 void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp,
597 NaClBitCodeAbbrev *Abbrev,
598 unsigned &NextOp) {
599 assert(NextOp < Abbrev->getNumOperandInfos());
600 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp);
601 if (Op.isEncoding() && Op.getEncoding() == NaClBitCodeAbbrevOp::Array) {
602 // Do not advance. The array operator assumes that all remaining
603 // elements should match its argument.
604 AbbrevOp = Abbrev->getOperandInfo(NextOp+1);
605 } else {
606 AbbrevOp = Op;
607 NextOp++;
608 }
609 }
610 };
611
612 /// Models a candidate block abbreviation, which is a blockID, and the
613 /// corresponding abbreviation to be considered for addition. Note:
614 /// Constructors and assignment take ownership of the abbreviation.
615 class CandBlockAbbrev {
616 public:
617 CandBlockAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbrev)
618 : BlockID(BlockID), Abbrev(Abbrev) {
619 }
620
621 CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev)
622 : BlockID(BlockAbbrev.BlockID),
623 Abbrev(BlockAbbrev.Abbrev) {
624 Abbrev->addRef();
625 }
626
627 void operator=(const CandBlockAbbrev &BlockAbbrev) {
628 Abbrev->dropRef();
629 BlockID = BlockAbbrev.BlockID;
630 Abbrev = BlockAbbrev.Abbrev;
631 Abbrev->addRef();
632 }
633
634 ~CandBlockAbbrev() {
635 Abbrev->dropRef();
636 }
637
638 /// The block ID of the candidate abbreviation.
639 unsigned GetBlockID() const {
640 return BlockID;
641 }
642
643 /// The abbreviation of the candidate abbreviation.
644 const NaClBitCodeAbbrev *GetAbbrev() const {
645 return Abbrev;
646 }
647
648 /// orders this against the candidate abbreviation.
649 int Compare(const CandBlockAbbrev &CandAbbrev) const {
650 unsigned diff = BlockID - CandAbbrev.BlockID;
651 if (diff) return diff;
652 return Abbrev->Compare(*CandAbbrev.Abbrev);
653 }
654
655 /// Prints the candidate abbreviation to the given stream.
656 void Print(raw_ostream &Stream) const {
657 PrintAbbrev(Stream, BlockID, Abbrev);
658 }
659
660 private:
661 // The block the abbreviation applies to.
662 unsigned BlockID;
663 // The candidate abbreviation.
664 NaClBitCodeAbbrev *Abbrev;
665 };
666
667 static inline bool operator<(const CandBlockAbbrev &A1,
668 const CandBlockAbbrev &A2) {
669 return A1.Compare(A2) < 0;
670 }
671
672 /// Models the minimum number of instances for a candidate abbreviation
673 /// before we will even consider it a potential candidate abbreviation.
674 static unsigned MinNumInstancesForNewAbbrevs = 100;
675
676 /// Models the set of candidate abbreviations being considered, and
677 /// the number of abbreviations associated with each candidate
678 /// Abbreviation.
679 ///
680 /// Note: Because we may have abbreviation refinements of A->B->C and
681 /// A->D->C, we need to accumulate instance counts in such cases.
682 class CandidateAbbrevs {
683 public:
684 // Map from candidate abbreviations to the corresponding number of
685 // instances.
686 typedef std::map<CandBlockAbbrev, unsigned> AbbrevCountMap;
687 typedef AbbrevCountMap::const_iterator const_iterator;
688
689 /// Creates an empty set of candidate abbreviations, to be
690 /// (potentially) added to the given set of block abbreviations.
691 /// Argument is the (global) block abbreviations map, which is
692 /// used to determine if it already exists.
693 CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap)
694 : BlockAbbrevsMap(BlockAbbrevsMap)
695 {}
696
697 /// Adds the given (unrolled) abbreviation as a candidate
698 /// abbreviation to the given block. NumInstances is the number of
699 /// instances expected to use this candidate abbreviation. Returns
700 /// true if the corresponding candidate abbreviation is added to this
701 /// set of candidate abbreviations.
702 bool Add(unsigned BlockID,
703 UnrolledAbbreviation &UnrolledAbbrev,
704 unsigned NumInstances);
705
706 /// Returns the list of candidate abbreviations in this set.
707 const AbbrevCountMap &GetAbbrevsMap() const {
708 return AbbrevsMap;
709 }
710
711 /// Prints out the current contents of this set.
712 void Print(raw_ostream &Stream, const char *Title = "Candidates") const {
713 Stream << "-- " << Title << ": \n";
714 for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end();
715 Iter != IterEnd; ++Iter) {
716 Stream << format("%12u", Iter->second) << ": ";
717 Iter->first.Print(Stream);
718 }
719 Stream << "--\n";
720 }
721
722 private:
723 // The set of abbreviations and corresponding number instances.
724 AbbrevCountMap AbbrevsMap;
725
726 // The map of (global) abbreviations already associated with each block.
727 BlockAbbrevsMapType &BlockAbbrevsMap;
728 };
729
730 bool CandidateAbbrevs::Add(unsigned BlockID,
731 UnrolledAbbreviation &UnrolledAbbrev,
732 unsigned NumInstances) {
733 // Drop if it corresponds to an existing global abbreviation.
734 NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.Restore();
735 if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) {
736 if (Abbrevs->FindAbbreviation(Abbrev) !=
737 BlockAbbrevs::NO_SUCH_ABBREVIATION) {
738 Abbrev->dropRef();
739 return false;
740 }
741 }
742
743 CandBlockAbbrev CandAbbrev(BlockID, Abbrev);
744 AbbrevCountMap::iterator Pos = AbbrevsMap.find(CandAbbrev);
745 if (Pos == AbbrevsMap.end()) {
746 AbbrevsMap[CandAbbrev] = NumInstances;
747 } else {
748 Pos->second += NumInstances;
749 }
750 return true;
751 }
752
753 // Look for new abbreviations in block BlockID, considering it was
754 // read with the given abbreviation Abbrev, and considering changing
755 // the abbreviation opererator for value Index. ValueDist is how
756 // values at Index are distributed. Any found abbreviations are added
757 // to the candidate abbreviations CandAbbrevs. Returns true only if we
758 // have added new candidate abbreviations to CandAbbrevs.
759 static bool AddNewAbbreviations(unsigned BlockID,
760 const UnrolledAbbreviation &Abbrev,
761 unsigned Index,
762 NaClBitcodeValueDist &ValueDist,
763 CandidateAbbrevs &CandAbbrevs) {
764 // TODO(kschimpf): Add code to try and find a better encoding for
765 // the values, based on the distribution.
766
767 // If this index is already a literal abbreviation, no improvements can
768 // be made.
769 const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index];
770 if (Op.isLiteral()) return false;
771
772 // Search based on sorted distribution, which sorts by number of
773 // instances. Start by trying to find possible constants to use.
774 const NaClBitcodeDist::Distribution *
775 Distribution = ValueDist.GetDistribution();
776 for (NaClBitcodeDist::Distribution::const_iterator
777 Iter = Distribution->begin(), IterEnd = Distribution->end();
778 Iter != IterEnd; ++Iter) {
779 NaClValueRangeType Range = GetNaClValueRange(Iter->second);
780 if (Range.first != Range.second) continue; // not a constant.
781
782 // Defines a constant. Try as new candidate range. In addition,
783 // don't try any more constant values, since this is the one with
784 // the greatest number of instances.
785 NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first);
786 UnrolledAbbreviation CandAbbrev(Abbrev);
787 CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first);
788 return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances());
789 }
790 return false;
791 }
792
793 // Look for new abbreviations in block BlockID, considering it was
794 // read with the given abbreviation Abbrev. IndexDist is the
795 // corresponding distribution of value indices associated with the
796 // abbreviation. Any found abbreviations are added to the candidate
797 // abbreviations CandAbbrevs.
798 static void AddNewAbbreviations(unsigned BlockID,
799 const UnrolledAbbreviation &Abbrev,
800 NaClBitcodeDist &IndexDist,
801 CandidateAbbrevs &CandAbbrevs) {
802 // Search based on sorted distribution, which sorts based on heuristic
803 // of best index to fix first.
804 const NaClBitcodeDist::Distribution *
805 IndexDistribution = IndexDist.GetDistribution();
806 for (NaClBitcodeDist::Distribution::const_iterator
807 IndexIter = IndexDistribution->begin(),
808 IndexIterEnd = IndexDistribution->end();
809 IndexIter != IndexIterEnd; ++IndexIter) {
810 unsigned Index = static_cast<unsigned>(IndexIter->second);
811 if (AddNewAbbreviations(
812 BlockID, Abbrev, Index,
813 cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index))
814 ->GetValueDist(),
815 CandAbbrevs)) {
816 return;
817 }
818 }
819 }
820
821 // Look for new abbreviations in block BlockID, considering it was
822 // read with the given abbreviation Abbrev, and the given record Code.
823 // SizeDist is the corresponding distribution of sizes associated with
824 // the abbreviation. Any found abbreviations are added to the
825 // candidate abbreviations CandAbbrevs.
826 static void AddNewAbbreviations(unsigned BlockID,
827 NaClBitCodeAbbrev *Abbrev,
828 unsigned Code,
829 NaClBitcodeDist &SizeDist,
830 CandidateAbbrevs &CandAbbrevs) {
831 const NaClBitcodeDist::Distribution *
832 SizeDistribution = SizeDist.GetDistribution();
833 for (NaClBitcodeDist::Distribution::const_iterator
834 SizeIter = SizeDistribution->begin(),
835 SizeIterEnd = SizeDistribution->end();
836 SizeIter != SizeIterEnd; ++SizeIter) {
837 unsigned Size = static_cast<unsigned>(SizeIter->second);
838 UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1 /* Add code! */,
839 Size >= NaClValueIndexCutoff);
840 if (!UnrolledAbbrev.CodeOp.isLiteral()) {
841 // Try making the code a literal.
842 UnrolledAbbreviation CandAbbrev(UnrolledAbbrev);
843 CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code);
844 CandAbbrevs.Add(BlockID, CandAbbrev,
845 SizeDist.at(Size)->GetNumInstances());
846 }
847 // Now process value indices to find candidate abbreviations.
848 AddNewAbbreviations(
849 BlockID, UnrolledAbbrev,
850 cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size))
851 ->GetValueIndexDist(),
852 CandAbbrevs);
853 }
854 }
855
856 // Look for new abbreviations in block BlockID. Abbrevs is the map of
857 // read (globally defined) abbreviations associated with the
858 // BlockID. AbbrevDist is the distribution map of abbreviations
859 // associated with BlockID. Any found abbreviations are added to the
860 // candidate abbreviations CandAbbrevs.
861 static void AddNewAbbreviations(unsigned BlockID,
862 BlockAbbrevs *Abbrevs,
863 NaClBitcodeDist &AbbrevDist,
864 CandidateAbbrevs &CandAbbrevs) {
865 const NaClBitcodeDist::Distribution *
866 AbbrevDistribution = AbbrevDist.GetDistribution();
867 for (NaClBitcodeDist::Distribution::const_iterator
868 AbbrevIter = AbbrevDistribution->begin(),
869 AbbrevIterEnd = AbbrevDistribution->end();
870 AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
871 NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second;
872 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex);
873 NaClBitcodeAbbrevDistElement *AbbrevElmt =
874 cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex));
875 NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist();
876
877 const NaClBitcodeDist::Distribution *
878 CodeDistribution = CodeDist.GetDistribution();
879 for (NaClBitcodeDist::Distribution::const_iterator
880 CodeIter = CodeDistribution->begin(),
881 CodeIterEnd = CodeDistribution->end();
882 CodeIter != CodeIterEnd; ++CodeIter) {
883 unsigned Code = static_cast<unsigned>(CodeIter->second);
884 AddNewAbbreviations(
885 BlockID,
886 Abbrev,
887 Code,
888 cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second))
889 ->GetSizeDist(),
890 CandAbbrevs);
891 }
892 }
893 }
894
895 typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType;
896
897 // Look for new abbreviations in the given block distribution map
898 // BlockDist. BlockAbbrevsMap contains the set of read global
899 // abbreviations. Adds found candidate abbreviations to the set of
900 // known global abbreviations.
901 static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist,
902 BlockAbbrevsMapType &BlockAbbrevsMap) {
903 CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap);
904 // Start by collecting candidate abbreviations.
905 const NaClBitcodeDist::Distribution *
906 Distribution = BlockDist.GetDistribution();
907 for (NaClBitcodeDist::Distribution::const_iterator
908 Iter = Distribution->begin(), IterEnd = Distribution->end();
909 Iter != IterEnd; ++Iter) {
910 unsigned BlockID = static_cast<unsigned>(Iter->second);
911 AddNewAbbreviations(
912 BlockID,
913 BlockAbbrevsMap[BlockID],
914 cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))
915 ->GetAbbrevDist(),
916 CandAbbrevs);
917 }
918 // Install candidate abbreviations.
919 //
920 // Sort the candidate abbreviations by number of instances, so that
921 // if multiple abbreviations apply, the one with the largest number
922 // of instances will be chosen when compressing a file.
923 //
924 // For example, we may have just generated two abbreviations. The
925 // first has replaced the 3rd element with the constant 4 while the
926 // second replaced the 4th element with the constant 5. The first
927 // abbreviation can apply to 300 records while the second can apply
928 // to 1000 records. Assuming that both abbreviations shrink the
929 // record by the same number of bits, we clearly want the tool to
930 // choose the second abbreviation when selecting the abbreviation
931 // index to choose (via method GetRecordAbbrevIndex).
932 //
933 // Selecting the second is important in that abbreviation are
934 // refined by successive calls to this tool. We do not want to
935 // restrict downstream refinements prematurely. Hence, we want the
936 // tool to choose the abbreviation with the most candidates first.
937 //
938 // Since method GetRecordAbbrevIndex chooses the first abbreviation
939 // that generates the least number of bits, we clearly want to make
940 // sure that the second abbreviation occurs before the first.
941 std::vector<CountedAbbrevType> Candidates;
942 for (CandidateAbbrevs::const_iterator
943 Iter = CandAbbrevs.GetAbbrevsMap().begin(),
944 IterEnd = CandAbbrevs.GetAbbrevsMap().end();
945 Iter != IterEnd; ++Iter) {
946 Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first));
947 }
948 std::sort(Candidates.begin(), Candidates.end());
949 std::vector<CountedAbbrevType>::const_reverse_iterator
950 Iter = Candidates.rbegin(), IterEnd = Candidates.rend();
951 if (Iter == IterEnd) return;
952
953 if (TraceGeneratedAbbreviations) {
954 errs() << "-- New abbrevations:\n";
955 }
956 unsigned Min = (Iter->first >> 2);
957 for (; Iter != IterEnd; ++Iter) {
958 if (Iter->first < Min) break;
959 unsigned BlockID = Iter->second.GetBlockID();
960 BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID];
961 NaClBitCodeAbbrev *Abbrev = Iter->second.GetAbbrev()->Copy();
962 if (TraceGeneratedAbbreviations) {
963 errs() <<format("%12u", Iter->first) << ": ";
964 PrintAbbrev(errs(), BlockID, Abbrev);
965 }
966 Abbrevs->AddAbbreviation(Abbrev);
967 }
968 if (TraceGeneratedAbbreviations) {
969 errs() << "--\n";
970 }
971 }
972
511 // Read in bitcode, analyze data, and figure out set of abbreviations 973 // Read in bitcode, analyze data, and figure out set of abbreviations
512 // to use, from memory buffer MemBuf containing the input bitcode file. 974 // to use, from memory buffer MemBuf containing the input bitcode file.
513 static bool AnalyzeBitcode(OwningPtr<MemoryBuffer> &MemBuf, 975 static bool AnalyzeBitcode(OwningPtr<MemoryBuffer> &MemBuf,
514 BlockAbbrevsMapType &BlockAbbrevsMap) { 976 BlockAbbrevsMapType &BlockAbbrevsMap) {
515 // TODO(kschimpf): The current code only extracts abbreviations 977 // TODO(kschimpf): The current code only extracts abbreviations
516 // defined in the bitcode file. This code needs to be updated to 978 // defined in the bitcode file. This code needs to be updated to
517 // collect data distributions and figure out better (global) 979 // collect data distributions and figure out better (global)
518 // abbreviations to use. 980 // abbreviations to use.
519 981
520 const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); 982 const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
(...skipping 14 matching lines...) Expand all
535 if (Parser.Parse()) return true; 997 if (Parser.Parse()) return true;
536 } 998 }
537 999
538 if (ShowValueDistributions) { 1000 if (ShowValueDistributions) {
539 // To make shell redirection of this trace easier, print it to 1001 // To make shell redirection of this trace easier, print it to
540 // stdout unless stdout is being used to contain the compressed 1002 // stdout unless stdout is being used to contain the compressed
541 // bitcode file. In the latter case, use stderr. 1003 // bitcode file. In the latter case, use stderr.
542 Parser.BlockDist.Print(OutputFilename == "-" ? errs() : outs()); 1004 Parser.BlockDist.Print(OutputFilename == "-" ? errs() : outs());
543 } 1005 }
544 1006
1007 AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap);
545 return false; 1008 return false;
546 } 1009 }
547 1010
548 /// Parses the input bitcode file and generates the corresponding 1011 /// Parses the input bitcode file and generates the corresponding
549 /// compressed bitcode file, by replacing abbreviations in the input 1012 /// compressed bitcode file, by replacing abbreviations in the input
550 /// file with the corresponding abbreviations defined in 1013 /// file with the corresponding abbreviations defined in
551 /// BlockAbbrevsMap. 1014 /// BlockAbbrevsMap.
552 class NaClBitcodeCopyParser : public NaClBitcodeParser { 1015 class NaClBitcodeCopyParser : public NaClBitcodeParser {
553 public: 1016 public:
554 // Top-level constructor to build the appropriate block parser 1017 // Top-level constructor to build the appropriate block parser
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
616 NaClBlockCopyParser *EnclosingParser) 1079 NaClBlockCopyParser *EnclosingParser)
617 : NaClBitcodeParser(BlockID, EnclosingParser), 1080 : NaClBitcodeParser(BlockID, EnclosingParser),
618 Context(EnclosingParser->Context) 1081 Context(EnclosingParser->Context)
619 {} 1082 {}
620 1083
621 virtual bool Error(const std::string &Message) { 1084 virtual bool Error(const std::string &Message) {
622 // Use local error routine so that all errors are treated uniformly. 1085 // Use local error routine so that all errors are treated uniformly.
623 return ::Error(Message); 1086 return ::Error(Message);
624 } 1087 }
625 1088
1089 /// Returns the set of (global) block abbreviations defined for the
1090 /// given block ID.
1091 BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {
1092 BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];
1093 if (Abbrevs == 0) {
1094 Abbrevs = new BlockAbbrevs(BlockID);
1095 Context->BlockAbbrevsMap[BlockID] = Abbrevs;
1096 }
1097 return Abbrevs;
1098 }
1099
626 virtual bool ParseBlock(unsigned BlockID) { 1100 virtual bool ParseBlock(unsigned BlockID) {
627 NaClBlockCopyParser Parser(BlockID, this); 1101 NaClBlockCopyParser Parser(BlockID, this);
628 return Parser.ParseThisBlock(); 1102 return Parser.ParseThisBlock();
629 } 1103 }
630 1104
631 virtual void EnterBlock(unsigned NumWords) { 1105 virtual void EnterBlock(unsigned NumWords) {
632 unsigned BlockID = GetBlockID(); 1106 unsigned BlockID = GetBlockID();
633 BlockAbbreviations = Context->BlockAbbrevsMap[BlockID]; 1107 BlockAbbreviations = GetGlobalAbbrevs(BlockID);
634 1108
635 // Enter the subblock. 1109 // Enter the subblock.
636 NaClBitcodeSelectorAbbrev 1110 NaClBitcodeSelectorAbbrev
637 Selector(BlockAbbreviations->GetNumberAbbreviations()-1); 1111 Selector(BlockAbbreviations->GetNumberAbbreviations()-1);
638 Context->Writer.EnterSubblock(BlockID, Selector); 1112 Context->Writer.EnterSubblock(BlockID, Selector);
1113
1114 // Note: We must dump module abbreviations as local
1115 // abbreviations, because they are in a yet to be
1116 // dumped BlockInfoBlock.
1117 if (BlockID == naclbitc::MODULE_BLOCK_ID) {
1118 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID);
1119 for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) {
1120 Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy());
1121 }
1122 }
639 } 1123 }
640 1124
641 virtual void ExitBlock() { 1125 virtual void ExitBlock() {
642 Context->Writer.ExitBlock(); 1126 Context->Writer.ExitBlock();
643 } 1127 }
644 1128
645 virtual void ExitBlockInfo() { 1129 virtual void ExitBlockInfo() {
646 assert(!Context->FoundFirstBlockInfo && 1130 assert(!Context->FoundFirstBlockInfo &&
647 "Input bitcode has more that one BlockInfoBlock"); 1131 "Input bitcode has more that one BlockInfoBlock");
648 Context->FoundFirstBlockInfo = true; 1132 Context->FoundFirstBlockInfo = true;
649 1133
650 // Generate global abbreviations within a blockinfo block. 1134 // Generate global abbreviations within a blockinfo block.
651 Context->Writer.EnterBlockInfoBlock(); 1135 Context->Writer.EnterBlockInfoBlock();
652 for (BlockAbbrevsMapType::const_iterator 1136 for (BlockAbbrevsMapType::const_iterator
653 Iter = Context->BlockAbbrevsMap.begin(), 1137 Iter = Context->BlockAbbrevsMap.begin(),
654 IterEnd = Context->BlockAbbrevsMap.end(); 1138 IterEnd = Context->BlockAbbrevsMap.end();
655 Iter != IterEnd; ++Iter) { 1139 Iter != IterEnd; ++Iter) {
656 unsigned BlockID = Iter->first; 1140 unsigned BlockID = Iter->first;
1141 // Don't emit module abbreviations, since they have been
1142 // emitted as local abbreviations.
1143 if (BlockID == naclbitc::MODULE_BLOCK_ID) continue;
1144
657 BlockAbbrevs *Abbrevs = Iter->second; 1145 BlockAbbrevs *Abbrevs = Iter->second;
658 if (Abbrevs == 0) continue; 1146 if (Abbrevs == 0) continue;
659 for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); 1147 for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation();
660 i < Abbrevs->GetNumberAbbreviations(); ++i) { 1148 i < Abbrevs->GetNumberAbbreviations(); ++i) {
661 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i); 1149 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i);
662 Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev); 1150 Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev);
663 } 1151 }
664 } 1152 }
665 Context->Writer.ExitBlock(); 1153 Context->Writer.ExitBlock();
666 } 1154 }
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after
948 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 1436 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
949 cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n"); 1437 cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n");
950 1438
951 OwningPtr<MemoryBuffer> MemBuf; 1439 OwningPtr<MemoryBuffer> MemBuf;
952 if (ReadAndBuffer(MemBuf)) return 1; 1440 if (ReadAndBuffer(MemBuf)) return 1;
953 BlockAbbrevsMapType BlockAbbrevsMap; 1441 BlockAbbrevsMapType BlockAbbrevsMap;
954 if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1; 1442 if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1;
955 if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1; 1443 if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1;
956 return 0; 1444 return 0;
957 } 1445 }
OLDNEW
« lib/Bitcode/NaCl/Reader/NaClBitCodes.cpp ('K') | « lib/Bitcode/NaCl/Reader/NaClBitcodeDist.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698