 Chromium Code Reviews
 Chromium Code Reviews Issue 154603002:
  Make pnacl-bccompress add abbreviations for obvious constants.  (Closed) 
  Base URL: http://git.chromium.org/native_client/pnacl-llvm.git@master
    
  
    Issue 154603002:
  Make pnacl-bccompress add abbreviations for obvious constants.  (Closed) 
  Base URL: http://git.chromium.org/native_client/pnacl-llvm.git@master| Index: tools/pnacl-bccompress/pnacl-bccompress.cpp | 
| diff --git a/tools/pnacl-bccompress/pnacl-bccompress.cpp b/tools/pnacl-bccompress/pnacl-bccompress.cpp | 
| index 996d079f725e25101cf29650c21468f7f9918077..d476c79d85e99ca096903b24ce9de3ebd95c5213 100644 | 
| --- a/tools/pnacl-bccompress/pnacl-bccompress.cpp | 
| +++ b/tools/pnacl-bccompress/pnacl-bccompress.cpp | 
| @@ -100,45 +100,61 @@ static bool Error(const std::string &Err) { | 
| return true; | 
| } | 
| -// For debugging. Prints out the abbreviation in readable form to errs(). | 
| -static void PrintAbbrev(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { | 
| - errs() << "Abbrev(block " << BlockID << "): ["; | 
| - // ContinuationCount>0 implies that the current operand is a | 
| +// Prints out the abbreviation in a readable form to the given stream. | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
document return bool value?
 
Karl
2014/02/06 19:46:28
Removed the need for this by adding method NumArgu
 | 
| +static bool PrintAbbrevOp(raw_ostream& Stream, | 
| + const NaClBitCodeAbbrevOp &Op) { | 
| + if (Op.isLiteral()) { | 
| + Stream << Op.getLiteralValue(); | 
| + } else if (Op.isEncoding()) { | 
| + switch (Op.getEncoding()) { | 
| + case NaClBitCodeAbbrevOp::Fixed: | 
| + Stream << "Fixed(" << Op.getEncodingData() << ")"; | 
| + break; | 
| + case NaClBitCodeAbbrevOp::VBR: | 
| + Stream << "VBR(" << Op.getEncodingData() << ")"; | 
| + break; | 
| + case NaClBitCodeAbbrevOp::Array: | 
| + Stream << "Array:"; | 
| + return true; | 
| + case NaClBitCodeAbbrevOp::Char6: | 
| + Stream << "Char6"; | 
| + break; | 
| + case NaClBitCodeAbbrevOp::Blob: | 
| + Stream << "Blob"; | 
| + break; | 
| + default: | 
| + Stream << "??"; | 
| + break; | 
| + } | 
| + } else { | 
| + Stream << "??"; | 
| + } | 
| + return false; | 
| +} | 
| + | 
| +// Prints out the abbreviation in readable form to the given stream. | 
| +static void PrintAbbrev(raw_ostream &Stream, const NaClBitCodeAbbrev *Abbrev) { | 
| + Stream << "["; | 
| + // ContinuationCount > 0 implies that the current operand is a | 
| // continuation of previous operand(s). | 
| unsigned ContinuationCount = 0; | 
| for (unsigned i = 0; i < Abbrev->getNumOperandInfos(); ++i) { | 
| - if (i > 0 && ContinuationCount == 0) errs() << ", "; | 
| + if (i > 0 && ContinuationCount == 0) Stream << ", "; | 
| const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(i); | 
| - if (Op.isLiteral()) { | 
| - errs() << Op.getLiteralValue(); | 
| - } else if (Op.isEncoding()) { | 
| - switch (Op.getEncoding()) { | 
| - case NaClBitCodeAbbrevOp::Fixed: | 
| - errs() << "Fixed(" << Op.getEncodingData() << ")"; | 
| - break; | 
| - case NaClBitCodeAbbrevOp::VBR: | 
| - errs() << "VBR(" << Op.getEncodingData() << ")"; | 
| - break; | 
| - case NaClBitCodeAbbrevOp::Array: | 
| - errs() << "Array:"; | 
| - ++ContinuationCount; | 
| - continue; | 
| - case NaClBitCodeAbbrevOp::Char6: | 
| - errs() << "Char6"; | 
| - break; | 
| - case NaClBitCodeAbbrevOp::Blob: | 
| - errs() << "Blob"; | 
| - break; | 
| - default: | 
| - errs() << "??"; | 
| - break; | 
| - } | 
| - } else { | 
| - errs() << "??"; | 
| + if (PrintAbbrevOp(Stream, Op)) { | 
| + ++ContinuationCount; | 
| + continue; | 
| } | 
| if (ContinuationCount) --ContinuationCount; | 
| } | 
| - errs() << "]\n"; | 
| + Stream << "]\n"; | 
| +} | 
| + | 
| +// Prints out the abbreviation in readable form to the given Stream. | 
| +static void PrintAbbrev(raw_ostream &Stream, | 
| + unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { | 
| + Stream << "Abbrev(block " << BlockID << "): "; | 
| + PrintAbbrev(Stream, Abbrev); | 
| } | 
| // Reads the input file into the given buffer. | 
| @@ -215,7 +231,12 @@ public: | 
| // application abbreviations, so that added abbreviations have | 
| // valid abbreviation indices. | 
| for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) { | 
| - Abbrevs.push_back(new NaClBitCodeAbbrev()); | 
| + // Make all non-application abbreviations look like the default | 
| + // abbreviation. | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
what goes wrong if the abbrev isn't like the defau
 
Karl
2014/02/06 19:46:28
The main reason for doing this is so that we don't
 | 
| + NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev(); | 
| + Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); | 
| + Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); | 
| + Abbrevs.push_back(Abbrev); | 
| } | 
| GlobalAbbrevBitstreamToInternalMap. | 
| SetNextBitstreamAbbrevIndex(Abbrevs.size()); | 
| @@ -263,6 +284,17 @@ public: | 
| return true; | 
| } | 
| + /// Adds the given abbreviation to the set of global abbreviations | 
| + /// defined for the block. Guarantees that duplicate abbreviations | 
| + /// are not added to the block. Note: Code takes ownership of | 
| + /// the given abbreviation. Returns true if new abbreviation. | 
| + /// Updates Index to the index where the abbreviation is located | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
The documentation for "Updates Index" doesn't seem
 
Karl
2014/02/06 19:46:28
Sorry, I really should be more careful when cuttin
 | 
| + /// in the set of abbreviations. | 
| + bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) { | 
| + int Index; | 
| + return AddAbbreviation(Abbrev, Index); | 
| + } | 
| + | 
| /// The block ID associated with the block. | 
| unsigned GetBlockID() const { | 
| return BlockID; | 
| @@ -485,7 +517,7 @@ protected: | 
| // Read abbreviation and add as a global abbreviation. | 
| if (Abbrevs->AddAbbreviation(Abbrev, Index) | 
| && TraceGeneratedAbbreviations) { | 
| - PrintAbbrev(BlockID, Abbrev); | 
| + PrintAbbrev(errs(), BlockID, Abbrev); | 
| } | 
| } | 
| @@ -508,6 +540,490 @@ bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) { | 
| return Parser.ParseThisBlock(); | 
| } | 
| +// Models the unrolling of an abbreviation into its sequence of | 
| +// individual operators. That is, unrolling arrays to match the width | 
| +// of the abbreviation. Note: We unroll in the form that best matches | 
| +// the distribution map. Hence, the code is stored as a separate | 
| +// operator. We also keep the array abbreviation op, for untracked | 
| +// elements within the distribution maps. | 
| +class UnrolledAbbreviation { | 
| +public: | 
| + /// Unroll the given abbreviation, assuming it has the given size | 
| + /// (as specified in the distribution maps). | 
| + UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size) | 
| + : CodeOp(0), | 
| + HasMore(Size >= NaClValueIndexCutoff) { | 
| + unsigned NextOp = 0; | 
| + UnrollAbbrevOp(CodeOp, Abbrev, NextOp); | 
| + --Size; | 
| + for (unsigned i = 0; i < Size; ++i) { | 
| + // Create slot and then fill with appropriate operator. | 
| + AbbrevOps.push_back(CodeOp); | 
| + UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp); | 
| + } | 
| + } | 
| + | 
| + /// Prints out the abbreviation modeled by the unrolled | 
| + /// abbreviation. | 
| + void Print(raw_ostream &Stream) const { | 
| + Stream << "["; | 
| + PrintAbbrevOp(Stream, CodeOp); | 
| + for (std::vector<NaClBitCodeAbbrevOp>::const_iterator | 
| + Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end(); | 
| + Iter != IterEnd; ++Iter) { | 
| + Stream << ", "; | 
| + PrintAbbrevOp(Stream, *Iter); | 
| + } | 
| + if (HasMore) { | 
| + Stream << ", "; | 
| + PrintAbbrevOp(Stream, ArrayOp); | 
| + Stream << ":"; | 
| + PrintAbbrevOp(Stream, VbrOp); | 
| + } | 
| + Stream << "]\n"; | 
| + } | 
| + | 
| + /// Converts the unrolled abbreviation back into a regular | 
| + /// abbreviation. | 
| + NaClBitCodeAbbrev *roll() const { | 
| + NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev(); | 
| + std::vector<NaClBitCodeAbbrevOp> Ops(AbbrevOps); | 
| + | 
| + // If it ends with an array, move back over elements that | 
| + // don't make a difference. | 
| + if (HasMore) { | 
| + while (!Ops.empty() && Ops.back() == VbrOp) { | 
| + Ops.pop_back(); | 
| + } | 
| + if (Ops.empty() && CodeOp == VbrOp) { | 
| + // Just use array. | 
| + Abbrev->Add(ArrayOp); | 
| + Abbrev->Add(VbrOp); | 
| + return Abbrev; | 
| + } | 
| + } | 
| + | 
| + // Now add remaining abbreviation operators. | 
| + Abbrev->Add(CodeOp); | 
| + for (std::vector<NaClBitCodeAbbrevOp>::const_iterator | 
| + Iter = Ops.begin(), IterEnd = Ops.end(); | 
| + Iter != IterEnd; ++Iter) { | 
| + Abbrev->Add(*Iter); | 
| + } | 
| + if (HasMore) { | 
| + Abbrev->Add(ArrayOp); | 
| + Abbrev->Add(VbrOp); | 
| + } | 
| + return Abbrev; | 
| + } | 
| + | 
| + // The abbreviation used for the record code. | 
| + NaClBitCodeAbbrevOp CodeOp; | 
| + | 
| + // The abbreviations used for each tracked value index. | 
| + std::vector<NaClBitCodeAbbrevOp> AbbrevOps; | 
| + | 
| +private: | 
| + | 
| + // True if the distribution map has untracked values in | 
| + // the distribution map. | 
| + bool HasMore; | 
| + | 
| + // The symbol abbreviation operators to add to capture untracked | 
| + // values in the distribution map. | 
| + static NaClBitCodeAbbrevOp ArrayOp; | 
| + static NaClBitCodeAbbrevOp VbrOp; | 
| + | 
| + // Extracts out the next abbreviation operator from the abbreviation | 
| + // Abbrev, given the index NextOp, and assigns it to AbbrevOp | 
| + void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp, | 
| + NaClBitCodeAbbrev *Abbrev, | 
| + unsigned &NextOp) { | 
| + assert(NextOp < Abbrev->getNumOperandInfos()); | 
| + const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp); | 
| + if (!Op.isLiteral() && Op.getEncoding() == NaClBitCodeAbbrevOp::Array) { | 
| + AbbrevOp = Abbrev->getOperandInfo(NextOp+1); | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
How come NextOp isn't bumped for Array?
 
Karl
2014/02/06 19:46:28
When array appears in an abbreviation expression,
 
jvoung (off chromium)
2014/02/06 23:19:36
Okay, so this is to peel out of the array during u
 
Karl
2014/02/07 19:37:29
Ok. I've updated the comment with an example that
 | 
| + } else { | 
| + AbbrevOp = Op; | 
| + NextOp++; | 
| + } | 
| + } | 
| +}; | 
| + | 
| +NaClBitCodeAbbrevOp UnrolledAbbreviation::ArrayOp(NaClBitCodeAbbrevOp::Array); | 
| +NaClBitCodeAbbrevOp UnrolledAbbreviation::VbrOp(NaClBitCodeAbbrevOp::VBR, 6); | 
| + | 
| +/// Models a candidate block abbreviation, which is a blockID, the | 
| +/// corresponding abbreviation to be considered for addition, and the | 
| +/// number of records the abbreviation applies to. Note: Constructors | 
| +/// and assignment take ownership of the abbreviation. | 
| +class CandBlockAbbrev { | 
| 
Karl
2014/02/06 19:46:28
Realized that putting NumInstances in this class w
 | 
| +public: | 
| + CandBlockAbbrev(unsigned NumInstances, unsigned BlockID, | 
| + NaClBitCodeAbbrev *Abbrev) | 
| + : NumInstances(NumInstances), BlockID(BlockID), Abbrev(Abbrev) { | 
| + } | 
| + | 
| + explicit CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev) | 
| + : NumInstances(BlockAbbrev.NumInstances), | 
| + BlockID(BlockAbbrev.BlockID), | 
| + Abbrev(BlockAbbrev.Abbrev) { | 
| + Abbrev->addRef(); | 
| + } | 
| + | 
| + void operator=(const CandBlockAbbrev &BlockAbbrev) { | 
| + Abbrev->dropRef(); | 
| + NumInstances = BlockAbbrev.NumInstances; | 
| + BlockID = BlockAbbrev.BlockID; | 
| + Abbrev = BlockAbbrev.Abbrev; | 
| + Abbrev->addRef(); | 
| + } | 
| + | 
| + ~CandBlockAbbrev() { | 
| + Abbrev->dropRef(); | 
| + } | 
| + | 
| + /// Returns the number of instances associated with the candidate | 
| + /// abbreviation. | 
| + unsigned GetNumInstances() const { | 
| + return NumInstances; | 
| + } | 
| + | 
| + /// The block ID of the candidate abbreviation. | 
| + unsigned GetBlockID() const { | 
| + return BlockID; | 
| + } | 
| + | 
| + /// The abbreviation of the candidate abbreviation. | 
| + const NaClBitCodeAbbrev *GetAbbrev() const { | 
| + return Abbrev; | 
| + } | 
| + | 
| + /// orders this against the candidate abbreviation. | 
| + int Compare(const CandBlockAbbrev &CandAbbrev) const { | 
| + if (NumInstances < CandAbbrev.NumInstances) return -1; | 
| + if (NumInstances > CandAbbrev.NumInstances) return 1; | 
| + unsigned diff = BlockID - CandAbbrev.BlockID; | 
| + if (diff) return diff; | 
| + return Abbrev->Compare(*CandAbbrev.Abbrev); | 
| + } | 
| + | 
| + /// Prints the candidate abbreviation to the given stream. | 
| + void Print(raw_ostream &Stream) const { | 
| + Stream << format("%12u", NumInstances) << ": "; | 
| + PrintAbbrev(Stream, BlockID, Abbrev); | 
| + } | 
| + | 
| +private: | 
| + // The number of records that use this abbreviation. | 
| + unsigned NumInstances; | 
| + // The block the abbreviation applies to. | 
| + unsigned BlockID; | 
| + // The candidate abbreviation. | 
| + NaClBitCodeAbbrev *Abbrev; | 
| +}; | 
| + | 
| +static inline bool operator<(const CandBlockAbbrev &A1, | 
| + const CandBlockAbbrev &A2) { | 
| + return A1.Compare(A2); | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
shouldn't this check if Compare returns -1 or some
 
Karl
2014/02/06 19:46:28
My bad. Fixing.
 | 
| +} | 
| + | 
| +/// Models the minimum number of instances for a candidate abbreviation | 
| +/// before we will even consider it a potential candidate abbrevation. | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
abb
 
Karl
2014/02/06 19:46:28
Done.
 | 
| +static unsigned MinNumInstancesForNewAbbrevs = 100; | 
| + | 
| +/// Models the set of candidate abbreviations being considered. | 
| +/// Filters based on the number of instances of each candidate | 
| +/// abbrevaition. This filter is done to only keep candidates that | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
abbreviation
 
Karl
2014/02/06 19:46:28
Done.
 | 
| +/// most likely will reduce bitcode size. | 
| +class CandidateAbbrevs { | 
| 
Karl
2014/02/06 19:46:28
Modified this class. In particular, modified that
 | 
| +public: | 
| + typedef std::set<CandBlockAbbrev> Set; | 
| + typedef Set::const_iterator const_iterator; | 
| + | 
| + /// Creates an empty set of candidate abbreviations, to be | 
| + /// (potentially) added to the given set of block abbreviations. | 
| + /// Argument is the (global) block abbreviations map, which is | 
| + /// used to determine if it already exists. | 
| + CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap) | 
| + : MaxInstances(0), BlockAbbrevsMap(BlockAbbrevsMap) | 
| + {} | 
| + | 
| + ~CandidateAbbrevs() { | 
| + Abbrevs.clear(); | 
| + } | 
| + | 
| + /// Adds the given (unrolled) abbreviation as a candidate | 
| + /// abbreviation to the given block. NumInstances is the number of | 
| + /// instances expected to use this candidate abbreviation. Returns | 
| + /// true if the corresponding candidate abbreviation is added to this | 
| + /// set of candidate abbreviations. Note: depending on the value | 
| + /// of NumInstances, methods GetMaxInstances and GetMinInstances | 
| + /// may change to include NumInstances, if the candidate abbreviation | 
| + /// is added. | 
| + bool Add(unsigned BlockID, | 
| + UnrolledAbbreviation &UnrolledAbbrev, | 
| + unsigned NumInstances); | 
| + | 
| + /// Returns the upper range (of number instances) that is currently | 
| + /// associated with this set of candidate abbreviations. | 
| + unsigned GetMaxInstances() const { | 
| + return MaxInstances; | 
| + } | 
| + | 
| + /// Returns the lower range (of number instances) that is currently | 
| + /// associated with this set of candidate abbreviations. | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
This to adjust the lower bound set by "MinNumInsta
 
Karl
2014/02/06 19:46:28
Yes. We set the maximum based on the number of bit
 | 
| + unsigned GetMinInstances() const { | 
| + if (MaxInstances <= 1024) { | 
| + return 0; | 
| + } else | 
| + return MaxInstances >> 4; | 
| + } | 
| + | 
| + /// Returns the list of candidate abbreviations in this set. | 
| + const Set &GetAbbrevs() const { | 
| + return Abbrevs; | 
| + } | 
| + | 
| + /// Prints out the current contents of this set. | 
| + void Print(raw_ostream &Stream, const char *Title = "Candidates") const { | 
| + Stream << "-- " << Title << ": \n"; | 
| + for (CandidateAbbrevs::const_iterator | 
| + Iter = Abbrevs.begin(), IterEnd = Abbrevs.end(); | 
| + Iter != IterEnd; ++Iter) { | 
| + Iter->Print(Stream); | 
| + } | 
| + Stream << "--\n"; | 
| + } | 
| + | 
| +private: | 
| + // The upper bound of instances in this set. | 
| + unsigned MaxInstances; | 
| + | 
| + // The set of abbreviations in this set. | 
| + Set Abbrevs; | 
| + | 
| + // The map of (global) abbreviations already associated with each block. | 
| + BlockAbbrevsMapType &BlockAbbrevsMap; | 
| +}; | 
| + | 
| +bool CandidateAbbrevs::Add(unsigned BlockID, | 
| + UnrolledAbbreviation &UnrolledAbbrev, | 
| + unsigned NumInstances) { | 
| + // First see if we shouldn't add. | 
| + if (NumInstances < MinNumInstancesForNewAbbrevs) return false; | 
| + if (NumInstances < GetMinInstances()) return false; | 
| + NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.roll(); | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
Is Unroll() and roll() and lossy because of the cu
 
Karl
2014/02/06 19:46:28
Fixing the code for class UnrolledAbbreviation so
 | 
| + if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) { | 
| + if (Abbrevs->FindAbbreviation(Abbrev) != | 
| + BlockAbbrevs::NO_SUCH_ABBREVIATION) { | 
| + Abbrev->dropRef(); | 
| + return false; | 
| + } | 
| + } | 
| + | 
| + // If reached, we should add the given abbrevation to the set of | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
abbreviation
 
Karl
2014/02/06 19:46:28
Done.
 | 
| + // candidate abbreviations. | 
| + if (NumInstances > GetMaxInstances()) { | 
| + // New range of candidates found. Throw out candidate | 
| + // abbreviations that no longer apply, due to the adjustment of | 
| + // the min/max ranges. | 
| + Set OldAbbrevs; | 
| + swap(Abbrevs, OldAbbrevs); | 
| + | 
| + unsigned Bits = NaClBitsNeededForValue(NumInstances); | 
| + if (Bits > 31) { | 
| + MaxInstances = ~0U; // i.e. max unsigned value. | 
| + } else { | 
| + MaxInstances = 1U << Bits; | 
| + } | 
| + | 
| + for (Set::const_iterator | 
| + Iter = OldAbbrevs.begin(), IterEnd = OldAbbrevs.end(); | 
| + Iter != IterEnd; ++Iter) { | 
| + if (Iter->GetNumInstances() >= GetMinInstances()) { | 
| + Abbrevs.insert(*Iter); | 
| + } | 
| + } | 
| + } | 
| + Abbrevs.insert(CandBlockAbbrev(NumInstances, BlockID, Abbrev)); | 
| + return true; | 
| +} | 
| + | 
| +// Look for new abbreviations in block BlockID, considering it was | 
| +// read with the given abbreviation Abbrev, and considering changing | 
| +// the abbreviation opererator for value Index. ValueDist is how | 
| +// values at Index are distributed. Any found abbreviations are added | 
| +// to the candidate abbreviations CandAbbrevs. Returns true only if we | 
| +// have added new candidate abbrevations to CandAbbrevs. | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
abbreviations
 
Karl
2014/02/06 19:46:28
Done.
 | 
| +static bool AddNewAbbreviations(unsigned BlockID, | 
| + UnrolledAbbreviation &Abbrev, | 
| + unsigned Index, | 
| + NaClBitcodeValueDist &ValueDist, | 
| + CandidateAbbrevs &CandAbbrevs) { | 
| + // TODO(kschimpf): Add code to try and find a better encoding for | 
| + // the values, based on the distribution. | 
| + | 
| + // If this index is already a literal abbreviation, no improvements can | 
| + // be made. | 
| + const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index]; | 
| + if (Op.isLiteral()) return false; | 
| + | 
| + // Search based on sorted distribution, which sorts by number of | 
| + // instances. Start by trying to find possible constants to use. | 
| + const NaClBitcodeDist::Distribution * | 
| + Distribution = ValueDist.GetDistribution(); | 
| + for (NaClBitcodeDist::Distribution::const_iterator | 
| + Iter = Distribution->begin(), IterEnd = Distribution->end(); | 
| + Iter != IterEnd; ++Iter) { | 
| + NaClValueRangeType Range = GetNaClValueRange(Iter->second); | 
| + if (Range.first != Range.second) continue; // not a constant. | 
| + | 
| + // Defines a constant. Try as new candidate range. In addition, | 
| + // don't try any more constant values, since this is the one with | 
| + // the greatest number of instances. | 
| + NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first); | 
| + Abbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first); | 
| + return CandAbbrevs.Add(BlockID, Abbrev, Elmt->GetNumInstances()); | 
| + } | 
| + return false; | 
| +} | 
| + | 
| +// Look for new abbreviations in block BlockID, considering it was | 
| +// read with the given abbreviation Abbrev. IndexDist is the | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
Any idea how IndexDist looks like?
I would think
 
Karl
2014/02/06 19:46:28
Not sure what is being asked here. First, the (bcc
 
jvoung (off chromium)
2014/02/06 23:19:36
Sorry, just lost a bit of how the whole thing was
 | 
| +// corresponding distribution of value indices associated with the | 
| +// abbreviation. Any found abbreviations are added to the candidate | 
| +// abbreviations CandAbbrevs. | 
| +static void AddNewAbbreviations(unsigned BlockID, | 
| + UnrolledAbbreviation &Abbrev, | 
| + NaClBitcodeDist &IndexDist, | 
| + CandidateAbbrevs &CandAbbrevs) { | 
| + // Search based on sorted distribution, which sorts based on heuristic | 
| + // of best index to fix first. | 
| + const NaClBitcodeDist::Distribution * | 
| + IndexDistribution = IndexDist.GetDistribution(); | 
| + for (NaClBitcodeDist::Distribution::const_iterator | 
| + IndexIter = IndexDistribution->begin(), | 
| + IndexIterEnd = IndexDistribution->end(); | 
| + IndexIter != IndexIterEnd; ++IndexIter) { | 
| + unsigned Index = static_cast<unsigned>(IndexIter->second); | 
| + if (AddNewAbbreviations( | 
| + BlockID, Abbrev, Index, | 
| + cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index)) | 
| + ->GetValueDist(), | 
| + CandAbbrevs)) { | 
| + return; | 
| + } | 
| + } | 
| +} | 
| + | 
| +// Look for new abbrevaitions in block BlockID, considering it was | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
abbreviations
 
Karl
2014/02/06 19:46:28
Done.
 | 
| +// read with the given abbreviation Abbrev, and the given record Code. | 
| +// SizeDist is the corresponding distribution of sizes associated with | 
| +// the abbreviation. Any found abbreviations are added to the | 
| +// candidate abbreviations CandAbbrevs. | 
| +static void AddNewAbbreviations(unsigned BlockID, | 
| + NaClBitCodeAbbrev *Abbrev, | 
| + unsigned Code, | 
| + NaClBitcodeDist &SizeDist, | 
| + CandidateAbbrevs &CandAbbrevs) { | 
| + const NaClBitcodeDist::Distribution * | 
| + SizeDistribution = SizeDist.GetDistribution(); | 
| + for (NaClBitcodeDist::Distribution::const_iterator | 
| + SizeIter = SizeDistribution->begin(), | 
| + SizeIterEnd = SizeDistribution->end(); | 
| + SizeIter != SizeIterEnd; ++SizeIter) { | 
| + unsigned Size = static_cast<unsigned>(SizeIter->second); | 
| + UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1); // Add code! | 
| + if (!UnrolledAbbrev.CodeOp.isLiteral()) { | 
| + // Try making the code a literal. | 
| + UnrolledAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code); | 
| + CandAbbrevs.Add(BlockID, UnrolledAbbrev, | 
| + SizeDist.at(Size)->GetNumInstances()); | 
| + } | 
| + // Now process value indices to find candidate abbreviations. | 
| + AddNewAbbreviations( | 
| + BlockID, UnrolledAbbrev, | 
| + cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size)) | 
| + ->GetValueIndexDist(), | 
| + CandAbbrevs); | 
| + } | 
| +} | 
| + | 
| +// Look for new abbreviations in block BlockID. Abbrevs is the map of | 
| +// read (globally defined) abbreviations. AbbrevDist is the | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
Abbrevs is already known to be associated with the
 
Karl
2014/02/06 19:46:28
yes. Added qualifier to comment.
 | 
| +// distribution map of abbreviations associated with BlockID. Any | 
| +// found abbreviations are added to the candidate abbreviations | 
| +// CandAbbrevs. | 
| +static void AddNewAbbreviations(unsigned BlockID, | 
| + BlockAbbrevs *Abbrevs, | 
| + NaClBitcodeDist &AbbrevDist, | 
| + CandidateAbbrevs &CandAbbrevs) { | 
| + const NaClBitcodeDist::Distribution * | 
| + AbbrevDistribution = AbbrevDist.GetDistribution(); | 
| + for (NaClBitcodeDist::Distribution::const_iterator | 
| + AbbrevIter = AbbrevDistribution->begin(), | 
| + AbbrevIterEnd = AbbrevDistribution->end(); | 
| + AbbrevIter != AbbrevIterEnd; ++AbbrevIter) { | 
| + NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second; | 
| + NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex); | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
It seems like an *existing* Abbrev gets pulled out
 
Karl
2014/02/06 19:46:28
Yes. Unroll creates a copy of the abbreviation, an
 | 
| + NaClBitcodeAbbrevDistElement *AbbrevElmt = | 
| + cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex)); | 
| + NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist(); | 
| + | 
| + const NaClBitcodeDist::Distribution * | 
| + CodeDistribution = CodeDist.GetDistribution(); | 
| + for (NaClBitcodeDist::Distribution::const_iterator | 
| + CodeIter = CodeDistribution->begin(), | 
| + CodeIterEnd = CodeDistribution->end(); | 
| + CodeIter != CodeIterEnd; ++CodeIter) { | 
| + unsigned Code = static_cast<unsigned>(CodeIter->second); | 
| + AddNewAbbreviations( | 
| + BlockID, | 
| + Abbrev, | 
| + Code, | 
| + cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second)) | 
| + ->GetSizeDist(), | 
| + CandAbbrevs); | 
| + } | 
| + } | 
| +} | 
| + | 
| +// Look for new abbreviations in the given block distribution map | 
| +// BlockDist. BlockAbbrevsMap contains the set of read global | 
| +// abbrevations. Any found abbreviations are added to the set of | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
abb.
 
Karl
2014/02/06 19:46:28
Done.
 | 
| +// candidate abbreviations CandAbbrevs. | 
| +static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist, | 
| + BlockAbbrevsMapType &BlockAbbrevsMap, | 
| + CandidateAbbrevs &CandAbbrevs) { | 
| + // Start by collecting candidate abbreviations. | 
| + const NaClBitcodeDist::Distribution * | 
| + Distribution = BlockDist.GetDistribution(); | 
| + for (NaClBitcodeDist::Distribution::const_iterator | 
| + Iter = Distribution->begin(), IterEnd = Distribution->end(); | 
| + Iter != IterEnd; ++Iter) { | 
| + unsigned BlockID = static_cast<unsigned>(Iter->second); | 
| + AddNewAbbreviations( | 
| + BlockID, | 
| + BlockAbbrevsMap[BlockID], | 
| + cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID)) | 
| + ->GetAbbrevDist(), | 
| + CandAbbrevs); | 
| + } | 
| + // Install candidate abbreviations. | 
| 
Karl
2014/02/06 19:46:28
Modified this code to sort the abbreviations by th
 
jvoung (off chromium)
2014/02/06 23:19:36
What are some examples of when the two *different*
 
Karl
2014/02/07 19:37:29
It may be the case that value index 3 has 300 inst
 | 
| + if (TraceGeneratedAbbreviations) { | 
| + CandAbbrevs.Print(errs(), "New abbreviations"); | 
| + } | 
| + for (CandidateAbbrevs::const_iterator | 
| + Iter = CandAbbrevs.GetAbbrevs().begin(), | 
| + IterEnd = CandAbbrevs.GetAbbrevs().end(); | 
| + Iter != IterEnd; ++Iter) { | 
| + CandBlockAbbrev CandAbbrev(*Iter); | 
| + BlockAbbrevs *Abbrevs = BlockAbbrevsMap[CandAbbrev.GetBlockID()]; | 
| + Abbrevs->AddAbbreviation(CandAbbrev.GetAbbrev()->Copy()); | 
| + } | 
| +} | 
| + | 
| // Read in bitcode, analyze data, and figure out set of abbreviations | 
| // to use, from memory buffer MemBuf containing the input bitcode file. | 
| static bool AnalyzeBitcode(OwningPtr<MemoryBuffer> &MemBuf, | 
| @@ -542,6 +1058,8 @@ static bool AnalyzeBitcode(OwningPtr<MemoryBuffer> &MemBuf, | 
| Parser.BlockDist.Print(OutputFilename == "-" ? errs() : outs()); | 
| } | 
| + CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap); | 
| + AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap, CandAbbrevs); | 
| 
Karl
2014/02/06 19:46:28
After reading the code more carefully, I realized
 | 
| return false; | 
| } | 
| @@ -623,6 +1141,17 @@ protected: | 
| return ::Error(Message); | 
| } | 
| + /// Returns the set of (global) block abbreviations defined for the | 
| + /// given block ID. | 
| + BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) { | 
| + BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID]; | 
| + if (Abbrevs == 0) { | 
| + Abbrevs = new BlockAbbrevs(BlockID); | 
| + Context->BlockAbbrevsMap[BlockID] = Abbrevs; | 
| + } | 
| + return Abbrevs; | 
| + } | 
| + | 
| virtual bool ParseBlock(unsigned BlockID) { | 
| NaClBlockCopyParser Parser(BlockID, this); | 
| return Parser.ParseThisBlock(); | 
| @@ -630,12 +1159,23 @@ protected: | 
| virtual void EnterBlock(unsigned NumWords) { | 
| unsigned BlockID = GetBlockID(); | 
| - BlockAbbreviations = Context->BlockAbbrevsMap[BlockID]; | 
| + BlockAbbreviations = GetGlobalAbbrevs(BlockID); | 
| // Enter the subblock. | 
| NaClBitcodeSelectorAbbrev | 
| Selector(BlockAbbreviations->GetNumberAbbreviations()-1); | 
| Context->Writer.EnterSubblock(BlockID, Selector); | 
| + | 
| + // Note: We must dump module abbreviations as local | 
| + // abbreviations, because they are in a yet to be | 
| + // dumped BlockInfoBlock. | 
| 
jvoung (off chromium)
2014/02/05 00:24:16
So the BlockInfoBlock will be skipped?
 
Karl
2014/02/06 19:46:28
No. There are generated by method ExitBlockInfo be
 | 
| + if (BlockID == naclbitc::MODULE_BLOCK_ID) { | 
| + BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID); | 
| + if (Abbrevs == 0) return; | 
| + for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) { | 
| + Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy()); | 
| + } | 
| + } | 
| } | 
| virtual void ExitBlock() { | 
| @@ -654,6 +1194,10 @@ protected: | 
| IterEnd = Context->BlockAbbrevsMap.end(); | 
| Iter != IterEnd; ++Iter) { | 
| unsigned BlockID = Iter->first; | 
| + // Don't emit module abbreviations, since they have been | 
| + // emitted as local abbreviations. | 
| + if (BlockID == naclbitc::MODULE_BLOCK_ID) continue; | 
| + | 
| BlockAbbrevs *Abbrevs = Iter->second; | 
| if (Abbrevs == 0) continue; | 
| for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); |