Chromium Code Reviews| Index: tools/pnacl-bccompress/pnacl-bccompress.cpp |
| diff --git a/tools/pnacl-bccompress/pnacl-bccompress.cpp b/tools/pnacl-bccompress/pnacl-bccompress.cpp |
| index 996d079f725e25101cf29650c21468f7f9918077..4274f69c115ec78efa866449413bdb80585fb7c7 100644 |
| --- a/tools/pnacl-bccompress/pnacl-bccompress.cpp |
| +++ b/tools/pnacl-bccompress/pnacl-bccompress.cpp |
| @@ -100,45 +100,11 @@ static bool Error(const std::string &Err) { |
| return true; |
| } |
| -// For debugging. Prints out the abbreviation in readable form to errs(). |
| -static void PrintAbbrev(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { |
| - errs() << "Abbrev(block " << BlockID << "): ["; |
| - // ContinuationCount>0 implies that the current operand is a |
| - // continuation of previous operand(s). |
| - unsigned ContinuationCount = 0; |
| - for (unsigned i = 0; i < Abbrev->getNumOperandInfos(); ++i) { |
| - if (i > 0 && ContinuationCount == 0) errs() << ", "; |
| - const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(i); |
| - if (Op.isLiteral()) { |
| - errs() << Op.getLiteralValue(); |
| - } else if (Op.isEncoding()) { |
| - switch (Op.getEncoding()) { |
| - case NaClBitCodeAbbrevOp::Fixed: |
| - errs() << "Fixed(" << Op.getEncodingData() << ")"; |
| - break; |
| - case NaClBitCodeAbbrevOp::VBR: |
| - errs() << "VBR(" << Op.getEncodingData() << ")"; |
| - break; |
| - case NaClBitCodeAbbrevOp::Array: |
| - errs() << "Array:"; |
| - ++ContinuationCount; |
| - continue; |
| - case NaClBitCodeAbbrevOp::Char6: |
| - errs() << "Char6"; |
| - break; |
| - case NaClBitCodeAbbrevOp::Blob: |
| - errs() << "Blob"; |
| - break; |
| - default: |
| - errs() << "??"; |
| - break; |
| - } |
| - } else { |
| - errs() << "??"; |
| - } |
| - if (ContinuationCount) --ContinuationCount; |
| - } |
| - errs() << "]\n"; |
| +// Prints out the abbreviation in readable form to the given Stream. |
| +static void PrintAbbrev(raw_ostream &Stream, |
| + unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { |
| + Stream << "Abbrev(block " << BlockID << "): "; |
| + Abbrev->Print(Stream); |
| } |
| // Reads the input file into the given buffer. |
| @@ -215,7 +181,12 @@ public: |
| // application abbreviations, so that added abbreviations have |
| // valid abbreviation indices. |
| for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) { |
| - Abbrevs.push_back(new NaClBitCodeAbbrev()); |
| + // Make all non-application abbreviations look like the default |
| + // abbreviation. |
| + NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev(); |
| + Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); |
| + Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); |
| + Abbrevs.push_back(Abbrev); |
| } |
| GlobalAbbrevBitstreamToInternalMap. |
| SetNextBitstreamAbbrevIndex(Abbrevs.size()); |
| @@ -263,6 +234,15 @@ public: |
| return true; |
| } |
| + /// Adds the given abbreviation to the set of global abbreviations |
| + /// defined for the block. Guarantees that duplicate abbreviations |
| + /// are not added to the block. Note: Code takes ownership of |
| + /// the given abbreviation. Returns true if new abbreviation. |
| + bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) { |
| + int Index; |
| + return AddAbbreviation(Abbrev, Index); |
| + } |
| + |
| /// The block ID associated with the block. |
| unsigned GetBlockID() const { |
| return BlockID; |
| @@ -417,7 +397,7 @@ public: |
| // abbreviation. |
| const NaClBitCodeAbbrev *Abbrev = Record.GetCursor().GetNewestAbbrev(); |
| int Index; |
| - AddAbbreviation(GetBlockID(), Abbrev->Copy(), Index); |
| + AddAbbreviation(GetBlockID(), Abbrev->Simplify(), Index); |
| LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index); |
| } |
| @@ -442,7 +422,7 @@ public: |
| ++AbbrevIter) { |
| NaClBitCodeAbbrev *Abbrev = *AbbrevIter; |
| int Index; |
| - AddAbbreviation(BlockID, Abbrev->Copy(), Index); |
| + AddAbbreviation(BlockID, Abbrev->Simplify(), Index); |
| BlkAbbrevs->GetGlobalAbbrevBitstreamToInternalMap(). |
| InstallNewBitstreamAbbrevIndex(Index); |
| } |
| @@ -485,11 +465,11 @@ protected: |
| // Read abbreviation and add as a global abbreviation. |
| if (Abbrevs->AddAbbreviation(Abbrev, Index) |
| && TraceGeneratedAbbreviations) { |
| - PrintAbbrev(BlockID, Abbrev); |
| + PrintAbbrev(errs(), BlockID, Abbrev); |
| } |
| } |
| - /// Finds the index to the corresponding internal block abbrevation |
| + /// Finds the index to the corresponding internal block abbreviation |
| /// for the given abbreviation. |
| int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { |
| return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev); |
| @@ -508,6 +488,479 @@ bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) { |
| return Parser.ParseThisBlock(); |
| } |
| +// Models the unrolling of an abbreviation into its sequence of |
| +// individual operators. That is, unrolling arrays to match the width |
| +// of the abbreviation. Note: We unroll in the form that best matches |
| +// the distribution map. Hence, the code is stored as a separate |
| +// operator. We also keep the array abbreviation op, for untracked |
| +// elements within the distribution maps. |
| +class UnrolledAbbreviation { |
| + void operator=(const UnrolledAbbreviation&) LLVM_DELETED_FUNCTION; |
| +public: |
| + /// Unroll the given abbreviation, assuming it has the given size |
| + /// (as specified in the distribution maps). |
| + UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size) |
| + : CodeOp(0) { |
| + unsigned NextOp = 0; |
| + UnrollAbbrevOp(CodeOp, Abbrev, NextOp); |
| + --Size; |
| + for (unsigned i = 0; i < Size; ++i) { |
| + // Create slot and then fill with appropriate operator. |
| + AbbrevOps.push_back(CodeOp); |
| + UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp); |
| + } |
| + for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) { |
| + MoreOps.push_back(Abbrev->getOperandInfo(NextOp)); |
| + } |
| + } |
| + |
| + explicit UnrolledAbbreviation(const UnrolledAbbreviation &Abbrev) |
| + : CodeOp(Abbrev.CodeOp), |
| + AbbrevOps(Abbrev.AbbrevOps), |
| + MoreOps(Abbrev.MoreOps) { |
| + } |
| + |
| + /// Prints out the abbreviation modeled by the unrolled |
| + /// abbreviation. |
| + void Print(raw_ostream &Stream) const { |
| + NaClBitCodeAbbrev *Abbrev = roll(); |
| + Abbrev->Print(Stream); |
| + Abbrev->dropRef(); |
| + } |
| + |
| + /// Converts the unrolled abbreviation back into a regular |
| + /// abbreviation. |
| + NaClBitCodeAbbrev *roll() const { |
| + NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev(); |
| + Abbrev->Add(CodeOp); |
| + for (std::vector<NaClBitCodeAbbrevOp>::const_iterator |
| + Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end(); |
| + Iter != IterEnd; ++Iter) { |
| + Abbrev->Add(*Iter); |
| + } |
| + for (std::vector<NaClBitCodeAbbrevOp>::const_iterator |
| + Iter = MoreOps.begin(), IterEnd = MoreOps.end(); |
| + Iter != IterEnd; ++Iter) { |
| + Abbrev->Add(*Iter); |
| + } |
| + NaClBitCodeAbbrev *SimpAbbrev = Abbrev->Simplify(); |
| + Abbrev->dropRef(); |
| + return SimpAbbrev; |
| + } |
| + |
| + // The abbreviation used for the record code. |
| + NaClBitCodeAbbrevOp CodeOp; |
| + |
| + // The abbreviations used for each tracked value index. |
| + std::vector<NaClBitCodeAbbrevOp> AbbrevOps; |
| + |
| +private: |
| + // Any remaining abbreviation operators not part of the unrolling. |
| + std::vector<NaClBitCodeAbbrevOp> MoreOps; |
| + |
| + // Extracts out the next abbreviation operator from the abbreviation |
| + // Abbrev, given the index NextOp, and assigns it to AbbrevOp |
| + void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp, |
| + NaClBitCodeAbbrev *Abbrev, |
| + unsigned &NextOp) { |
| + assert(NextOp < Abbrev->getNumOperandInfos()); |
| + const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp); |
| + if (Op.isEncoding() && Op.getEncoding() == NaClBitCodeAbbrevOp::Array) { |
| + // Do not advance. The array operator assumes that all remaining |
| + // elements should match its argument. |
| + AbbrevOp = Abbrev->getOperandInfo(NextOp+1); |
| + } else { |
| + AbbrevOp = Op; |
| + NextOp++; |
| + } |
| + } |
| +}; |
| + |
| +/// Models a candidate block abbreviation, which is a blockID, the |
| +/// corresponding abbreviation to be considered for addition. Note: |
| +/// Constructors and assignment take ownership of the abbreviation. |
| +class CandBlockAbbrev { |
| +public: |
| + CandBlockAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbrev) |
| + : BlockID(BlockID), Abbrev(Abbrev) { |
| + } |
| + |
| + CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev) |
| + : BlockID(BlockAbbrev.BlockID), |
| + Abbrev(BlockAbbrev.Abbrev) { |
| + Abbrev->addRef(); |
| + } |
| + |
| + void operator=(const CandBlockAbbrev &BlockAbbrev) { |
| + Abbrev->dropRef(); |
| + BlockID = BlockAbbrev.BlockID; |
| + Abbrev = BlockAbbrev.Abbrev; |
| + Abbrev->addRef(); |
| + } |
| + |
| + ~CandBlockAbbrev() { |
| + Abbrev->dropRef(); |
| + } |
| + |
| + /// The block ID of the candidate abbreviation. |
| + unsigned GetBlockID() const { |
| + return BlockID; |
| + } |
| + |
| + /// The abbreviation of the candidate abbreviation. |
| + const NaClBitCodeAbbrev *GetAbbrev() const { |
| + return Abbrev; |
| + } |
| + |
| + /// orders this against the candidate abbreviation. |
| + int Compare(const CandBlockAbbrev &CandAbbrev) const { |
| + unsigned diff = BlockID - CandAbbrev.BlockID; |
| + if (diff) return diff; |
| + return Abbrev->Compare(*CandAbbrev.Abbrev); |
| + } |
| + |
| + /// Prints the candidate abbreviation to the given stream. |
| + void Print(raw_ostream &Stream) const { |
| + PrintAbbrev(Stream, BlockID, Abbrev); |
| + } |
| + |
| +private: |
| + // The block the abbreviation applies to. |
| + unsigned BlockID; |
| + // The candidate abbreviation. |
| + NaClBitCodeAbbrev *Abbrev; |
| +}; |
| + |
| +static inline bool operator<(const CandBlockAbbrev &A1, |
| + const CandBlockAbbrev &A2) { |
| + return A1.Compare(A2) < 0; |
| +} |
| + |
| +/// Models the minimum number of instances for a candidate abbreviation |
| +/// before we will even consider it a potential candidate abbreviation. |
| +static unsigned MinNumInstancesForNewAbbrevs = 100; |
| + |
| +/// Models the set of candidate abbreviations being considered. |
| +/// Filters based on the number of instances of each candidate |
| +/// abbreviation. This filter is done to only keep candidates that |
| +/// most likely will reduce bitcode size. |
| +class CandidateAbbrevs { |
| +public: |
| + // Map from candidate abbreviations to the corresponding number of |
| + // instances. |
| + typedef std::map<CandBlockAbbrev, unsigned> AbbrevCountMap; |
| + typedef AbbrevCountMap::const_iterator const_iterator; |
| + |
| + /// Creates an empty set of candidate abbreviations, to be |
| + /// (potentially) added to the given set of block abbreviations. |
| + /// Argument is the (global) block abbreviations map, which is |
| + /// used to determine if it already exists. |
| + CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap) |
| + : MaxInstances(0), BlockAbbrevsMap(BlockAbbrevsMap) |
| + {} |
| + |
| + /// Adds the given (unrolled) abbreviation as a candidate |
| + /// abbreviation to the given block. NumInstances is the number of |
| + /// instances expected to use this candidate abbreviation. Returns |
| + /// true if the corresponding candidate abbreviation is added to this |
| + /// set of candidate abbreviations. Note: depending on the value |
| + /// of NumInstances, methods GetMaxInstances and GetMinInstances |
| + /// may change to include NumInstances, if the candidate abbreviation |
| + /// is added. |
| + bool Add(unsigned BlockID, |
| + UnrolledAbbreviation &UnrolledAbbrev, |
| + unsigned NumInstances); |
| + |
| + /// Returns the upper range (of number instances) that is currently |
| + /// associated with this set of candidate abbreviations. |
| + unsigned GetMaxInstances() const { |
| + return MaxInstances; |
| + } |
| + |
| + /// Returns the lower range (of number instances) that is currently |
| + /// associated with this set of candidate abbreviations. Note: |
| + /// The current implementation is a guesstimate for chunking out the |
| + /// best abbreviations. |
| + unsigned GetMinInstances() const { |
| + if (MaxInstances <= 1024) { |
| + return 0; |
| + } else |
| + return MaxInstances >> 4; |
| + } |
| + |
| + /// Returns the list of candidate abbreviations in this set. |
| + const AbbrevCountMap &GetAbbrevsMap() const { |
| + return AbbrevsMap; |
| + } |
| + |
| + /// Prints out the current contents of this set. |
| + void Print(raw_ostream &Stream, const char *Title = "Candidates") const { |
| + Stream << "-- " << Title << ": \n"; |
| + for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end(); |
| + Iter != IterEnd; ++Iter) { |
| + Stream << format("%12u", Iter->second) << ": "; |
| + Iter->first.Print(Stream); |
| + } |
| + Stream << "--\n"; |
| + } |
| + |
| +private: |
| + // The upper bound of instances in this set. |
| + unsigned MaxInstances; |
| + |
| + // The set of abbreviations and corresponding number instancs. |
|
jvoung (off chromium)
2014/02/06 23:19:37
instances
Karl
2014/02/07 19:37:29
Done.
jvoung (off chromium)
2014/02/07 21:34:10
sorry missed this in last comment: number *of* ins
|
| + AbbrevCountMap AbbrevsMap; |
| + |
| + // The map of (global) abbreviations already associated with each block. |
| + BlockAbbrevsMapType &BlockAbbrevsMap; |
| +}; |
| + |
| +bool CandidateAbbrevs::Add(unsigned BlockID, |
| + UnrolledAbbreviation &UnrolledAbbrev, |
| + unsigned NumInstances) { |
| + // First see if we shouldn't add, because not enough instances apply. |
| + if (NumInstances < MinNumInstancesForNewAbbrevs) return false; |
| + if (NumInstances < GetMinInstances()) return false; |
| + |
| + // Drop if it corresponds to an existing global abbreviation. |
| + NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.roll(); |
| + if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) { |
| + if (Abbrevs->FindAbbreviation(Abbrev) != |
| + BlockAbbrevs::NO_SUCH_ABBREVIATION) { |
| + Abbrev->dropRef(); |
| + return false; |
| + } |
| + } |
| + |
| + // Drop if we have already recorded this abbreviation with at least |
| + // this number of instances. |
|
jvoung (off chromium)
2014/02/06 23:19:37
When do collisions happen? Examples?
How come it
Karl
2014/02/07 19:37:29
After thinking about this some more, the whole con
|
| + CandBlockAbbrev CandAbbrev(BlockID, Abbrev); |
| + AbbrevCountMap::iterator Pos = AbbrevsMap.find(CandAbbrev); |
| + if (Pos != AbbrevsMap.end() && Pos->second >= NumInstances) { |
| + Abbrev->dropRef(); |
| + return false; |
| + } |
| + |
| + // If reached, we should add the given abbreviation to the set of |
| + // candidate abbreviations. |
| + if (NumInstances > GetMaxInstances()) { |
| + // New range of candidates found. Throw out candidate |
| + // abbreviations that no longer apply, due to the adjustment of |
| + // the min/max ranges. |
| + unsigned Bits = NaClBitsNeededForValue(NumInstances); |
| + if (Bits > 31) { |
| + MaxInstances = ~0U; // i.e. max unsigned value. |
| + } else { |
| + MaxInstances = 1U << Bits; |
| + } |
| + |
| + std::vector<CandBlockAbbrev> ToRemove; |
| + for (const_iterator |
| + Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end(); |
| + Iter != IterEnd; ++Iter) { |
| + if (Iter->second < GetMinInstances()) { |
| + ToRemove.push_back(Iter->first); |
| + } |
| + } |
| + |
| + for (std::vector<CandBlockAbbrev>::const_iterator |
| + Iter = ToRemove.begin(), IterEnd = ToRemove.end(); |
| + Iter != IterEnd; ++Iter) { |
| + AbbrevsMap.erase(*Iter); |
| + } |
| + } |
| + |
| + AbbrevsMap[CandAbbrev] = NumInstances; |
| + return true; |
| +} |
| + |
| +// Look for new abbreviations in block BlockID, considering it was |
| +// read with the given abbreviation Abbrev, and considering changing |
| +// the abbreviation opererator for value Index. ValueDist is how |
| +// values at Index are distributed. Any found abbreviations are added |
| +// to the candidate abbreviations CandAbbrevs. Returns true only if we |
| +// have added new candidate abbreviations to CandAbbrevs. |
| +static bool AddNewAbbreviations(unsigned BlockID, |
| + const UnrolledAbbreviation &Abbrev, |
| + unsigned Index, |
| + NaClBitcodeValueDist &ValueDist, |
| + CandidateAbbrevs &CandAbbrevs) { |
| + // TODO(kschimpf): Add code to try and find a better encoding for |
| + // the values, based on the distribution. |
| + |
| + // If this index is already a literal abbreviation, no improvements can |
| + // be made. |
| + const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index]; |
| + if (Op.isLiteral()) return false; |
| + |
| + // Search based on sorted distribution, which sorts by number of |
| + // instances. Start by trying to find possible constants to use. |
| + const NaClBitcodeDist::Distribution * |
| + Distribution = ValueDist.GetDistribution(); |
| + for (NaClBitcodeDist::Distribution::const_iterator |
| + Iter = Distribution->begin(), IterEnd = Distribution->end(); |
| + Iter != IterEnd; ++Iter) { |
| + NaClValueRangeType Range = GetNaClValueRange(Iter->second); |
| + if (Range.first != Range.second) continue; // not a constant. |
| + |
| + // Defines a constant. Try as new candidate range. In addition, |
| + // don't try any more constant values, since this is the one with |
| + // the greatest number of instances. |
| + NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first); |
| + UnrolledAbbreviation CandAbbrev(Abbrev); |
| + CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first); |
| + return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances()); |
| + } |
| + return false; |
| +} |
| + |
| +// Look for new abbreviations in block BlockID, considering it was |
| +// read with the given abbreviation Abbrev. IndexDist is the |
| +// corresponding distribution of value indices associated with the |
| +// abbreviation. Any found abbreviations are added to the candidate |
| +// abbreviations CandAbbrevs. |
| +static void AddNewAbbreviations(unsigned BlockID, |
| + const UnrolledAbbreviation &Abbrev, |
| + NaClBitcodeDist &IndexDist, |
| + CandidateAbbrevs &CandAbbrevs) { |
| + // Search based on sorted distribution, which sorts based on heuristic |
| + // of best index to fix first. |
| + const NaClBitcodeDist::Distribution * |
| + IndexDistribution = IndexDist.GetDistribution(); |
| + for (NaClBitcodeDist::Distribution::const_iterator |
| + IndexIter = IndexDistribution->begin(), |
| + IndexIterEnd = IndexDistribution->end(); |
| + IndexIter != IndexIterEnd; ++IndexIter) { |
| + unsigned Index = static_cast<unsigned>(IndexIter->second); |
| + if (AddNewAbbreviations( |
| + BlockID, Abbrev, Index, |
| + cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index)) |
| + ->GetValueDist(), |
| + CandAbbrevs)) { |
| + return; |
| + } |
| + } |
| +} |
| + |
| +// Look for new abbreviations in block BlockID, considering it was |
| +// read with the given abbreviation Abbrev, and the given record Code. |
| +// SizeDist is the corresponding distribution of sizes associated with |
| +// the abbreviation. Any found abbreviations are added to the |
| +// candidate abbreviations CandAbbrevs. |
| +static void AddNewAbbreviations(unsigned BlockID, |
| + NaClBitCodeAbbrev *Abbrev, |
| + unsigned Code, |
| + NaClBitcodeDist &SizeDist, |
| + CandidateAbbrevs &CandAbbrevs) { |
| + const NaClBitcodeDist::Distribution * |
| + SizeDistribution = SizeDist.GetDistribution(); |
| + for (NaClBitcodeDist::Distribution::const_iterator |
| + SizeIter = SizeDistribution->begin(), |
| + SizeIterEnd = SizeDistribution->end(); |
| + SizeIter != SizeIterEnd; ++SizeIter) { |
| + unsigned Size = static_cast<unsigned>(SizeIter->second); |
| + UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1); // Add code! |
| + if (!UnrolledAbbrev.CodeOp.isLiteral()) { |
| + // Try making the code a literal. |
| + UnrolledAbbreviation CandAbbrev(UnrolledAbbrev); |
| + CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code); |
| + CandAbbrevs.Add(BlockID, CandAbbrev, |
| + SizeDist.at(Size)->GetNumInstances()); |
| + } |
| + // Now process value indices to find candidate abbreviations. |
| + AddNewAbbreviations( |
| + BlockID, UnrolledAbbrev, |
| + cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size)) |
| + ->GetValueIndexDist(), |
| + CandAbbrevs); |
| + } |
| +} |
| + |
| +// Look for new abbreviations in block BlockID. Abbrevs is the map of |
| +// read (globally defined) abbreviations associated with the |
| +// BlockID. AbbrevDist is the distribution map of abbreviations |
| +// associated with BlockID. Any found abbreviations are added to the |
| +// candidate abbreviations CandAbbrevs. |
| +static void AddNewAbbreviations(unsigned BlockID, |
| + BlockAbbrevs *Abbrevs, |
| + NaClBitcodeDist &AbbrevDist, |
| + CandidateAbbrevs &CandAbbrevs) { |
| + const NaClBitcodeDist::Distribution * |
| + AbbrevDistribution = AbbrevDist.GetDistribution(); |
| + for (NaClBitcodeDist::Distribution::const_iterator |
| + AbbrevIter = AbbrevDistribution->begin(), |
| + AbbrevIterEnd = AbbrevDistribution->end(); |
| + AbbrevIter != AbbrevIterEnd; ++AbbrevIter) { |
| + NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second; |
| + NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex); |
| + NaClBitcodeAbbrevDistElement *AbbrevElmt = |
| + cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex)); |
| + NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist(); |
| + |
| + const NaClBitcodeDist::Distribution * |
| + CodeDistribution = CodeDist.GetDistribution(); |
| + for (NaClBitcodeDist::Distribution::const_iterator |
| + CodeIter = CodeDistribution->begin(), |
| + CodeIterEnd = CodeDistribution->end(); |
| + CodeIter != CodeIterEnd; ++CodeIter) { |
| + unsigned Code = static_cast<unsigned>(CodeIter->second); |
| + AddNewAbbreviations( |
| + BlockID, |
| + Abbrev, |
| + Code, |
| + cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second)) |
| + ->GetSizeDist(), |
| + CandAbbrevs); |
| + } |
| + } |
| +} |
| + |
| +typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType; |
| + |
| +// Look for new abbreviations in the given block distribution map |
| +// BlockDist. BlockAbbrevsMap contains the set of read global |
| +// abbreviations. Adds found candidate abbreviations to the set of |
| +// known global abbreviations. |
| +static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist, |
| + BlockAbbrevsMapType &BlockAbbrevsMap) { |
| + CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap); |
| + // Start by collecting candidate abbreviations. |
| + const NaClBitcodeDist::Distribution * |
| + Distribution = BlockDist.GetDistribution(); |
| + for (NaClBitcodeDist::Distribution::const_iterator |
| + Iter = Distribution->begin(), IterEnd = Distribution->end(); |
| + Iter != IterEnd; ++Iter) { |
| + unsigned BlockID = static_cast<unsigned>(Iter->second); |
| + AddNewAbbreviations( |
| + BlockID, |
| + BlockAbbrevsMap[BlockID], |
| + cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID)) |
| + ->GetAbbrevDist(), |
| + CandAbbrevs); |
| + } |
| + // Install candidate abbreviations. |
| + if (TraceGeneratedAbbreviations) { |
| + CandAbbrevs.Print(errs(), "New abbreviations"); |
| + } |
| + // Sort the candidate abbreviations by number of instances, so |
| + // that if multiple abbreviations apply, the one with the largest |
| + // number of instances will be chosen when generating compressed file. |
| + std::vector<CountedAbbrevType> Candidates; |
| + for (CandidateAbbrevs::const_iterator |
| + Iter = CandAbbrevs.GetAbbrevsMap().begin(), |
| + IterEnd = CandAbbrevs.GetAbbrevsMap().end(); |
| + Iter != IterEnd; ++Iter) { |
| + Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first)); |
| + } |
| + std::sort(Candidates.begin(), Candidates.end()); |
| + for (std::vector<CountedAbbrevType>::const_reverse_iterator |
| + Iter = Candidates.rbegin(), IterEnd = Candidates.rend(); |
| + Iter != IterEnd; ++Iter) { |
| + BlockAbbrevs *Abbrevs = BlockAbbrevsMap[Iter->second.GetBlockID()]; |
| + Abbrevs->AddAbbreviation(Iter->second.GetAbbrev()->Copy()); |
| + } |
| +} |
| + |
| // Read in bitcode, analyze data, and figure out set of abbreviations |
| // to use, from memory buffer MemBuf containing the input bitcode file. |
| static bool AnalyzeBitcode(OwningPtr<MemoryBuffer> &MemBuf, |
| @@ -542,6 +995,7 @@ static bool AnalyzeBitcode(OwningPtr<MemoryBuffer> &MemBuf, |
| Parser.BlockDist.Print(OutputFilename == "-" ? errs() : outs()); |
| } |
| + AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap); |
| return false; |
| } |
| @@ -623,6 +1077,17 @@ protected: |
| return ::Error(Message); |
| } |
| + /// Returns the set of (global) block abbreviations defined for the |
| + /// given block ID. |
| + BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) { |
| + BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID]; |
| + if (Abbrevs == 0) { |
| + Abbrevs = new BlockAbbrevs(BlockID); |
| + Context->BlockAbbrevsMap[BlockID] = Abbrevs; |
| + } |
| + return Abbrevs; |
| + } |
| + |
| virtual bool ParseBlock(unsigned BlockID) { |
| NaClBlockCopyParser Parser(BlockID, this); |
| return Parser.ParseThisBlock(); |
| @@ -630,12 +1095,23 @@ protected: |
| virtual void EnterBlock(unsigned NumWords) { |
| unsigned BlockID = GetBlockID(); |
| - BlockAbbreviations = Context->BlockAbbrevsMap[BlockID]; |
| + BlockAbbreviations = GetGlobalAbbrevs(BlockID); |
| // Enter the subblock. |
| NaClBitcodeSelectorAbbrev |
| Selector(BlockAbbreviations->GetNumberAbbreviations()-1); |
| Context->Writer.EnterSubblock(BlockID, Selector); |
| + |
| + // Note: We must dump module abbreviations as local |
| + // abbreviations, because they are in a yet to be |
| + // dumped BlockInfoBlock. |
| + if (BlockID == naclbitc::MODULE_BLOCK_ID) { |
| + BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID); |
| + if (Abbrevs == 0) return; |
|
jvoung (off chromium)
2014/02/06 23:19:37
when can Abbrevs == 0, if Abbrevs is returned by G
Karl
2014/02/07 19:37:29
Removed.
|
| + for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) { |
| + Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy()); |
| + } |
| + } |
| } |
| virtual void ExitBlock() { |
| @@ -654,6 +1130,10 @@ protected: |
| IterEnd = Context->BlockAbbrevsMap.end(); |
| Iter != IterEnd; ++Iter) { |
| unsigned BlockID = Iter->first; |
| + // Don't emit module abbreviations, since they have been |
| + // emitted as local abbreviations. |
| + if (BlockID == naclbitc::MODULE_BLOCK_ID) continue; |
| + |
| BlockAbbrevs *Abbrevs = Iter->second; |
| if (Abbrevs == 0) continue; |
| for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); |