Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(448)

Unified Diff: tools/pnacl-bccompress/pnacl-bccompress.cpp

Issue 939073008: Rebased PNaCl localmods in LLVM to 223109 (Closed)
Patch Set: undo localmod Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/pnacl-bccompress/Makefile ('k') | tools/pnacl-bcdis/CMakeLists.txt » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/pnacl-bccompress/pnacl-bccompress.cpp
diff --git a/tools/pnacl-bccompress/pnacl-bccompress.cpp b/tools/pnacl-bccompress/pnacl-bccompress.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..193a81f13352fafff5f99c25cc507039b98b7d45
--- /dev/null
+++ b/tools/pnacl-bccompress/pnacl-bccompress.cpp
@@ -0,0 +1,1517 @@
+//===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool may be invoked in the following manner:
+// pnacl-bccompress [options] bcin.pexe -o bcout.pexe
+// - Read frozen PNaCl bitcode from the bcin.pexe and introduce
+// abbreviations to compress it into bcout.pexe.
+//
+// Options:
+// --help - Output information about command line switches
+//
+// This tool analyzes the data in bcin.pexe, and determines what
+// abbreviations can be added to compress the bitcode file. The result
+// is written to bcout.pexe.
+//
+// A bitcode file has two types of abbreviations. The first are Global
+// abbreviations that apply to all instances of a particular type of
+// block. These abbreviations appear in the BlockInfo block of the
+// bitcode file.
+//
+// The second type of abbreviations are local to a particular instance
+// of a block.
+//
+// In pnacl-bccompress, for simplicity, we will only add global
+// abbreviations. Local abbreviations are converted to corresponding
+// global abbreviations, so that they can be added as global
+// abbreviations.
+//
+// The process of compressing is done by reading the input file
+// twice. In the first round, the records are read and analyzed,
+// generating a set of (global) abbreviations to use to generate the
+// compressed output file. Then, the input file is read again, and for
+// each record, the best fitting global abbreviation is found (or it
+// figures out that leaving the record unabbreviated is best) and
+// writes the record out accordingly.
+//
+// TODO(kschimpf): The current implementation does a trivial solution
+// for the first round. It just converts all abbreviations (local and
+// global) into global abbreviations. Future refinements of this file
+// will generate better (and more intelligent) global abbreviations,
+// based on the records found on the first read of the bitcode file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Bitcode/NaCl/AbbrevTrieNode.h"
+#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"
+#include "llvm/Bitcode/NaCl/NaClBitcodeParser.h"
+#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h"
+#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h"
+#include "llvm/Bitcode/NaCl/NaClCompressBlockDist.h"
+#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+#include <map>
+#include <system_error>
+
+namespace {
+
+using namespace llvm;
+
+static cl::opt<bool>
+TraceGeneratedAbbreviations(
+ "abbreviations",
+ cl::desc("Trace abbreviations added to compressed file"),
+ cl::init(false));
+
+static cl::opt<std::string>
+ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
+
+
+static cl::opt<std::string>
+OutputFilename("o", cl::desc("Specify output filename"),
+ cl::value_desc("filename"), cl::init("-"));
+
+static cl::opt<bool>
+ShowValueDistributions(
+ "show-distributions",
+ cl::desc("Show collected value distributions in bitcode records. "
+ "Turns off compression."),
+ cl::init(false));
+
+static cl::opt<bool>
+ShowAbbrevLookupTries(
+ "show-lookup-tries",
+ cl::desc("Show lookup tries used to minimize search for \n"
+ "matching abbreviations."),
+ cl::init(false));
+
+static cl::opt<bool>
+ShowAbbreviationFrequencies(
+ "show-abbreviation-frequencies",
+ cl::desc("Show how often each abbreviation is used. "
+ "Turns off compression."),
+ cl::init(false));
+
+// Note: When this flag is true, we still generate new abbreviations,
+// because we don't want to add the complexity of turning it off.
+// Rather, we simply make sure abbreviations are ignored when writing
+// out the final copy.
+static cl::opt<bool>
+RemoveAbbreviations(
+ "remove-abbreviations",
+ cl::desc("Remove abbreviations from input bitcode file."),
+ cl::init(false));
+
+/// Error - All bitcode analysis errors go through this function,
+/// making this a good place to breakpoint if debugging.
+static bool Error(const std::string &Err) {
+ errs() << Err << "\n";
+ return true;
+}
+
+// Prints out the abbreviation in readable form to the given Stream.
+static void PrintAbbrev(raw_ostream &Stream,
+ unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
+ Stream << "Abbrev(block " << BlockID << "): ";
+ Abbrev->Print(Stream);
+}
+
+// Reads the input file into the given buffer.
+static bool ReadAndBuffer(std::unique_ptr<MemoryBuffer> &MemBuf) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> ErrOrFile =
+ MemoryBuffer::getFileOrSTDIN(InputFilename);
+ if (std::error_code EC = ErrOrFile.getError())
+ return Error("Error reading '" + InputFilename + "': " + EC.message());
+
+ MemBuf.reset(ErrOrFile.get().release());
+ if (MemBuf->getBufferSize() % 4 != 0)
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ return false;
+}
+
+/// type map from bitstream abbreviation index to corresponding
+/// internal abbreviation index.
+typedef std::map<unsigned, unsigned> BitstreamToInternalAbbrevMapType;
+
+/// Defines a mapping from bitstream abbreviation indices, to
+/// corresponding internal abbreviation indices.
+class AbbrevBitstreamToInternalMap {
+public:
+
+ AbbrevBitstreamToInternalMap()
+ : NextBitstreamAbbrevIndex(0) {}
+
+ /// Returns the bitstream abbreviaion index that will be associated
+ /// with the next internal abbreviation index.
+ unsigned GetNextBitstreamAbbrevIndex() {
+ return NextBitstreamAbbrevIndex;
+ }
+
+ /// Changes the next bitstream abbreviation index to the given value.
+ void SetNextBitstreamAbbrevIndex(unsigned NextIndex) {
+ NextBitstreamAbbrevIndex = NextIndex;
+ }
+
+ /// Returns true if there is an internal abbreviation index for the
+ /// given bitstream abbreviation.
+ bool DefinesBitstreamAbbrevIndex(unsigned Index) {
+ return BitstreamToInternalAbbrevMap.find(Index) !=
+ BitstreamToInternalAbbrevMap.end();
+ }
+
+ /// Returns the internal abbreviation index for the given bitstream
+ /// abbreviation index.
+ unsigned GetInternalAbbrevIndex(unsigned Index) {
+ return BitstreamToInternalAbbrevMap.at(Index);
+ }
+
+ /// Installs the given internal abbreviation index using the next
+ /// available bitstream abbreviation index.
+ void InstallNewBitstreamAbbrevIndex(unsigned InternalIndex) {
+ BitstreamToInternalAbbrevMap[NextBitstreamAbbrevIndex++] = InternalIndex;
+ }
+
+private:
+ // The index of the next bitstream abbreviation to be defined.
+ unsigned NextBitstreamAbbrevIndex;
+ // Map from bitstream abbreviation index to corresponding internal
+ // abbreviation index.
+ BitstreamToInternalAbbrevMapType BitstreamToInternalAbbrevMap;
+};
+
+/// Defines the list of abbreviations associated with a block.
+class BlockAbbrevs {
+public:
+ // Vector to hold the (ordered) list of abbreviations.
+ typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector;
+
+ BlockAbbrevs(unsigned BlockID)
+ : BlockID(BlockID) {
+ // backfill internal indices that don't correspond to bitstream
+ // application abbreviations, so that added abbreviations have
+ // valid abbreviation indices.
+ for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) {
+ // Make all non-application abbreviations look like the default
+ // abbreviation.
+ NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
+ Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
+ Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6));
+ Abbrevs.push_back(Abbrev);
+ }
+ GlobalAbbrevBitstreamToInternalMap.
+ SetNextBitstreamAbbrevIndex(Abbrevs.size());
+ }
+
+ ~BlockAbbrevs() {
+ for (AbbrevVector::const_iterator
+ Iter = Abbrevs.begin(), IterEnd = Abbrevs.end();
+ Iter != IterEnd; ++Iter) {
+ (*Iter)->dropRef();
+ }
+ for (AbbrevLookupSizeMap::const_iterator
+ Iter = LookupMap.begin(), IterEnd = LookupMap.end();
+ Iter != IterEnd; ++Iter) {
+ delete Iter->second;
+ }
+ }
+
+ // Constant used to denote that a given abbreviation is not in the
+ // set of abbreviations defined by the block.
+ static const int NO_SUCH_ABBREVIATION = -1;
+
+ // Returns the index to the corresponding application abbreviation,
+ // if it exists. Otherwise return NO_SUCH_ABBREVIATION;
+ int FindAbbreviation(const NaClBitCodeAbbrev *Abbrev) const {
+ for (unsigned i = GetFirstApplicationAbbreviation();
+ i < GetNumberAbbreviations(); ++i) {
+ if (*Abbrevs[i] == *Abbrev) return i;
+ }
+ return NO_SUCH_ABBREVIATION;
+ }
+
+ /// Adds the given abbreviation to the set of global abbreviations
+ /// defined for the block. Guarantees that duplicate abbreviations
+ /// are not added to the block. Note: Code takes ownership of
+ /// the given abbreviation. Returns true if new abbreviation.
+ /// Updates Index to the index where the abbreviation is located
+ /// in the set of abbreviations.
+ bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) {
+ Index = FindAbbreviation(Abbrev);
+ if (Index != NO_SUCH_ABBREVIATION) {
+ // Already defined, don't install.
+ Abbrev->dropRef();
+ return false;
+ }
+
+ // New abbreviation. Add.
+ Index = Abbrevs.size();
+ Abbrevs.push_back(Abbrev);
+ return true;
+ }
+
+ /// Adds the given abbreviation to the set of global abbreviations
+ /// defined for the block. Guarantees that duplicate abbreviations
+ /// are not added to the block. Note: Code takes ownership of
+ /// the given abbreviation. Returns true if new abbreviation.
+ bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) {
+ int Index;
+ return AddAbbreviation(Abbrev, Index);
+ }
+
+ /// The block ID associated with the block.
+ unsigned GetBlockID() const {
+ return BlockID;
+ }
+
+ /// Returns the index of the frist application abbreviation for the
+ /// block.
+ unsigned GetFirstApplicationAbbreviation() const {
+ return naclbitc::FIRST_APPLICATION_ABBREV;
+ }
+
+ // Returns the number of abbreviations associated with the block.
+ unsigned GetNumberAbbreviations() const {
+ return Abbrevs.size();
+ }
+
+ /// Returns the abbreviation associated with the given abbreviation
+ /// index.
+ NaClBitCodeAbbrev *GetIndexedAbbrev(unsigned index) {
+ if (index >= Abbrevs.size()) return 0;
+ return Abbrevs[index];
+ }
+
+ // Builds the corresponding fast lookup map for finding abbreviations
+ // that applies to abbreviations in the block
+ void BuildAbbrevLookupSizeMap() {
+ NaClBuildAbbrevLookupMap(GetLookupMap(),
+ GetAbbrevs(),
+ GetFirstApplicationAbbreviation());
+ if (ShowAbbrevLookupTries) PrintLookupMap(errs());
+ }
+
+ AbbrevBitstreamToInternalMap &GetGlobalAbbrevBitstreamToInternalMap() {
+ return GlobalAbbrevBitstreamToInternalMap;
+ }
+
+ AbbrevLookupSizeMap &GetLookupMap() {
+ return LookupMap;
+ }
+
+ // Returns lower level vector of abbreviations.
+ const AbbrevVector &GetAbbrevs() const {
+ return Abbrevs;
+ }
+
+ // Returns the abbreviation (index) to use for the corresponding
+ // record, based on the abbreviations of this block. Note: Assumes
+ // that BuildAbbrevLookupSizeMap has already been called.
+ unsigned GetRecordAbbrevIndex(const NaClBitcodeRecordData &Record) {
+ unsigned BestIndex = 0; // Ignored unless found candidate.
+ unsigned BestScore = 0; // Number of bits associated with BestIndex.
+ bool FoundCandidate = false;
+ NaClBitcodeValues Values(Record);
+ size_t Size = Values.size();
+
+ if (Size > NaClValueIndexCutoff) Size = NaClValueIndexCutoff+1;
+ AbbrevLookupSizeMap::const_iterator Pos = LookupMap.find(Size);
+ if (Pos != LookupMap.end()) {
+ if (const AbbrevTrieNode *Node = Pos->second) {
+ if (const AbbrevTrieNode *MatchNode =
+ Node->MatchRecord(Record)) {
+ const std::set<AbbrevIndexPair> &Abbreviations =
+ MatchNode->GetAbbreviations();
+ for (std::set<AbbrevIndexPair>::const_iterator
+ Iter = Abbreviations.begin(),
+ IterEnd = Abbreviations.end();
+ Iter != IterEnd; ++Iter) {
+ uint64_t NumBits = 0;
+ if (CanUseAbbreviation(Values, Iter->second, NumBits)) {
+ if (!FoundCandidate || NumBits < BestScore) {
+ // Use this as candidate.
+ BestIndex = Iter->first;
+ BestScore = NumBits;
+ FoundCandidate = true;
+ }
+ }
+ }
+ }
+ }
+ }
+ if (FoundCandidate && BestScore <= UnabbreviatedSize(Record)) {
+ return BestIndex;
+ }
+ return naclbitc::UNABBREV_RECORD;
+ }
+
+ // Computes the number of bits that will be generated by the
+ // corresponding read record, if no abbreviation is used.
+ static uint64_t UnabbreviatedSize(const NaClBitcodeRecordData &Record) {
+ uint64_t NumBits = MatchVBRBits(Record.Code, DefaultVBRBits);
+ size_t NumValues = Record.Values.size();
+ NumBits += MatchVBRBits(NumValues, DefaultVBRBits);
+ for (size_t Index = 0; Index < NumValues; ++Index) {
+ NumBits += MatchVBRBits(Record.Values[Index], DefaultVBRBits);
+ }
+ return NumBits;
+ }
+
+ // Returns true if the given abbreviation can be used to represent the
+ // record. Sets NumBits to the number of bits the abbreviation will
+ // generate. Note: Value of NumBits is undefined if this function
+ // return false.
+ static bool CanUseAbbreviation(NaClBitcodeValues &Values,
+ NaClBitCodeAbbrev *Abbrev, uint64_t &NumBits) {
+ NumBits = 0;
+ unsigned OpIndex = 0;
+ unsigned OpIndexEnd = Abbrev->getNumOperandInfos();
+ size_t ValueIndex = 0;
+ size_t ValueIndexEnd = Values.size();
+ while (ValueIndex < ValueIndexEnd && OpIndex < OpIndexEnd) {
+ const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(OpIndex);
+ switch (Op.getEncoding()) {
+ case NaClBitCodeAbbrevOp::Literal:
+ if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {
+ ++ValueIndex;
+ ++OpIndex;
+ continue;
+ } else {
+ return false;
+ }
+ case NaClBitCodeAbbrevOp::Array: {
+ assert(OpIndex+2 == OpIndexEnd);
+ const NaClBitCodeAbbrevOp &ElmtOp =
+ Abbrev->getOperandInfo(OpIndex+1);
+
+ // Add size of array.
+ NumBits += MatchVBRBits(Values.size()-ValueIndex, DefaultVBRBits);
+
+ // Add size of each field.
+ for (; ValueIndex != ValueIndexEnd; ++ValueIndex) {
+ uint64_t FieldBits=0;
+ if (!CanUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) {
+ return false;
+ }
+ NumBits += FieldBits;
+ }
+ return true;
+ }
+ default: {
+ if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {
+ ++ValueIndex;
+ ++OpIndex;
+ break;
+ }
+ return false;
+ }
+ }
+ }
+ return ValueIndex == ValueIndexEnd && OpIndex == OpIndexEnd;
+ }
+
+ // Returns true if the given abbreviation Op defines a single value,
+ // and can be applied to the given Val. Adds the number of bits the
+ // abbreviation Op will generate to NumBits if Op applies.
+ static bool CanUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op,
+ uint64_t Val,
+ uint64_t &NumBits) {
+ switch (Op.getEncoding()) {
+ case NaClBitCodeAbbrevOp::Literal:
+ return Val == Op.getValue();
+ case NaClBitCodeAbbrevOp::Array:
+ return false;
+ case NaClBitCodeAbbrevOp::Fixed: {
+ uint64_t Width = Op.getValue();
+ if (!MatchFixedBits(Val, Width))
+ return false;
+ NumBits += Width;
+ return true;
+ }
+ case NaClBitCodeAbbrevOp::VBR:
+ if (unsigned Width = MatchVBRBits(Val, Op.getValue())) {
+ NumBits += Width;
+ return true;
+ } else {
+ return false;
+ }
+ case NaClBitCodeAbbrevOp::Char6:
+ if (!NaClBitCodeAbbrevOp::isChar6(Val)) return false;
+ NumBits += 6;
+ return true;
+ }
+ llvm_unreachable("unhandled NaClBitCodeAbbrevOp encoding");
+ }
+
+ // Returns true if the given Val can be represented by abbreviation
+ // operand Fixed(Width).
+ static bool MatchFixedBits(uint64_t Val, unsigned Width) {
+ // Note: The reader only allows up to 32 bits for fixed values.
+ if (Val & Mask32) return false;
+ if (Val & ~(~0U >> (32-Width))) return false;
+ return true;
+ }
+
+ // Returns the number of bits needed to represent Val by abbreviation
+ // operand VBR(Width). Note: Returns 0 if Val can't be represented
+ // by VBR(Width).
+ static unsigned MatchVBRBits(uint64_t Val, unsigned Width) {
+ if (Width == 0) return 0;
+ unsigned NumBits = 0;
+ while (1) {
+ // values emitted Width-1 bits at a time (plus a continue bit).
+ NumBits += Width;
+ if ((Val & (1U << (Width-1))) == 0)
+ return NumBits;
+ Val >>= Width-1;
+ }
+ }
+
+private:
+ // Defines the number of bits used to print VBR array field values.
+ static const unsigned DefaultVBRBits = 6;
+ // Masks out the top-32 bits of a uint64_t value.
+ static const uint64_t Mask32 = 0xFFFFFFFF00000000;
+ // The block ID for which abbreviations are being associated.
+ unsigned BlockID;
+ // The list of abbreviations defined for the block.
+ AbbrevVector Abbrevs;
+ // The mapping from global bitstream abbreviations to the corresponding
+ // block abbreviation index (in Abbrevs).
+ AbbrevBitstreamToInternalMap GlobalAbbrevBitstreamToInternalMap;
+ // A fast lookup map for finding the abbreviation that applies
+ // to a record.
+ AbbrevLookupSizeMap LookupMap;
+
+ void PrintLookupMap(raw_ostream &Stream) {
+ Stream << "------------------------------\n";
+ Stream << "Block " << GetBlockID() << " abbreviation tries:\n";
+ bool IsFirstIteration = true;
+ for (AbbrevLookupSizeMap::const_iterator
+ Iter = LookupMap.begin(), IterEnd = LookupMap.end();
+ Iter != IterEnd; ++Iter) {
+ if (IsFirstIteration)
+ IsFirstIteration = false;
+ else
+ Stream << "-----\n";
+ if (Iter->second) {
+ Stream << "Index " << Iter->first << ":\n";
+ Iter->second->Print(Stream, " ");
+ }
+ }
+ Stream << "------------------------------\n";
+ }
+};
+
+/// Defines a map from block ID's to the corresponding abbreviation
+/// map to use.
+typedef DenseMap<unsigned, BlockAbbrevs*> BlockAbbrevsMapType;
+
+/// Parses the bitcode file, analyzes it, and generates the
+/// corresponding lists of global abbreviations to use in the
+/// generated (compressed) bitcode file.
+class NaClAnalyzeParser : public NaClBitcodeParser {
+ NaClAnalyzeParser(const NaClAnalyzeParser&)
+ LLVM_DELETED_FUNCTION;
+ void operator=(const NaClAnalyzeParser&)
+ LLVM_DELETED_FUNCTION;
+
+public:
+ // Creates the analysis parser, which will fill the given
+ // BlockAbbrevsMap with appropriate abbreviations, after
+ // analyzing the bitcode file defined by Cursor.
+ NaClAnalyzeParser(NaClBitstreamCursor &Cursor,
+ BlockAbbrevsMapType &BlockAbbrevsMap)
+ : NaClBitcodeParser(Cursor),
+ BlockAbbrevsMap(BlockAbbrevsMap),
+ BlockDist(&NaClCompressBlockDistElement::Sentinel),
+ AbbrevListener(this)
+ {
+ SetListener(&AbbrevListener);
+ }
+
+ virtual ~NaClAnalyzeParser() {}
+
+ virtual bool Error(const std::string &Message) {
+ // Use local error routine so that all errors are treated uniformly.
+ return ::Error(Message);
+ }
+
+ virtual bool ParseBlock(unsigned BlockID);
+
+ // Mapping from block ID's to the corresponding list of abbreviations
+ // associated with that block.
+ BlockAbbrevsMapType &BlockAbbrevsMap;
+
+ // Nested distribution capturing distribution of records in bitcode file.
+ NaClBitcodeBlockDist BlockDist;
+
+ // Listener used to get abbreviations as they are read.
+ NaClBitcodeParserListener AbbrevListener;
+};
+
+class NaClBlockAnalyzeParser : public NaClBitcodeParser {
+ NaClBlockAnalyzeParser(const NaClBlockAnalyzeParser&)
+ LLVM_DELETED_FUNCTION;
+ void operator=(NaClBlockAnalyzeParser&)
+ LLVM_DELETED_FUNCTION;
+
+public:
+ /// Top-level constructor to build the top-level block with the
+ /// given BlockID, and collect data (for compression) in that block.
+ NaClBlockAnalyzeParser(unsigned BlockID,
+ NaClAnalyzeParser *Context)
+ : NaClBitcodeParser(BlockID, Context), Context(Context) {
+ Init();
+ }
+
+ virtual ~NaClBlockAnalyzeParser() {
+ Context->BlockDist.AddBlock(GetBlock());
+ }
+
+protected:
+ /// Nested constructor to parse a block within a block. Creates a
+ /// block parser to parse a block with the given BlockID, and
+ /// collect data (for compression) in that block.
+ NaClBlockAnalyzeParser(unsigned BlockID,
+ NaClBlockAnalyzeParser *EnclosingParser)
+ : NaClBitcodeParser(BlockID, EnclosingParser),
+ Context(EnclosingParser->Context) {
+ Init();
+ }
+
+public:
+ virtual bool Error(const std::string &Message) {
+ // Use local error routine so that all errors are treated uniformly.
+ return ::Error(Message);
+ }
+
+ virtual bool ParseBlock(unsigned BlockID) {
+ NaClBlockAnalyzeParser Parser(BlockID, this);
+ return Parser.ParseThisBlock();
+ }
+
+ virtual void ProcessRecord() {
+ // Before processing the record, we need to rename the abbreviation
+ // index, so that we can look it up in the set of block abbreviations
+ // being defined.
+ if (Record.UsedAnAbbreviation()) {
+ unsigned AbbrevIndex = Record.GetAbbreviationIndex();
+ if (LocalAbbrevBitstreamToInternalMap.
+ DefinesBitstreamAbbrevIndex(AbbrevIndex)) {
+ Record.SetAbbreviationIndex(
+ LocalAbbrevBitstreamToInternalMap.
+ GetInternalAbbrevIndex(AbbrevIndex));
+ } else {
+ AbbrevBitstreamToInternalMap &GlobalAbbrevBitstreamToInternalMap =
+ GlobalBlockAbbrevs->GetGlobalAbbrevBitstreamToInternalMap();
+ if (GlobalAbbrevBitstreamToInternalMap.
+ DefinesBitstreamAbbrevIndex(AbbrevIndex)) {
+ Record.SetAbbreviationIndex(
+ GlobalAbbrevBitstreamToInternalMap.
+ GetInternalAbbrevIndex(AbbrevIndex));
+ } else {
+ report_fatal_error("Bad abbreviation index in file");
+ }
+ }
+ }
+
+ cast<NaClCompressBlockDistElement>(
+ Context->BlockDist.GetElement(Record.GetBlockID()))
+ ->GetAbbrevDist().AddRecord(Record);
+ }
+
+ virtual void ProcessAbbreviation(unsigned BlockID,
+ NaClBitCodeAbbrev *Abbrev,
+ bool IsLocal) {
+ int Index;
+ AddAbbreviation(BlockID, Abbrev->Simplify(), Index);
+ if (IsLocal) {
+ LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index);
+ } else {
+ GetGlobalAbbrevs(BlockID)->GetGlobalAbbrevBitstreamToInternalMap().
+ InstallNewBitstreamAbbrevIndex(Index);
+ }
+ }
+
+protected:
+ // The context (i.e. top-level) parser.
+ NaClAnalyzeParser *Context;
+
+ // The global block abbreviations associated with this block.
+ BlockAbbrevs *GlobalBlockAbbrevs;
+
+ // The local abbreviations associated with this block.
+ AbbrevBitstreamToInternalMap LocalAbbrevBitstreamToInternalMap;
+
+ /// Returns the set of (global) block abbreviations defined for the
+ /// given block ID.
+ BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {
+ BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];
+ if (Abbrevs == 0) {
+ Abbrevs = new BlockAbbrevs(BlockID);
+ Context->BlockAbbrevsMap[BlockID] = Abbrevs;
+ }
+ return Abbrevs;
+ }
+
+ void SetGlobalAbbrevs(unsigned BlockID, BlockAbbrevs *Abbrevs) {
+ Context->BlockAbbrevsMap[BlockID] = Abbrevs;
+ }
+
+ // Adds the abbreviation to the list of abbreviations for the given
+ // block.
+ void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev,
+ int &Index) {
+ // Get block abbreviations.
+ BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID);
+
+ // Read abbreviation and add as a global abbreviation.
+ if (Abbrevs->AddAbbreviation(Abbrev, Index)
+ && TraceGeneratedAbbreviations) {
+ PrintAbbrev(errs(), BlockID, Abbrev);
+ }
+ }
+
+ /// Finds the index to the corresponding internal block abbreviation
+ /// for the given abbreviation.
+ int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
+ return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev);
+ }
+
+ void Init() {
+ GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID());
+ LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex(
+ GlobalBlockAbbrevs->
+ GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex());
+ }
+};
+
+bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) {
+ NaClBlockAnalyzeParser Parser(BlockID, this);
+ return Parser.ParseThisBlock();
+}
+
+/// Models the unrolling of an abbreviation into its sequence of
+/// individual operators. That is, unrolling arrays to match the width
+/// of the abbreviation.
+///
+/// For example, consider the abbreviation [Array(VBR(6))]. If the
+/// distribution map has data for records of size 3, and the
+/// distribution map suggests that a constant 4 appears as the second
+/// element in the record, it is nontrivial to figure out how to
+/// encorporate this into this abbrevation. Hence, we unroll the array
+/// (3 times) to get [VBR(6), VBR(6), VBR(6), Array(VBR(6))]. To
+/// update the second element to match the literal 4, we only need to
+/// replace the second element in the unrolled abbreviation resulting
+/// in [VBR(6), Lit(4), VBR(6), Array(VBR(6))].
+///
+/// After we have done appropriate substitutions, we can simplify the
+/// unrolled abbreviation by calling method Restore.
+///
+/// Note: We unroll in the form that best matches the distribution
+/// map. Hence, the code is stored as a separate operator. We also
+/// keep the array abbreviation op, for untracked elements within the
+/// distribution maps.
+class UnrolledAbbreviation {
+ void operator=(const UnrolledAbbreviation&) LLVM_DELETED_FUNCTION;
+public:
+ /// Unroll the given abbreviation, assuming it has the given size
+ /// (as specified in the distribution maps).
+ ///
+ /// If argument CanBeBigger is true, then we do not assume that we
+ /// can remove the trailing array when expanding, because the
+ /// actual size of the corresponding record using this abbreviation
+ /// may be bigger.
+ UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size,
+ bool CanBeBigger = false)
+ : CodeOp(0) {
+ unsigned NextOp = 0;
+ UnrollAbbrevOp(CodeOp, Abbrev, NextOp);
+ --Size;
+ for (unsigned i = 0; i < Size; ++i) {
+ // Create slot and then fill with appropriate operator.
+ AbbrevOps.push_back(CodeOp);
+ UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp);
+ }
+ if (CanBeBigger) {
+ for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) {
+ MoreOps.push_back(Abbrev->getOperandInfo(NextOp));
+ }
+ } else if (NextOp < Abbrev->getNumOperandInfos() &&
+ !Abbrev->getOperandInfo(NextOp).isArrayOp()) {
+ errs() << (Size+1) << ": ";
+ Abbrev->Print(errs());
+ llvm_unreachable("Malformed abbreviation/size pair");
+ }
+ }
+
+ explicit UnrolledAbbreviation(const UnrolledAbbreviation &Abbrev)
+ : CodeOp(Abbrev.CodeOp),
+ AbbrevOps(Abbrev.AbbrevOps),
+ MoreOps(Abbrev.MoreOps) {
+ }
+
+ /// Prints out the abbreviation modeled by the unrolled
+ /// abbreviation.
+ void Print(raw_ostream &Stream) const {
+ NaClBitCodeAbbrev *Abbrev = Restore(false);
+ Abbrev->Print(Stream);
+ Abbrev->dropRef();
+ }
+
+ /// Converts the unrolled abbreviation back into a regular
+ /// abbreviation. If Simplify is true, we simplify the
+ /// unrolled abbreviation as well.
+ NaClBitCodeAbbrev *Restore(bool Simplify=true) const {
+ NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
+ Abbrev->Add(CodeOp);
+ for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
+ Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end();
+ Iter != IterEnd; ++Iter) {
+ Abbrev->Add(*Iter);
+ }
+ for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
+ Iter = MoreOps.begin(), IterEnd = MoreOps.end();
+ Iter != IterEnd; ++Iter) {
+ Abbrev->Add(*Iter);
+ }
+ if (Simplify) {
+ NaClBitCodeAbbrev *SimpAbbrev = Abbrev->Simplify();
+ Abbrev->dropRef();
+ return SimpAbbrev;
+ } else {
+ return Abbrev;
+ }
+ }
+
+ // The abbreviation used for the record code.
+ NaClBitCodeAbbrevOp CodeOp;
+
+ // The abbreviations used for each tracked value index.
+ std::vector<NaClBitCodeAbbrevOp> AbbrevOps;
+
+private:
+ // Any remaining abbreviation operators not part of the unrolling.
+ std::vector<NaClBitCodeAbbrevOp> MoreOps;
+
+ // Extracts out the next abbreviation operator from the abbreviation
+ // Abbrev, given the index NextOp, and assigns it to AbbrevOp
+ void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp,
+ NaClBitCodeAbbrev *Abbrev,
+ unsigned &NextOp) {
+ assert(NextOp < Abbrev->getNumOperandInfos());
+ const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp);
+ if (Op.isArrayOp()) {
+ // Do not advance. The array operator assumes that all remaining
+ // elements should match its argument.
+ AbbrevOp = Abbrev->getOperandInfo(NextOp+1);
+ } else {
+ AbbrevOp = Op;
+ NextOp++;
+ }
+ }
+};
+
+/// Models a candidate block abbreviation, which is a blockID, and the
+/// corresponding abbreviation to be considered for addition. Note:
+/// Constructors and assignment take ownership of the abbreviation.
+class CandBlockAbbrev {
+public:
+ CandBlockAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbrev)
+ : BlockID(BlockID), Abbrev(Abbrev) {
+ }
+
+ CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev)
+ : BlockID(BlockAbbrev.BlockID),
+ Abbrev(BlockAbbrev.Abbrev) {
+ Abbrev->addRef();
+ }
+
+ void operator=(const CandBlockAbbrev &BlockAbbrev) {
+ Abbrev->dropRef();
+ BlockID = BlockAbbrev.BlockID;
+ Abbrev = BlockAbbrev.Abbrev;
+ Abbrev->addRef();
+ }
+
+ ~CandBlockAbbrev() {
+ Abbrev->dropRef();
+ }
+
+ /// The block ID of the candidate abbreviation.
+ unsigned GetBlockID() const {
+ return BlockID;
+ }
+
+ /// The abbreviation of the candidate abbreviation.
+ const NaClBitCodeAbbrev *GetAbbrev() const {
+ return Abbrev;
+ }
+
+ /// orders this against the candidate abbreviation.
+ int Compare(const CandBlockAbbrev &CandAbbrev) const {
+ unsigned diff = BlockID - CandAbbrev.BlockID;
+ if (diff) return diff;
+ return Abbrev->Compare(*CandAbbrev.Abbrev);
+ }
+
+ /// Prints the candidate abbreviation to the given stream.
+ void Print(raw_ostream &Stream) const {
+ PrintAbbrev(Stream, BlockID, Abbrev);
+ }
+
+private:
+ // The block the abbreviation applies to.
+ unsigned BlockID;
+ // The candidate abbreviation.
+ NaClBitCodeAbbrev *Abbrev;
+};
+
+static inline bool operator<(const CandBlockAbbrev &A1,
+ const CandBlockAbbrev &A2) {
+ return A1.Compare(A2) < 0;
+}
+
+/// Models the set of candidate abbreviations being considered, and
+/// the number of abbreviations associated with each candidate
+/// Abbreviation.
+///
+/// Note: Because we may have abbreviation refinements of A->B->C and
+/// A->D->C, we need to accumulate instance counts in such cases.
+class CandidateAbbrevs {
+public:
+ // Map from candidate abbreviations to the corresponding number of
+ // instances.
+ typedef std::map<CandBlockAbbrev, unsigned> AbbrevCountMap;
+ typedef AbbrevCountMap::const_iterator const_iterator;
+
+ /// Creates an empty set of candidate abbreviations, to be
+ /// (potentially) added to the given set of block abbreviations.
+ /// Argument is the (global) block abbreviations map, which is
+ /// used to determine if it already exists.
+ CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap)
+ : BlockAbbrevsMap(BlockAbbrevsMap)
+ {}
+
+ /// Adds the given (unrolled) abbreviation as a candidate
+ /// abbreviation to the given block. NumInstances is the number of
+ /// instances expected to use this candidate abbreviation. Returns
+ /// true if the corresponding candidate abbreviation is added to this
+ /// set of candidate abbreviations.
+ bool Add(unsigned BlockID,
+ UnrolledAbbreviation &UnrolledAbbrev,
+ unsigned NumInstances);
+
+ /// Returns the list of candidate abbreviations in this set.
+ const AbbrevCountMap &GetAbbrevsMap() const {
+ return AbbrevsMap;
+ }
+
+ /// Prints out the current contents of this set.
+ void Print(raw_ostream &Stream, const char *Title = "Candidates") const {
+ Stream << "-- " << Title << ": \n";
+ for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end();
+ Iter != IterEnd; ++Iter) {
+ Stream << format("%12u", Iter->second) << ": ";
+ Iter->first.Print(Stream);
+ }
+ Stream << "--\n";
+ }
+
+private:
+ // The set of abbreviations and corresponding number instances.
+ AbbrevCountMap AbbrevsMap;
+
+ // The map of (global) abbreviations already associated with each block.
+ BlockAbbrevsMapType &BlockAbbrevsMap;
+};
+
+bool CandidateAbbrevs::Add(unsigned BlockID,
+ UnrolledAbbreviation &UnrolledAbbrev,
+ unsigned NumInstances) {
+ // Drop if it corresponds to an existing global abbreviation.
+ NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.Restore();
+ if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) {
+ if (Abbrevs->FindAbbreviation(Abbrev) !=
+ BlockAbbrevs::NO_SUCH_ABBREVIATION) {
+ Abbrev->dropRef();
+ return false;
+ }
+ }
+
+ CandBlockAbbrev CandAbbrev(BlockID, Abbrev);
+ AbbrevCountMap::iterator Pos = AbbrevsMap.find(CandAbbrev);
+ if (Pos == AbbrevsMap.end()) {
+ AbbrevsMap[CandAbbrev] = NumInstances;
+ } else {
+ Pos->second += NumInstances;
+ }
+ return true;
+}
+
+// Look for new abbreviations in block BlockID, considering it was
+// read with the given abbreviation Abbrev, and considering changing
+// the abbreviation opererator for value Index. ValueDist is how
+// values at Index are distributed. Any found abbreviations are added
+// to the candidate abbreviations CandAbbrevs. Returns true only if we
+// have added new candidate abbreviations to CandAbbrevs.
+static bool AddNewAbbreviations(unsigned BlockID,
+ const UnrolledAbbreviation &Abbrev,
+ unsigned Index,
+ NaClBitcodeValueDist &ValueDist,
+ CandidateAbbrevs &CandAbbrevs) {
+ // TODO(kschimpf): Add code to try and find a better encoding for
+ // the values, based on the distribution.
+
+ // If this index is already a literal abbreviation, no improvements can
+ // be made.
+ const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index];
+ if (Op.isLiteral()) return false;
+
+ // Search based on sorted distribution, which sorts by number of
+ // instances. Start by trying to find possible constants to use.
+ const NaClBitcodeDist::Distribution *
+ Distribution = ValueDist.GetDistribution();
+ for (NaClBitcodeDist::Distribution::const_iterator
+ Iter = Distribution->begin(), IterEnd = Distribution->end();
+ Iter != IterEnd; ++Iter) {
+ NaClValueRangeType Range = GetNaClValueRange(Iter->second);
+ if (Range.first != Range.second) continue; // not a constant.
+
+ // Defines a constant. Try as new candidate range. In addition,
+ // don't try any more constant values, since this is the one with
+ // the greatest number of instances.
+ NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first);
+ UnrolledAbbreviation CandAbbrev(Abbrev);
+ CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first);
+ return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances());
+ }
+ return false;
+}
+
+// Look for new abbreviations in block BlockID, considering it was
+// read with the given abbreviation Abbrev. IndexDist is the
+// corresponding distribution of value indices associated with the
+// abbreviation. Any found abbreviations are added to the candidate
+// abbreviations CandAbbrevs.
+static void AddNewAbbreviations(unsigned BlockID,
+ const UnrolledAbbreviation &Abbrev,
+ NaClBitcodeDist &IndexDist,
+ CandidateAbbrevs &CandAbbrevs) {
+ // Search based on sorted distribution, which sorts based on heuristic
+ // of best index to fix first.
+ const NaClBitcodeDist::Distribution *
+ IndexDistribution = IndexDist.GetDistribution();
+ for (NaClBitcodeDist::Distribution::const_iterator
+ IndexIter = IndexDistribution->begin(),
+ IndexIterEnd = IndexDistribution->end();
+ IndexIter != IndexIterEnd; ++IndexIter) {
+ unsigned Index = static_cast<unsigned>(IndexIter->second);
+ if (AddNewAbbreviations(
+ BlockID, Abbrev, Index,
+ cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index))
+ ->GetValueDist(),
+ CandAbbrevs)) {
+ return;
+ }
+ }
+}
+
+// Look for new abbreviations in block BlockID, considering it was
+// read with the given abbreviation Abbrev, and the given record Code.
+// SizeDist is the corresponding distribution of sizes associated with
+// the abbreviation. Any found abbreviations are added to the
+// candidate abbreviations CandAbbrevs.
+static void AddNewAbbreviations(unsigned BlockID,
+ NaClBitCodeAbbrev *Abbrev,
+ unsigned Code,
+ NaClBitcodeDist &SizeDist,
+ CandidateAbbrevs &CandAbbrevs) {
+ const NaClBitcodeDist::Distribution *
+ SizeDistribution = SizeDist.GetDistribution();
+ for (NaClBitcodeDist::Distribution::const_iterator
+ SizeIter = SizeDistribution->begin(),
+ SizeIterEnd = SizeDistribution->end();
+ SizeIter != SizeIterEnd; ++SizeIter) {
+ unsigned Size = static_cast<unsigned>(SizeIter->second);
+ UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1 /* Add code! */,
+ Size >= NaClValueIndexCutoff);
+ if (!UnrolledAbbrev.CodeOp.isLiteral()) {
+ // Try making the code a literal.
+ UnrolledAbbreviation CandAbbrev(UnrolledAbbrev);
+ CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code);
+ CandAbbrevs.Add(BlockID, CandAbbrev,
+ SizeDist.at(Size)->GetNumInstances());
+ }
+ // Now process value indices to find candidate abbreviations.
+ AddNewAbbreviations(
+ BlockID, UnrolledAbbrev,
+ cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size))
+ ->GetValueIndexDist(),
+ CandAbbrevs);
+ }
+}
+
+// Look for new abbreviations in block BlockID. Abbrevs is the map of
+// read (globally defined) abbreviations associated with the
+// BlockID. AbbrevDist is the distribution map of abbreviations
+// associated with BlockID. Any found abbreviations are added to the
+// candidate abbreviations CandAbbrevs.
+static void AddNewAbbreviations(unsigned BlockID,
+ BlockAbbrevs *Abbrevs,
+ NaClBitcodeDist &AbbrevDist,
+ CandidateAbbrevs &CandAbbrevs) {
+ const NaClBitcodeDist::Distribution *
+ AbbrevDistribution = AbbrevDist.GetDistribution();
+ for (NaClBitcodeDist::Distribution::const_iterator
+ AbbrevIter = AbbrevDistribution->begin(),
+ AbbrevIterEnd = AbbrevDistribution->end();
+ AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
+ NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second;
+ NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex);
+ NaClBitcodeAbbrevDistElement *AbbrevElmt =
+ cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex));
+ NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist();
+
+ const NaClBitcodeDist::Distribution *
+ CodeDistribution = CodeDist.GetDistribution();
+ for (NaClBitcodeDist::Distribution::const_iterator
+ CodeIter = CodeDistribution->begin(),
+ CodeIterEnd = CodeDistribution->end();
+ CodeIter != CodeIterEnd; ++CodeIter) {
+ unsigned Code = static_cast<unsigned>(CodeIter->second);
+ AddNewAbbreviations(
+ BlockID,
+ Abbrev,
+ Code,
+ cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second))
+ ->GetSizeDist(),
+ CandAbbrevs);
+ }
+ }
+}
+
+typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType;
+
+// Look for new abbreviations in the given block distribution map
+// BlockDist. BlockAbbrevsMap contains the set of read global
+// abbreviations. Adds found candidate abbreviations to the set of
+// known global abbreviations.
+static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist,
+ BlockAbbrevsMapType &BlockAbbrevsMap) {
+ CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap);
+ // Start by collecting candidate abbreviations.
+ const NaClBitcodeDist::Distribution *
+ Distribution = BlockDist.GetDistribution();
+ for (NaClBitcodeDist::Distribution::const_iterator
+ Iter = Distribution->begin(), IterEnd = Distribution->end();
+ Iter != IterEnd; ++Iter) {
+ unsigned BlockID = static_cast<unsigned>(Iter->second);
+ AddNewAbbreviations(
+ BlockID,
+ BlockAbbrevsMap[BlockID],
+ cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))
+ ->GetAbbrevDist(),
+ CandAbbrevs);
+ }
+ // Install candidate abbreviations.
+ //
+ // Sort the candidate abbreviations by number of instances, so that
+ // if multiple abbreviations apply, the one with the largest number
+ // of instances will be chosen when compressing a file.
+ //
+ // For example, we may have just generated two abbreviations. The
+ // first has replaced the 3rd element with the constant 4 while the
+ // second replaced the 4th element with the constant 5. The first
+ // abbreviation can apply to 300 records while the second can apply
+ // to 1000 records. Assuming that both abbreviations shrink the
+ // record by the same number of bits, we clearly want the tool to
+ // choose the second abbreviation when selecting the abbreviation
+ // index to choose (via method GetRecordAbbrevIndex).
+ //
+ // Selecting the second is important in that abbreviation are
+ // refined by successive calls to this tool. We do not want to
+ // restrict downstream refinements prematurely. Hence, we want the
+ // tool to choose the abbreviation with the most candidates first.
+ //
+ // Since method GetRecordAbbrevIndex chooses the first abbreviation
+ // that generates the least number of bits, we clearly want to make
+ // sure that the second abbreviation occurs before the first.
+ std::vector<CountedAbbrevType> Candidates;
+ for (CandidateAbbrevs::const_iterator
+ Iter = CandAbbrevs.GetAbbrevsMap().begin(),
+ IterEnd = CandAbbrevs.GetAbbrevsMap().end();
+ Iter != IterEnd; ++Iter) {
+ Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first));
+ }
+ std::sort(Candidates.begin(), Candidates.end());
+ std::vector<CountedAbbrevType>::const_reverse_iterator
+ Iter = Candidates.rbegin(), IterEnd = Candidates.rend();
+ if (Iter == IterEnd) return;
+
+ if (TraceGeneratedAbbreviations) {
+ errs() << "-- New abbrevations:\n";
+ }
+ unsigned Min = (Iter->first >> 2);
+ for (; Iter != IterEnd; ++Iter) {
+ if (Iter->first < Min) break;
+ unsigned BlockID = Iter->second.GetBlockID();
+ BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID];
+ NaClBitCodeAbbrev *Abbrev = Iter->second.GetAbbrev()->Copy();
+ if (TraceGeneratedAbbreviations) {
+ errs() <<format("%12u", Iter->first) << ": ";
+ PrintAbbrev(errs(), BlockID, Abbrev);
+ }
+ Abbrevs->AddAbbreviation(Abbrev);
+ }
+ if (TraceGeneratedAbbreviations) {
+ errs() << "--\n";
+ }
+}
+
+// Walks the block distribution (BlockDist), sorting entries based
+// on the distribution of blocks and abbreviations, and then
+// prints out the frequency of each abbreviation used.
+static void DisplayAbbreviationFrequencies(
+ raw_ostream &Output,
+ const NaClBitcodeBlockDist &BlockDist,
+ const BlockAbbrevsMapType &BlockAbbrevsMap) {
+ const NaClBitcodeDist::Distribution *BlockDistribution =
+ BlockDist.GetDistribution();
+ for (NaClBitcodeDist::Distribution::const_iterator
+ BlockIter = BlockDistribution->begin(),
+ BlockIterEnd = BlockDistribution->end();
+ BlockIter != BlockIterEnd; ++BlockIter) {
+ unsigned BlockID = static_cast<unsigned>(BlockIter->second);
+ BlockAbbrevsMapType::const_iterator BlockPos = BlockAbbrevsMap.find(BlockID);
+ if (BlockPos == BlockAbbrevsMap.end()) continue;
+ Output << "Block " << BlockID << "\n";
+ if (NaClCompressBlockDistElement *BlockElement =
+ dyn_cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))) {
+ NaClBitcodeDist &AbbrevDist = BlockElement->GetAbbrevDist();
+ const NaClBitcodeDist::Distribution *AbbrevDistribution =
+ AbbrevDist.GetDistribution();
+ unsigned Total = AbbrevDist.GetTotal();
+ for (NaClBitcodeDist::Distribution::const_iterator
+ AbbrevIter = AbbrevDistribution->begin(),
+ AbbrevIterEnd = AbbrevDistribution->end();
+ AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
+ unsigned Index = static_cast<unsigned>(AbbrevIter->second);
+ unsigned Count = AbbrevDist.at(Index)->GetNumInstances();
+ Output << format("%8u (%6.2f%%): ", Count,
+ (double) Count/Total*100.0);
+ BlockPos->second->GetIndexedAbbrev(Index)->Print(Output);
+ }
+ }
+ Output << '\n';
+ }
+}
+
+// Read in bitcode, analyze data, and figure out set of abbreviations
+// to use, from memory buffer MemBuf containing the input bitcode file.
+static bool AnalyzeBitcode(std::unique_ptr<MemoryBuffer> &MemBuf,
+ BlockAbbrevsMapType &BlockAbbrevsMap) {
+ // TODO(kschimpf): The current code only extracts abbreviations
+ // defined in the bitcode file. This code needs to be updated to
+ // collect data distributions and figure out better (global)
+ // abbreviations to use.
+
+ const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
+ const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
+
+ // First read header and verify it is good.
+ NaClBitcodeHeader Header;
+ if (Header.Read(BufPtr, EndBufPtr) || !Header.IsSupported())
+ return Error("Invalid PNaCl bitcode header");
+
+ // Create a bitstream reader to read the bitcode file.
+ NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
+ NaClBitstreamCursor Stream(StreamFile);
+
+ // Parse the the bitcode file.
+ NaClAnalyzeParser Parser(Stream, BlockAbbrevsMap);
+ while (!Stream.AtEndOfStream()) {
+ if (Parser.Parse()) return true;
+ }
+
+ if (ShowAbbreviationFrequencies || ShowValueDistributions) {
+ std::error_code EC;
+ raw_fd_ostream Output(OutputFilename, EC, sys::fs::F_None);
+ if (EC) {
+ errs() << EC.message() << "\n";
+ exit(1);
+ }
+ if (ShowAbbreviationFrequencies)
+ DisplayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap);
+ if (ShowValueDistributions)
+ Parser.BlockDist.Print(Output);
+ }
+
+ AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap);
+ return false;
+}
+
+/// Parses the input bitcode file and generates the corresponding
+/// compressed bitcode file, by replacing abbreviations in the input
+/// file with the corresponding abbreviations defined in
+/// BlockAbbrevsMap.
+class NaClBitcodeCopyParser : public NaClBitcodeParser {
+public:
+ // Top-level constructor to build the appropriate block parser
+ // using the given BlockAbbrevsMap to define abbreviations.
+ NaClBitcodeCopyParser(NaClBitstreamCursor &Cursor,
+ BlockAbbrevsMapType &BlockAbbrevsMap,
+ NaClBitstreamWriter &Writer)
+ : NaClBitcodeParser(Cursor),
+ BlockAbbrevsMap(BlockAbbrevsMap),
+ Writer(Writer),
+ FoundFirstBlockInfo(false)
+ {}
+
+ virtual ~NaClBitcodeCopyParser() {}
+
+ virtual bool Error(const std::string &Message) {
+ // Use local error routine so that all errors are treated uniformly.
+ return ::Error(Message);
+ }
+
+ virtual bool ParseBlock(unsigned BlockID);
+
+ // The abbreviations to use for the copied bitcode.
+ BlockAbbrevsMapType &BlockAbbrevsMap;
+
+ // The bitstream to copy the compressed bitcode into.
+ NaClBitstreamWriter &Writer;
+
+ // True if we have already found the first block info block.
+ // Used to make sure we don't use abbreviations until we
+ // have put them into the bitcode file.
+ bool FoundFirstBlockInfo;
+};
+
+class NaClBlockCopyParser : public NaClBitcodeParser {
+public:
+ // Top-level constructor to build the appropriate block parser.
+ NaClBlockCopyParser(unsigned BlockID,
+ NaClBitcodeCopyParser *Context)
+ : NaClBitcodeParser(BlockID, Context),
+ Context(Context),
+ BlockAbbreviations(0)
+ {}
+
+ virtual ~NaClBlockCopyParser() {}
+
+protected:
+ // The context defining state associated with the block parser.
+ NaClBitcodeCopyParser *Context;
+
+ // The block abbreviations defined for this block (initialized by
+ // EnterBlock).
+ BlockAbbrevs *BlockAbbreviations;
+
+ /// Constructor to parse nested blocks. Creates a block parser to
+ /// parse in a block with the given BlockID, and write the block
+ /// back out using the abbreviations in BlockAbbrevsMap.
+ NaClBlockCopyParser(unsigned BlockID,
+ NaClBlockCopyParser *EnclosingParser)
+ : NaClBitcodeParser(BlockID, EnclosingParser),
+ Context(EnclosingParser->Context)
+ {}
+
+ virtual bool Error(const std::string &Message) {
+ // Use local error routine so that all errors are treated uniformly.
+ return ::Error(Message);
+ }
+
+ /// Returns the set of (global) block abbreviations defined for the
+ /// given block ID.
+ BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {
+ BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];
+ if (Abbrevs == 0) {
+ Abbrevs = new BlockAbbrevs(BlockID);
+ Context->BlockAbbrevsMap[BlockID] = Abbrevs;
+ }
+ return Abbrevs;
+ }
+
+ virtual bool ParseBlock(unsigned BlockID) {
+ NaClBlockCopyParser Parser(BlockID, this);
+ return Parser.ParseThisBlock();
+ }
+
+ virtual void EnterBlock(unsigned NumWords) {
+ unsigned BlockID = GetBlockID();
+ BlockAbbreviations = GetGlobalAbbrevs(BlockID);
+
+ // Enter the subblock.
+ NaClBitcodeSelectorAbbrev
+ Selector(BlockAbbreviations->GetNumberAbbreviations()-1);
+ if (RemoveAbbreviations) Selector = NaClBitcodeSelectorAbbrev();
+ Context->Writer.EnterSubblock(BlockID, Selector);
+
+ // Note: We must dump module abbreviations as local
+ // abbreviations, because they are in a yet to be
+ // dumped BlockInfoBlock.
+ if (!RemoveAbbreviations && BlockID == naclbitc::MODULE_BLOCK_ID) {
+ BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID);
+ for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) {
+ Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy());
+ }
+ }
+ }
+
+ virtual void ExitBlock() {
+ Context->Writer.ExitBlock();
+ }
+
+ virtual void ProcessBlockInfo() {
+ assert(!Context->FoundFirstBlockInfo &&
+ "Input bitcode has more that one BlockInfoBlock");
+ Context->FoundFirstBlockInfo = true;
+
+ // Generate global abbreviations within a blockinfo block.
+ Context->Writer.EnterBlockInfoBlock();
+ if (!RemoveAbbreviations) {
+ for (BlockAbbrevsMapType::const_iterator
+ Iter = Context->BlockAbbrevsMap.begin(),
+ IterEnd = Context->BlockAbbrevsMap.end();
+ Iter != IterEnd; ++Iter) {
+ unsigned BlockID = Iter->first;
+ // Don't emit module abbreviations, since they have been
+ // emitted as local abbreviations.
+ if (BlockID == naclbitc::MODULE_BLOCK_ID) continue;
+
+ BlockAbbrevs *Abbrevs = Iter->second;
+ if (Abbrevs == 0) continue;
+ for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation();
+ i < Abbrevs->GetNumberAbbreviations(); ++i) {
+ NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i);
+ Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev);
+ }
+ }
+ }
+ Context->Writer.ExitBlock();
+ }
+
+ virtual void ProcessRecord() {
+ const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
+ if (RemoveAbbreviations) {
+ Context->Writer.EmitRecord(Record.GetCode(), Values, 0);
+ return;
+ }
+ // Find best fitting abbreviation to use, and print out the record
+ // using that abbreviations.
+ unsigned AbbrevIndex =
+ BlockAbbreviations->GetRecordAbbrevIndex(Record.GetRecordData());
+ if (AbbrevIndex == naclbitc::UNABBREV_RECORD) {
+ Context->Writer.EmitRecord(Record.GetCode(), Values, 0);
+ } else {
+ Context->Writer.EmitRecord(Record.GetCode(), Values, AbbrevIndex);
+ }
+ }
+};
+
+bool NaClBitcodeCopyParser::ParseBlock(unsigned BlockID) {
+ NaClBlockCopyParser Parser(BlockID, this);
+ return Parser.ParseThisBlock();
+}
+
+// Read in bitcode, and write it back out using the abbreviations in
+// BlockAbbrevsMap, from memory buffer MemBuf containing the input
+// bitcode file.
+static bool CopyBitcode(std::unique_ptr<MemoryBuffer> &MemBuf,
+ BlockAbbrevsMapType &BlockAbbrevsMap) {
+
+ const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
+ const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
+
+ // Read header. No verification is needed since AnalyzeBitcode has
+ // already checked it.
+ NaClBitcodeHeader Header;
+ if (Header.Read(BufPtr, EndBufPtr))
+ return Error("Invalid PNaCl bitcode header");
+
+ // Create the bitcode reader.
+ NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
+ NaClBitstreamCursor Stream(StreamFile);
+
+ // Create the bitcode writer.
+ SmallVector<char, 0> OutputBuffer;
+ OutputBuffer.reserve(256*1024);
+ NaClBitstreamWriter StreamWriter(OutputBuffer);
+
+ // Emit the file header.
+ NaClWriteHeader(Header, StreamWriter);
+
+ // Set up the parser.
+ NaClBitcodeCopyParser Parser(Stream, BlockAbbrevsMap, StreamWriter);
+
+ // Parse the bitcode and copy.
+ while (!Stream.AtEndOfStream()) {
+ if (Parser.Parse()) return true;
+ }
+
+ // Write out the copied results.
+ std::error_code EC;
+ std::unique_ptr<tool_output_file> OutFile(
+ new tool_output_file(OutputFilename.c_str(), EC, sys::fs::F_None));
+ if (EC)
+ return Error(EC.message());
+
+ // Write the generated bitstream to "Out".
+ OutFile->os().write((char*)&OutputBuffer.front(),
+ OutputBuffer.size());
+ OutFile->keep();
+
+ return false;
+}
+
+// Build fast lookup abbreviation maps for each of the abbreviation blocks
+// defined in AbbrevsMap.
+static void BuildAbbrevLookupMaps(BlockAbbrevsMapType &AbbrevsMap) {
+ for (BlockAbbrevsMapType::const_iterator
+ Iter = AbbrevsMap.begin(),
+ IterEnd = AbbrevsMap.end();
+ Iter != IterEnd; ++Iter) {
+ Iter->second->BuildAbbrevLookupSizeMap();
+ }
+}
+
+} // namespace
+
+int main(int argc, char **argv) {
+ // Print a stack trace if we signal out.
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+ cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n");
+
+ std::unique_ptr<MemoryBuffer> MemBuf;
+ if (ReadAndBuffer(MemBuf)) return 1;
+ BlockAbbrevsMapType BlockAbbrevsMap;
+ if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1;
+ if (ShowAbbreviationFrequencies || ShowValueDistributions) {
+ return 0;
+ }
+ BuildAbbrevLookupMaps(BlockAbbrevsMap);
+ if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1;
+ return 0;
+}
« no previous file with comments | « tools/pnacl-bccompress/Makefile ('k') | tools/pnacl-bcdis/CMakeLists.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698