tools/pnacl-bccompress/pnacl-bccompress.cpp - Issue 939073008: Rebased PNaCl localmods in LLVM to 223109

Unified Diff: tools/pnacl-bccompress/pnacl-bccompress.cpp

Issue 939073008: Rebased PNaCl localmods in LLVM to 223109 (Closed)

Patch Set: undo localmod Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/pnacl-bccompress/pnacl-bccompress.cpp

diff --git a/tools/pnacl-bccompress/pnacl-bccompress.cpp b/tools/pnacl-bccompress/pnacl-bccompress.cpp

new file mode 100644

index 0000000000000000000000000000000000000000..193a81f13352fafff5f99c25cc507039b98b7d45

--- /dev/null

+++ b/tools/pnacl-bccompress/pnacl-bccompress.cpp

@@ -0,0 +1,1517 @@

+//===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===//

+//

+// The LLVM Compiler Infrastructure

+//

+// This file is distributed under the University of Illinois Open Source

+// License. See LICENSE.TXT for details.

+//

+//===----------------------------------------------------------------------===//

+//

+// This tool may be invoked in the following manner:

+// pnacl-bccompress [options] bcin.pexe -o bcout.pexe

+// - Read frozen PNaCl bitcode from the bcin.pexe and introduce

+// abbreviations to compress it into bcout.pexe.

+//

+// Options:

+// --help - Output information about command line switches

+//

+// This tool analyzes the data in bcin.pexe, and determines what

+// abbreviations can be added to compress the bitcode file. The result

+// is written to bcout.pexe.

+//

+// A bitcode file has two types of abbreviations. The first are Global

+// abbreviations that apply to all instances of a particular type of

+// block. These abbreviations appear in the BlockInfo block of the

+// bitcode file.

+//

+// The second type of abbreviations are local to a particular instance

+// of a block.

+//

+// In pnacl-bccompress, for simplicity, we will only add global

+// abbreviations. Local abbreviations are converted to corresponding

+// global abbreviations, so that they can be added as global

+// abbreviations.

+//

+// The process of compressing is done by reading the input file

+// twice. In the first round, the records are read and analyzed,

+// generating a set of (global) abbreviations to use to generate the

+// compressed output file. Then, the input file is read again, and for

+// each record, the best fitting global abbreviation is found (or it

+// figures out that leaving the record unabbreviated is best) and

+// writes the record out accordingly.

+//

+// TODO(kschimpf): The current implementation does a trivial solution

+// for the first round. It just converts all abbreviations (local and

+// global) into global abbreviations. Future refinements of this file

+// will generate better (and more intelligent) global abbreviations,

+// based on the records found on the first read of the bitcode file.

+//

+//===----------------------------------------------------------------------===//

+#include "llvm/ADT/DenseMap.h"

+#include "llvm/ADT/DenseSet.h"

+#include "llvm/ADT/SmallVector.h"

+#include "llvm/Bitcode/NaCl/AbbrevTrieNode.h"

+#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"

+#include "llvm/Bitcode/NaCl/NaClBitcodeParser.h"

+#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h"

+#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h"

+#include "llvm/Bitcode/NaCl/NaClCompressBlockDist.h"

+#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"

+#include "llvm/Support/CommandLine.h"

+#include "llvm/Support/FileSystem.h"

+#include "llvm/Support/Format.h"

+#include "llvm/Support/ManagedStatic.h"

+#include "llvm/Support/MemoryBuffer.h"

+#include "llvm/Support/PrettyStackTrace.h"

+#include "llvm/Support/Signals.h"

+#include "llvm/Support/ToolOutputFile.h"

+#include "llvm/Support/raw_ostream.h"

+#include <set>

+#include <map>

+#include <system_error>

+namespace {

+using namespace llvm;

+static cl::opt<bool>

+TraceGeneratedAbbreviations(

+ "abbreviations",

+ cl::desc("Trace abbreviations added to compressed file"),

+ cl::init(false));

+static cl::opt<std::string>

+ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));

+static cl::opt<std::string>

+OutputFilename("o", cl::desc("Specify output filename"),

+ cl::value_desc("filename"), cl::init("-"));

+static cl::opt<bool>

+ShowValueDistributions(

+ "show-distributions",

+ cl::desc("Show collected value distributions in bitcode records. "

+ "Turns off compression."),

+ cl::init(false));

+static cl::opt<bool>

+ShowAbbrevLookupTries(

+ "show-lookup-tries",

+ cl::desc("Show lookup tries used to minimize search for \n"

+ "matching abbreviations."),

+ cl::init(false));

+static cl::opt<bool>

+ShowAbbreviationFrequencies(

+ "show-abbreviation-frequencies",

+ cl::desc("Show how often each abbreviation is used. "

+ "Turns off compression."),

+ cl::init(false));

+// Note: When this flag is true, we still generate new abbreviations,

+// because we don't want to add the complexity of turning it off.

+// Rather, we simply make sure abbreviations are ignored when writing

+// out the final copy.

+static cl::opt<bool>

+RemoveAbbreviations(

+ "remove-abbreviations",

+ cl::desc("Remove abbreviations from input bitcode file."),

+ cl::init(false));

+/// Error - All bitcode analysis errors go through this function,

+/// making this a good place to breakpoint if debugging.

+static bool Error(const std::string &Err) {

+ errs() << Err << "\n";

+ return true;

+// Prints out the abbreviation in readable form to the given Stream.

+static void PrintAbbrev(raw_ostream &Stream,

+ unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {

+ Stream << "Abbrev(block " << BlockID << "): ";

+ Abbrev->Print(Stream);

+// Reads the input file into the given buffer.

+static bool ReadAndBuffer(std::unique_ptr<MemoryBuffer> &MemBuf) {

+ ErrorOr<std::unique_ptr<MemoryBuffer>> ErrOrFile =

+ MemoryBuffer::getFileOrSTDIN(InputFilename);

+ if (std::error_code EC = ErrOrFile.getError())

+ return Error("Error reading '" + InputFilename + "': " + EC.message());

+ MemBuf.reset(ErrOrFile.get().release());

+ if (MemBuf->getBufferSize() % 4 != 0)

+ return Error("Bitcode stream should be a multiple of 4 bytes in length");

+ return false;

+/// type map from bitstream abbreviation index to corresponding

+/// internal abbreviation index.

+typedef std::map<unsigned, unsigned> BitstreamToInternalAbbrevMapType;

+/// Defines a mapping from bitstream abbreviation indices, to

+/// corresponding internal abbreviation indices.

+class AbbrevBitstreamToInternalMap {

+public:

+ AbbrevBitstreamToInternalMap()

+ : NextBitstreamAbbrevIndex(0) {}

+ /// Returns the bitstream abbreviaion index that will be associated

+ /// with the next internal abbreviation index.

+ unsigned GetNextBitstreamAbbrevIndex() {

+ return NextBitstreamAbbrevIndex;

+ }

+ /// Changes the next bitstream abbreviation index to the given value.

+ void SetNextBitstreamAbbrevIndex(unsigned NextIndex) {

+ NextBitstreamAbbrevIndex = NextIndex;

+ }

+ /// Returns true if there is an internal abbreviation index for the

+ /// given bitstream abbreviation.

+ bool DefinesBitstreamAbbrevIndex(unsigned Index) {

+ return BitstreamToInternalAbbrevMap.find(Index) !=

+ BitstreamToInternalAbbrevMap.end();

+ }

+ /// Returns the internal abbreviation index for the given bitstream

+ /// abbreviation index.

+ unsigned GetInternalAbbrevIndex(unsigned Index) {

+ return BitstreamToInternalAbbrevMap.at(Index);

+ }

+ /// Installs the given internal abbreviation index using the next

+ /// available bitstream abbreviation index.

+ void InstallNewBitstreamAbbrevIndex(unsigned InternalIndex) {

+ BitstreamToInternalAbbrevMap[NextBitstreamAbbrevIndex++] = InternalIndex;

+ }

+private:

+ // The index of the next bitstream abbreviation to be defined.

+ unsigned NextBitstreamAbbrevIndex;

+ // Map from bitstream abbreviation index to corresponding internal

+ // abbreviation index.

+ BitstreamToInternalAbbrevMapType BitstreamToInternalAbbrevMap;

+};

+/// Defines the list of abbreviations associated with a block.

+class BlockAbbrevs {

+public:

+ // Vector to hold the (ordered) list of abbreviations.

+ typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector;

+ BlockAbbrevs(unsigned BlockID)

+ : BlockID(BlockID) {

+ // backfill internal indices that don't correspond to bitstream

+ // application abbreviations, so that added abbreviations have

+ // valid abbreviation indices.

+ for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) {

+ // Make all non-application abbreviations look like the default

+ // abbreviation.

+ NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();

+ Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));

+ Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6));

+ Abbrevs.push_back(Abbrev);

+ }

+ GlobalAbbrevBitstreamToInternalMap.

+ SetNextBitstreamAbbrevIndex(Abbrevs.size());

+ }

+ ~BlockAbbrevs() {

+ for (AbbrevVector::const_iterator

+ Iter = Abbrevs.begin(), IterEnd = Abbrevs.end();

+ Iter != IterEnd; ++Iter) {

+ (*Iter)->dropRef();

+ }

+ for (AbbrevLookupSizeMap::const_iterator

+ Iter = LookupMap.begin(), IterEnd = LookupMap.end();

+ Iter != IterEnd; ++Iter) {

+ delete Iter->second;

+ }

+ // Constant used to denote that a given abbreviation is not in the

+ // set of abbreviations defined by the block.

+ static const int NO_SUCH_ABBREVIATION = -1;

+ // Returns the index to the corresponding application abbreviation,

+ // if it exists. Otherwise return NO_SUCH_ABBREVIATION;

+ int FindAbbreviation(const NaClBitCodeAbbrev *Abbrev) const {

+ for (unsigned i = GetFirstApplicationAbbreviation();

+ i < GetNumberAbbreviations(); ++i) {

+ if (*Abbrevs[i] == *Abbrev) return i;

+ }

+ return NO_SUCH_ABBREVIATION;

+ }

+ /// Adds the given abbreviation to the set of global abbreviations

+ /// defined for the block. Guarantees that duplicate abbreviations

+ /// are not added to the block. Note: Code takes ownership of

+ /// the given abbreviation. Returns true if new abbreviation.

+ /// Updates Index to the index where the abbreviation is located

+ /// in the set of abbreviations.

+ bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) {

+ Index = FindAbbreviation(Abbrev);

+ if (Index != NO_SUCH_ABBREVIATION) {

+ // Already defined, don't install.

+ Abbrev->dropRef();

+ return false;

+ }

+ // New abbreviation. Add.

+ Index = Abbrevs.size();

+ Abbrevs.push_back(Abbrev);

+ return true;

+ }

+ /// Adds the given abbreviation to the set of global abbreviations

+ /// defined for the block. Guarantees that duplicate abbreviations

+ /// are not added to the block. Note: Code takes ownership of

+ /// the given abbreviation. Returns true if new abbreviation.

+ bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) {

+ int Index;

+ return AddAbbreviation(Abbrev, Index);

+ }

+ /// The block ID associated with the block.

+ unsigned GetBlockID() const {

+ return BlockID;

+ }

+ /// Returns the index of the frist application abbreviation for the

+ /// block.

+ unsigned GetFirstApplicationAbbreviation() const {

+ return naclbitc::FIRST_APPLICATION_ABBREV;

+ }

+ // Returns the number of abbreviations associated with the block.

+ unsigned GetNumberAbbreviations() const {

+ return Abbrevs.size();

+ }

+ /// Returns the abbreviation associated with the given abbreviation

+ /// index.

+ NaClBitCodeAbbrev *GetIndexedAbbrev(unsigned index) {

+ if (index >= Abbrevs.size()) return 0;

+ return Abbrevs[index];

+ }

+ // Builds the corresponding fast lookup map for finding abbreviations

+ // that applies to abbreviations in the block

+ void BuildAbbrevLookupSizeMap() {

+ NaClBuildAbbrevLookupMap(GetLookupMap(),

+ GetAbbrevs(),

+ GetFirstApplicationAbbreviation());

+ if (ShowAbbrevLookupTries) PrintLookupMap(errs());

+ }

+ AbbrevBitstreamToInternalMap &GetGlobalAbbrevBitstreamToInternalMap() {

+ return GlobalAbbrevBitstreamToInternalMap;

+ }

+ AbbrevLookupSizeMap &GetLookupMap() {

+ return LookupMap;

+ }

+ // Returns lower level vector of abbreviations.

+ const AbbrevVector &GetAbbrevs() const {

+ return Abbrevs;

+ }

+ // Returns the abbreviation (index) to use for the corresponding

+ // record, based on the abbreviations of this block. Note: Assumes

+ // that BuildAbbrevLookupSizeMap has already been called.

+ unsigned GetRecordAbbrevIndex(const NaClBitcodeRecordData &Record) {

+ unsigned BestIndex = 0; // Ignored unless found candidate.

+ unsigned BestScore = 0; // Number of bits associated with BestIndex.

+ bool FoundCandidate = false;

+ NaClBitcodeValues Values(Record);

+ size_t Size = Values.size();

+ if (Size > NaClValueIndexCutoff) Size = NaClValueIndexCutoff+1;

+ AbbrevLookupSizeMap::const_iterator Pos = LookupMap.find(Size);

+ if (Pos != LookupMap.end()) {

+ if (const AbbrevTrieNode *Node = Pos->second) {

+ if (const AbbrevTrieNode *MatchNode =

+ Node->MatchRecord(Record)) {

+ const std::set<AbbrevIndexPair> &Abbreviations =

+ MatchNode->GetAbbreviations();

+ for (std::set<AbbrevIndexPair>::const_iterator

+ Iter = Abbreviations.begin(),

+ IterEnd = Abbreviations.end();

+ Iter != IterEnd; ++Iter) {

+ uint64_t NumBits = 0;

+ if (CanUseAbbreviation(Values, Iter->second, NumBits)) {

+ if (!FoundCandidate || NumBits < BestScore) {

+ // Use this as candidate.

+ BestIndex = Iter->first;

+ BestScore = NumBits;

+ FoundCandidate = true;

+ }

+ if (FoundCandidate && BestScore <= UnabbreviatedSize(Record)) {

+ return BestIndex;

+ }

+ return naclbitc::UNABBREV_RECORD;

+ }

+ // Computes the number of bits that will be generated by the

+ // corresponding read record, if no abbreviation is used.

+ static uint64_t UnabbreviatedSize(const NaClBitcodeRecordData &Record) {

+ uint64_t NumBits = MatchVBRBits(Record.Code, DefaultVBRBits);

+ size_t NumValues = Record.Values.size();

+ NumBits += MatchVBRBits(NumValues, DefaultVBRBits);

+ for (size_t Index = 0; Index < NumValues; ++Index) {

+ NumBits += MatchVBRBits(Record.Values[Index], DefaultVBRBits);

+ }

+ return NumBits;

+ }

+ // Returns true if the given abbreviation can be used to represent the

+ // record. Sets NumBits to the number of bits the abbreviation will

+ // generate. Note: Value of NumBits is undefined if this function

+ // return false.

+ static bool CanUseAbbreviation(NaClBitcodeValues &Values,

+ NaClBitCodeAbbrev *Abbrev, uint64_t &NumBits) {

+ NumBits = 0;

+ unsigned OpIndex = 0;

+ unsigned OpIndexEnd = Abbrev->getNumOperandInfos();

+ size_t ValueIndex = 0;

+ size_t ValueIndexEnd = Values.size();

+ while (ValueIndex < ValueIndexEnd && OpIndex < OpIndexEnd) {

+ const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(OpIndex);

+ switch (Op.getEncoding()) {

+ case NaClBitCodeAbbrevOp::Literal:

+ if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {

+ ++ValueIndex;

+ ++OpIndex;

+ continue;

+ } else {

+ return false;

+ }

+ case NaClBitCodeAbbrevOp::Array: {

+ assert(OpIndex+2 == OpIndexEnd);

+ const NaClBitCodeAbbrevOp &ElmtOp =

+ Abbrev->getOperandInfo(OpIndex+1);

+ // Add size of array.

+ NumBits += MatchVBRBits(Values.size()-ValueIndex, DefaultVBRBits);

+ // Add size of each field.

+ for (; ValueIndex != ValueIndexEnd; ++ValueIndex) {

+ uint64_t FieldBits=0;

+ if (!CanUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) {

+ return false;

+ }

+ NumBits += FieldBits;

+ }

+ return true;

+ }

+ default: {

+ if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {

+ ++ValueIndex;

+ ++OpIndex;

+ break;

+ }

+ return false;

+ }

+ return ValueIndex == ValueIndexEnd && OpIndex == OpIndexEnd;

+ }

+ // Returns true if the given abbreviation Op defines a single value,

+ // and can be applied to the given Val. Adds the number of bits the

+ // abbreviation Op will generate to NumBits if Op applies.

+ static bool CanUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op,

+ uint64_t Val,

+ uint64_t &NumBits) {

+ switch (Op.getEncoding()) {

+ case NaClBitCodeAbbrevOp::Literal:

+ return Val == Op.getValue();

+ case NaClBitCodeAbbrevOp::Array:

+ return false;

+ case NaClBitCodeAbbrevOp::Fixed: {

+ uint64_t Width = Op.getValue();

+ if (!MatchFixedBits(Val, Width))

+ return false;

+ NumBits += Width;

+ return true;

+ }

+ case NaClBitCodeAbbrevOp::VBR:

+ if (unsigned Width = MatchVBRBits(Val, Op.getValue())) {

+ NumBits += Width;

+ return true;

+ } else {

+ return false;

+ }

+ case NaClBitCodeAbbrevOp::Char6:

+ if (!NaClBitCodeAbbrevOp::isChar6(Val)) return false;

+ NumBits += 6;

+ return true;

+ }

+ llvm_unreachable("unhandled NaClBitCodeAbbrevOp encoding");

+ }

+ // Returns true if the given Val can be represented by abbreviation

+ // operand Fixed(Width).

+ static bool MatchFixedBits(uint64_t Val, unsigned Width) {

+ // Note: The reader only allows up to 32 bits for fixed values.

+ if (Val & Mask32) return false;

+ if (Val & ~(~0U >> (32-Width))) return false;

+ return true;

+ }

+ // Returns the number of bits needed to represent Val by abbreviation

+ // operand VBR(Width). Note: Returns 0 if Val can't be represented

+ // by VBR(Width).

+ static unsigned MatchVBRBits(uint64_t Val, unsigned Width) {

+ if (Width == 0) return 0;

+ unsigned NumBits = 0;

+ while (1) {

+ // values emitted Width-1 bits at a time (plus a continue bit).

+ NumBits += Width;

+ if ((Val & (1U << (Width-1))) == 0)

+ return NumBits;

+ Val >>= Width-1;

+ }

+private:

+ // Defines the number of bits used to print VBR array field values.

+ static const unsigned DefaultVBRBits = 6;

+ // Masks out the top-32 bits of a uint64_t value.

+ static const uint64_t Mask32 = 0xFFFFFFFF00000000;

+ // The block ID for which abbreviations are being associated.

+ unsigned BlockID;

+ // The list of abbreviations defined for the block.

+ AbbrevVector Abbrevs;

+ // The mapping from global bitstream abbreviations to the corresponding

+ // block abbreviation index (in Abbrevs).

+ AbbrevBitstreamToInternalMap GlobalAbbrevBitstreamToInternalMap;

+ // A fast lookup map for finding the abbreviation that applies

+ // to a record.

+ AbbrevLookupSizeMap LookupMap;

+ void PrintLookupMap(raw_ostream &Stream) {

+ Stream << "------------------------------\n";

+ Stream << "Block " << GetBlockID() << " abbreviation tries:\n";

+ bool IsFirstIteration = true;

+ for (AbbrevLookupSizeMap::const_iterator

+ Iter = LookupMap.begin(), IterEnd = LookupMap.end();

+ Iter != IterEnd; ++Iter) {

+ if (IsFirstIteration)

+ IsFirstIteration = false;

+ else

+ Stream << "-----\n";

+ if (Iter->second) {

+ Stream << "Index " << Iter->first << ":\n";

+ Iter->second->Print(Stream, " ");

+ }

+ Stream << "------------------------------\n";

+ }

+};

+/// Defines a map from block ID's to the corresponding abbreviation

+/// map to use.

+typedef DenseMap<unsigned, BlockAbbrevs*> BlockAbbrevsMapType;

+/// Parses the bitcode file, analyzes it, and generates the

+/// corresponding lists of global abbreviations to use in the

+/// generated (compressed) bitcode file.

+class NaClAnalyzeParser : public NaClBitcodeParser {

+ NaClAnalyzeParser(const NaClAnalyzeParser&)

+ LLVM_DELETED_FUNCTION;

+ void operator=(const NaClAnalyzeParser&)

+ LLVM_DELETED_FUNCTION;

+public:

+ // Creates the analysis parser, which will fill the given

+ // BlockAbbrevsMap with appropriate abbreviations, after

+ // analyzing the bitcode file defined by Cursor.

+ NaClAnalyzeParser(NaClBitstreamCursor &Cursor,

+ BlockAbbrevsMapType &BlockAbbrevsMap)

+ : NaClBitcodeParser(Cursor),

+ BlockAbbrevsMap(BlockAbbrevsMap),

+ BlockDist(&NaClCompressBlockDistElement::Sentinel),

+ AbbrevListener(this)

+ {

+ SetListener(&AbbrevListener);

+ }

+ virtual ~NaClAnalyzeParser() {}

+ virtual bool Error(const std::string &Message) {

+ // Use local error routine so that all errors are treated uniformly.

+ return ::Error(Message);

+ }

+ virtual bool ParseBlock(unsigned BlockID);

+ // Mapping from block ID's to the corresponding list of abbreviations

+ // associated with that block.

+ BlockAbbrevsMapType &BlockAbbrevsMap;

+ // Nested distribution capturing distribution of records in bitcode file.

+ NaClBitcodeBlockDist BlockDist;

+ // Listener used to get abbreviations as they are read.

+ NaClBitcodeParserListener AbbrevListener;

+};

+class NaClBlockAnalyzeParser : public NaClBitcodeParser {

+ NaClBlockAnalyzeParser(const NaClBlockAnalyzeParser&)

+ LLVM_DELETED_FUNCTION;

+ void operator=(NaClBlockAnalyzeParser&)

+ LLVM_DELETED_FUNCTION;

+public:

+ /// Top-level constructor to build the top-level block with the

+ /// given BlockID, and collect data (for compression) in that block.

+ NaClBlockAnalyzeParser(unsigned BlockID,

+ NaClAnalyzeParser *Context)

+ : NaClBitcodeParser(BlockID, Context), Context(Context) {

+ Init();

+ }

+ virtual ~NaClBlockAnalyzeParser() {

+ Context->BlockDist.AddBlock(GetBlock());

+ }

+protected:

+ /// Nested constructor to parse a block within a block. Creates a

+ /// block parser to parse a block with the given BlockID, and

+ /// collect data (for compression) in that block.

+ NaClBlockAnalyzeParser(unsigned BlockID,

+ NaClBlockAnalyzeParser *EnclosingParser)

+ : NaClBitcodeParser(BlockID, EnclosingParser),

+ Context(EnclosingParser->Context) {

+ Init();

+ }

+public:

+ virtual bool Error(const std::string &Message) {

+ // Use local error routine so that all errors are treated uniformly.

+ return ::Error(Message);

+ }

+ virtual bool ParseBlock(unsigned BlockID) {

+ NaClBlockAnalyzeParser Parser(BlockID, this);

+ return Parser.ParseThisBlock();

+ }

+ virtual void ProcessRecord() {

+ // Before processing the record, we need to rename the abbreviation

+ // index, so that we can look it up in the set of block abbreviations

+ // being defined.

+ if (Record.UsedAnAbbreviation()) {

+ unsigned AbbrevIndex = Record.GetAbbreviationIndex();

+ if (LocalAbbrevBitstreamToInternalMap.

+ DefinesBitstreamAbbrevIndex(AbbrevIndex)) {

+ Record.SetAbbreviationIndex(

+ LocalAbbrevBitstreamToInternalMap.

+ GetInternalAbbrevIndex(AbbrevIndex));

+ } else {

+ AbbrevBitstreamToInternalMap &GlobalAbbrevBitstreamToInternalMap =

+ GlobalBlockAbbrevs->GetGlobalAbbrevBitstreamToInternalMap();

+ if (GlobalAbbrevBitstreamToInternalMap.

+ DefinesBitstreamAbbrevIndex(AbbrevIndex)) {

+ Record.SetAbbreviationIndex(

+ GlobalAbbrevBitstreamToInternalMap.

+ GetInternalAbbrevIndex(AbbrevIndex));

+ } else {

+ report_fatal_error("Bad abbreviation index in file");

+ }

+ cast<NaClCompressBlockDistElement>(

+ Context->BlockDist.GetElement(Record.GetBlockID()))

+ ->GetAbbrevDist().AddRecord(Record);

+ }

+ virtual void ProcessAbbreviation(unsigned BlockID,

+ NaClBitCodeAbbrev *Abbrev,

+ bool IsLocal) {

+ int Index;

+ AddAbbreviation(BlockID, Abbrev->Simplify(), Index);

+ if (IsLocal) {

+ LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index);

+ } else {

+ GetGlobalAbbrevs(BlockID)->GetGlobalAbbrevBitstreamToInternalMap().

+ InstallNewBitstreamAbbrevIndex(Index);

+ }

+protected:

+ // The context (i.e. top-level) parser.

+ NaClAnalyzeParser *Context;

+ // The global block abbreviations associated with this block.

+ BlockAbbrevs *GlobalBlockAbbrevs;

+ // The local abbreviations associated with this block.

+ AbbrevBitstreamToInternalMap LocalAbbrevBitstreamToInternalMap;

+ /// Returns the set of (global) block abbreviations defined for the

+ /// given block ID.

+ BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {

+ BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];

+ if (Abbrevs == 0) {

+ Abbrevs = new BlockAbbrevs(BlockID);

+ Context->BlockAbbrevsMap[BlockID] = Abbrevs;

+ }

+ return Abbrevs;

+ }

+ void SetGlobalAbbrevs(unsigned BlockID, BlockAbbrevs *Abbrevs) {

+ Context->BlockAbbrevsMap[BlockID] = Abbrevs;

+ }

+ // Adds the abbreviation to the list of abbreviations for the given

+ // block.

+ void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev,

+ int &Index) {

+ // Get block abbreviations.

+ BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID);

+ // Read abbreviation and add as a global abbreviation.

+ if (Abbrevs->AddAbbreviation(Abbrev, Index)

+ && TraceGeneratedAbbreviations) {

+ PrintAbbrev(errs(), BlockID, Abbrev);

+ }

+ /// Finds the index to the corresponding internal block abbreviation

+ /// for the given abbreviation.

+ int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {

+ return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev);

+ }

+ void Init() {

+ GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID());

+ LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex(

+ GlobalBlockAbbrevs->

+ GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex());

+ }

+};

+bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) {

+ NaClBlockAnalyzeParser Parser(BlockID, this);

+ return Parser.ParseThisBlock();

+/// Models the unrolling of an abbreviation into its sequence of

+/// individual operators. That is, unrolling arrays to match the width

+/// of the abbreviation.

+///

+/// For example, consider the abbreviation [Array(VBR(6))]. If the

+/// distribution map has data for records of size 3, and the

+/// distribution map suggests that a constant 4 appears as the second

+/// element in the record, it is nontrivial to figure out how to

+/// encorporate this into this abbrevation. Hence, we unroll the array

+/// (3 times) to get [VBR(6), VBR(6), VBR(6), Array(VBR(6))]. To

+/// update the second element to match the literal 4, we only need to

+/// replace the second element in the unrolled abbreviation resulting

+/// in [VBR(6), Lit(4), VBR(6), Array(VBR(6))].

+///

+/// After we have done appropriate substitutions, we can simplify the

+/// unrolled abbreviation by calling method Restore.

+///

+/// Note: We unroll in the form that best matches the distribution

+/// map. Hence, the code is stored as a separate operator. We also

+/// keep the array abbreviation op, for untracked elements within the

+/// distribution maps.

+class UnrolledAbbreviation {

+ void operator=(const UnrolledAbbreviation&) LLVM_DELETED_FUNCTION;

+public:

+ /// Unroll the given abbreviation, assuming it has the given size

+ /// (as specified in the distribution maps).

+ ///

+ /// If argument CanBeBigger is true, then we do not assume that we

+ /// can remove the trailing array when expanding, because the

+ /// actual size of the corresponding record using this abbreviation

+ /// may be bigger.

+ UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size,

+ bool CanBeBigger = false)

+ : CodeOp(0) {

+ unsigned NextOp = 0;

+ UnrollAbbrevOp(CodeOp, Abbrev, NextOp);

+ --Size;

+ for (unsigned i = 0; i < Size; ++i) {

+ // Create slot and then fill with appropriate operator.

+ AbbrevOps.push_back(CodeOp);

+ UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp);

+ }

+ if (CanBeBigger) {

+ for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) {

+ MoreOps.push_back(Abbrev->getOperandInfo(NextOp));

+ }

+ } else if (NextOp < Abbrev->getNumOperandInfos() &&

+ !Abbrev->getOperandInfo(NextOp).isArrayOp()) {

+ errs() << (Size+1) << ": ";

+ Abbrev->Print(errs());

+ llvm_unreachable("Malformed abbreviation/size pair");

+ }

+ explicit UnrolledAbbreviation(const UnrolledAbbreviation &Abbrev)

+ : CodeOp(Abbrev.CodeOp),

+ AbbrevOps(Abbrev.AbbrevOps),

+ MoreOps(Abbrev.MoreOps) {

+ }

+ /// Prints out the abbreviation modeled by the unrolled

+ /// abbreviation.

+ void Print(raw_ostream &Stream) const {

+ NaClBitCodeAbbrev *Abbrev = Restore(false);

+ Abbrev->Print(Stream);

+ Abbrev->dropRef();

+ }

+ /// Converts the unrolled abbreviation back into a regular

+ /// abbreviation. If Simplify is true, we simplify the

+ /// unrolled abbreviation as well.

+ NaClBitCodeAbbrev *Restore(bool Simplify=true) const {

+ NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();

+ Abbrev->Add(CodeOp);

+ for (std::vector<NaClBitCodeAbbrevOp>::const_iterator

+ Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end();

+ Iter != IterEnd; ++Iter) {

+ Abbrev->Add(*Iter);

+ }

+ for (std::vector<NaClBitCodeAbbrevOp>::const_iterator

+ Iter = MoreOps.begin(), IterEnd = MoreOps.end();

+ Iter != IterEnd; ++Iter) {

+ Abbrev->Add(*Iter);

+ }

+ if (Simplify) {

+ NaClBitCodeAbbrev *SimpAbbrev = Abbrev->Simplify();

+ Abbrev->dropRef();

+ return SimpAbbrev;

+ } else {

+ return Abbrev;

+ }

+ // The abbreviation used for the record code.

+ NaClBitCodeAbbrevOp CodeOp;

+ // The abbreviations used for each tracked value index.

+ std::vector<NaClBitCodeAbbrevOp> AbbrevOps;

+private:

+ // Any remaining abbreviation operators not part of the unrolling.

+ std::vector<NaClBitCodeAbbrevOp> MoreOps;

+ // Extracts out the next abbreviation operator from the abbreviation

+ // Abbrev, given the index NextOp, and assigns it to AbbrevOp

+ void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp,

+ NaClBitCodeAbbrev *Abbrev,

+ unsigned &NextOp) {

+ assert(NextOp < Abbrev->getNumOperandInfos());

+ const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp);

+ if (Op.isArrayOp()) {

+ // Do not advance. The array operator assumes that all remaining

+ // elements should match its argument.

+ AbbrevOp = Abbrev->getOperandInfo(NextOp+1);

+ } else {

+ AbbrevOp = Op;

+ NextOp++;

+ }

+};

+/// Models a candidate block abbreviation, which is a blockID, and the

+/// corresponding abbreviation to be considered for addition. Note:

+/// Constructors and assignment take ownership of the abbreviation.

+class CandBlockAbbrev {

+public:

+ CandBlockAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbrev)

+ : BlockID(BlockID), Abbrev(Abbrev) {

+ }

+ CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev)

+ : BlockID(BlockAbbrev.BlockID),

+ Abbrev(BlockAbbrev.Abbrev) {

+ Abbrev->addRef();

+ }

+ void operator=(const CandBlockAbbrev &BlockAbbrev) {

+ Abbrev->dropRef();

+ BlockID = BlockAbbrev.BlockID;

+ Abbrev = BlockAbbrev.Abbrev;

+ Abbrev->addRef();

+ }

+ ~CandBlockAbbrev() {

+ Abbrev->dropRef();

+ }

+ /// The block ID of the candidate abbreviation.

+ unsigned GetBlockID() const {

+ return BlockID;

+ }

+ /// The abbreviation of the candidate abbreviation.

+ const NaClBitCodeAbbrev *GetAbbrev() const {

+ return Abbrev;

+ }

+ /// orders this against the candidate abbreviation.

+ int Compare(const CandBlockAbbrev &CandAbbrev) const {

+ unsigned diff = BlockID - CandAbbrev.BlockID;

+ if (diff) return diff;

+ return Abbrev->Compare(*CandAbbrev.Abbrev);

+ }

+ /// Prints the candidate abbreviation to the given stream.

+ void Print(raw_ostream &Stream) const {

+ PrintAbbrev(Stream, BlockID, Abbrev);

+ }

+private:

+ // The block the abbreviation applies to.

+ unsigned BlockID;

+ // The candidate abbreviation.

+ NaClBitCodeAbbrev *Abbrev;

+};

+static inline bool operator<(const CandBlockAbbrev &A1,

+ const CandBlockAbbrev &A2) {

+ return A1.Compare(A2) < 0;

+/// Models the set of candidate abbreviations being considered, and

+/// the number of abbreviations associated with each candidate

+/// Abbreviation.

+///

+/// Note: Because we may have abbreviation refinements of A->B->C and

+/// A->D->C, we need to accumulate instance counts in such cases.

+class CandidateAbbrevs {

+public:

+ // Map from candidate abbreviations to the corresponding number of

+ // instances.

+ typedef std::map<CandBlockAbbrev, unsigned> AbbrevCountMap;

+ typedef AbbrevCountMap::const_iterator const_iterator;

+ /// Creates an empty set of candidate abbreviations, to be

+ /// (potentially) added to the given set of block abbreviations.

+ /// Argument is the (global) block abbreviations map, which is

+ /// used to determine if it already exists.

+ CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap)

+ : BlockAbbrevsMap(BlockAbbrevsMap)

+ {}

+ /// Adds the given (unrolled) abbreviation as a candidate

+ /// abbreviation to the given block. NumInstances is the number of

+ /// instances expected to use this candidate abbreviation. Returns

+ /// true if the corresponding candidate abbreviation is added to this

+ /// set of candidate abbreviations.

+ bool Add(unsigned BlockID,

+ UnrolledAbbreviation &UnrolledAbbrev,

+ unsigned NumInstances);

+ /// Returns the list of candidate abbreviations in this set.

+ const AbbrevCountMap &GetAbbrevsMap() const {

+ return AbbrevsMap;

+ }

+ /// Prints out the current contents of this set.

+ void Print(raw_ostream &Stream, const char *Title = "Candidates") const {

+ Stream << "-- " << Title << ": \n";

+ for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end();

+ Iter != IterEnd; ++Iter) {

+ Stream << format("%12u", Iter->second) << ": ";

+ Iter->first.Print(Stream);

+ }

+ Stream << "--\n";

+ }

+private:

+ // The set of abbreviations and corresponding number instances.

+ AbbrevCountMap AbbrevsMap;

+ // The map of (global) abbreviations already associated with each block.

+ BlockAbbrevsMapType &BlockAbbrevsMap;

+};

+bool CandidateAbbrevs::Add(unsigned BlockID,

+ UnrolledAbbreviation &UnrolledAbbrev,

+ unsigned NumInstances) {

+ // Drop if it corresponds to an existing global abbreviation.

+ NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.Restore();

+ if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) {

+ if (Abbrevs->FindAbbreviation(Abbrev) !=

+ BlockAbbrevs::NO_SUCH_ABBREVIATION) {

+ Abbrev->dropRef();

+ return false;

+ }

+ CandBlockAbbrev CandAbbrev(BlockID, Abbrev);

+ AbbrevCountMap::iterator Pos = AbbrevsMap.find(CandAbbrev);

+ if (Pos == AbbrevsMap.end()) {

+ AbbrevsMap[CandAbbrev] = NumInstances;

+ } else {

+ Pos->second += NumInstances;

+ }

+ return true;

+// Look for new abbreviations in block BlockID, considering it was

+// read with the given abbreviation Abbrev, and considering changing

+// the abbreviation opererator for value Index. ValueDist is how

+// values at Index are distributed. Any found abbreviations are added

+// to the candidate abbreviations CandAbbrevs. Returns true only if we

+// have added new candidate abbreviations to CandAbbrevs.

+static bool AddNewAbbreviations(unsigned BlockID,

+ const UnrolledAbbreviation &Abbrev,

+ unsigned Index,

+ NaClBitcodeValueDist &ValueDist,

+ CandidateAbbrevs &CandAbbrevs) {

+ // TODO(kschimpf): Add code to try and find a better encoding for

+ // the values, based on the distribution.

+ // If this index is already a literal abbreviation, no improvements can

+ // be made.

+ const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index];

+ if (Op.isLiteral()) return false;

+ // Search based on sorted distribution, which sorts by number of

+ // instances. Start by trying to find possible constants to use.

+ const NaClBitcodeDist::Distribution *

+ Distribution = ValueDist.GetDistribution();

+ for (NaClBitcodeDist::Distribution::const_iterator

+ Iter = Distribution->begin(), IterEnd = Distribution->end();

+ Iter != IterEnd; ++Iter) {

+ NaClValueRangeType Range = GetNaClValueRange(Iter->second);

+ if (Range.first != Range.second) continue; // not a constant.

+ // Defines a constant. Try as new candidate range. In addition,

+ // don't try any more constant values, since this is the one with

+ // the greatest number of instances.

+ NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first);

+ UnrolledAbbreviation CandAbbrev(Abbrev);

+ CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first);

+ return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances());

+ }

+ return false;

+// Look for new abbreviations in block BlockID, considering it was

+// read with the given abbreviation Abbrev. IndexDist is the

+// corresponding distribution of value indices associated with the

+// abbreviation. Any found abbreviations are added to the candidate

+// abbreviations CandAbbrevs.

+static void AddNewAbbreviations(unsigned BlockID,

+ const UnrolledAbbreviation &Abbrev,

+ NaClBitcodeDist &IndexDist,

+ CandidateAbbrevs &CandAbbrevs) {

+ // Search based on sorted distribution, which sorts based on heuristic

+ // of best index to fix first.

+ const NaClBitcodeDist::Distribution *

+ IndexDistribution = IndexDist.GetDistribution();

+ for (NaClBitcodeDist::Distribution::const_iterator

+ IndexIter = IndexDistribution->begin(),

+ IndexIterEnd = IndexDistribution->end();

+ IndexIter != IndexIterEnd; ++IndexIter) {

+ unsigned Index = static_cast<unsigned>(IndexIter->second);

+ if (AddNewAbbreviations(

+ BlockID, Abbrev, Index,

+ cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index))

+ ->GetValueDist(),

+ CandAbbrevs)) {

+ return;

+ }

+// Look for new abbreviations in block BlockID, considering it was

+// read with the given abbreviation Abbrev, and the given record Code.

+// SizeDist is the corresponding distribution of sizes associated with

+// the abbreviation. Any found abbreviations are added to the

+// candidate abbreviations CandAbbrevs.

+static void AddNewAbbreviations(unsigned BlockID,

+ NaClBitCodeAbbrev *Abbrev,

+ unsigned Code,

+ NaClBitcodeDist &SizeDist,

+ CandidateAbbrevs &CandAbbrevs) {

+ const NaClBitcodeDist::Distribution *

+ SizeDistribution = SizeDist.GetDistribution();

+ for (NaClBitcodeDist::Distribution::const_iterator

+ SizeIter = SizeDistribution->begin(),

+ SizeIterEnd = SizeDistribution->end();

+ SizeIter != SizeIterEnd; ++SizeIter) {

+ unsigned Size = static_cast<unsigned>(SizeIter->second);

+ UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1 /* Add code! */,

+ Size >= NaClValueIndexCutoff);

+ if (!UnrolledAbbrev.CodeOp.isLiteral()) {

+ // Try making the code a literal.

+ UnrolledAbbreviation CandAbbrev(UnrolledAbbrev);

+ CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code);

+ CandAbbrevs.Add(BlockID, CandAbbrev,

+ SizeDist.at(Size)->GetNumInstances());

+ }

+ // Now process value indices to find candidate abbreviations.

+ AddNewAbbreviations(

+ BlockID, UnrolledAbbrev,

+ cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size))

+ ->GetValueIndexDist(),

+ CandAbbrevs);

+ }

+// Look for new abbreviations in block BlockID. Abbrevs is the map of

+// read (globally defined) abbreviations associated with the

+// BlockID. AbbrevDist is the distribution map of abbreviations

+// associated with BlockID. Any found abbreviations are added to the

+// candidate abbreviations CandAbbrevs.

+static void AddNewAbbreviations(unsigned BlockID,

+ BlockAbbrevs *Abbrevs,

+ NaClBitcodeDist &AbbrevDist,

+ CandidateAbbrevs &CandAbbrevs) {

+ const NaClBitcodeDist::Distribution *

+ AbbrevDistribution = AbbrevDist.GetDistribution();

+ for (NaClBitcodeDist::Distribution::const_iterator

+ AbbrevIter = AbbrevDistribution->begin(),

+ AbbrevIterEnd = AbbrevDistribution->end();

+ AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {

+ NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second;

+ NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex);

+ NaClBitcodeAbbrevDistElement *AbbrevElmt =

+ cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex));

+ NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist();

+ const NaClBitcodeDist::Distribution *

+ CodeDistribution = CodeDist.GetDistribution();

+ for (NaClBitcodeDist::Distribution::const_iterator

+ CodeIter = CodeDistribution->begin(),

+ CodeIterEnd = CodeDistribution->end();

+ CodeIter != CodeIterEnd; ++CodeIter) {

+ unsigned Code = static_cast<unsigned>(CodeIter->second);

+ AddNewAbbreviations(

+ BlockID,

+ Abbrev,

+ Code,

+ cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second))

+ ->GetSizeDist(),

+ CandAbbrevs);

+ }

+typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType;

+// Look for new abbreviations in the given block distribution map

+// BlockDist. BlockAbbrevsMap contains the set of read global

+// abbreviations. Adds found candidate abbreviations to the set of

+// known global abbreviations.

+static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist,

+ BlockAbbrevsMapType &BlockAbbrevsMap) {

+ CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap);

+ // Start by collecting candidate abbreviations.

+ const NaClBitcodeDist::Distribution *

+ Distribution = BlockDist.GetDistribution();

+ for (NaClBitcodeDist::Distribution::const_iterator

+ Iter = Distribution->begin(), IterEnd = Distribution->end();

+ Iter != IterEnd; ++Iter) {

+ unsigned BlockID = static_cast<unsigned>(Iter->second);

+ AddNewAbbreviations(

+ BlockID,

+ BlockAbbrevsMap[BlockID],

+ cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))

+ ->GetAbbrevDist(),

+ CandAbbrevs);

+ }

+ // Install candidate abbreviations.

+ //

+ // Sort the candidate abbreviations by number of instances, so that

+ // if multiple abbreviations apply, the one with the largest number

+ // of instances will be chosen when compressing a file.

+ //

+ // For example, we may have just generated two abbreviations. The

+ // first has replaced the 3rd element with the constant 4 while the

+ // second replaced the 4th element with the constant 5. The first

+ // abbreviation can apply to 300 records while the second can apply

+ // to 1000 records. Assuming that both abbreviations shrink the

+ // record by the same number of bits, we clearly want the tool to

+ // choose the second abbreviation when selecting the abbreviation

+ // index to choose (via method GetRecordAbbrevIndex).

+ //

+ // Selecting the second is important in that abbreviation are

+ // refined by successive calls to this tool. We do not want to

+ // restrict downstream refinements prematurely. Hence, we want the

+ // tool to choose the abbreviation with the most candidates first.

+ //

+ // Since method GetRecordAbbrevIndex chooses the first abbreviation

+ // that generates the least number of bits, we clearly want to make

+ // sure that the second abbreviation occurs before the first.

+ std::vector<CountedAbbrevType> Candidates;

+ for (CandidateAbbrevs::const_iterator

+ Iter = CandAbbrevs.GetAbbrevsMap().begin(),

+ IterEnd = CandAbbrevs.GetAbbrevsMap().end();

+ Iter != IterEnd; ++Iter) {

+ Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first));

+ }

+ std::sort(Candidates.begin(), Candidates.end());

+ std::vector<CountedAbbrevType>::const_reverse_iterator

+ Iter = Candidates.rbegin(), IterEnd = Candidates.rend();

+ if (Iter == IterEnd) return;

+ if (TraceGeneratedAbbreviations) {

+ errs() << "-- New abbrevations:\n";

+ }

+ unsigned Min = (Iter->first >> 2);

+ for (; Iter != IterEnd; ++Iter) {

+ if (Iter->first < Min) break;

+ unsigned BlockID = Iter->second.GetBlockID();

+ BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID];

+ NaClBitCodeAbbrev *Abbrev = Iter->second.GetAbbrev()->Copy();

+ if (TraceGeneratedAbbreviations) {

+ errs() <<format("%12u", Iter->first) << ": ";

+ PrintAbbrev(errs(), BlockID, Abbrev);

+ }

+ Abbrevs->AddAbbreviation(Abbrev);

+ }

+ if (TraceGeneratedAbbreviations) {

+ errs() << "--\n";

+ }

+// Walks the block distribution (BlockDist), sorting entries based

+// on the distribution of blocks and abbreviations, and then

+// prints out the frequency of each abbreviation used.

+static void DisplayAbbreviationFrequencies(

+ raw_ostream &Output,

+ const NaClBitcodeBlockDist &BlockDist,

+ const BlockAbbrevsMapType &BlockAbbrevsMap) {

+ const NaClBitcodeDist::Distribution *BlockDistribution =

+ BlockDist.GetDistribution();

+ for (NaClBitcodeDist::Distribution::const_iterator

+ BlockIter = BlockDistribution->begin(),

+ BlockIterEnd = BlockDistribution->end();

+ BlockIter != BlockIterEnd; ++BlockIter) {

+ unsigned BlockID = static_cast<unsigned>(BlockIter->second);

+ BlockAbbrevsMapType::const_iterator BlockPos = BlockAbbrevsMap.find(BlockID);

+ if (BlockPos == BlockAbbrevsMap.end()) continue;

+ Output << "Block " << BlockID << "\n";

+ if (NaClCompressBlockDistElement *BlockElement =

+ dyn_cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))) {

+ NaClBitcodeDist &AbbrevDist = BlockElement->GetAbbrevDist();

+ const NaClBitcodeDist::Distribution *AbbrevDistribution =

+ AbbrevDist.GetDistribution();

+ unsigned Total = AbbrevDist.GetTotal();

+ for (NaClBitcodeDist::Distribution::const_iterator

+ AbbrevIter = AbbrevDistribution->begin(),

+ AbbrevIterEnd = AbbrevDistribution->end();

+ AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {

+ unsigned Index = static_cast<unsigned>(AbbrevIter->second);

+ unsigned Count = AbbrevDist.at(Index)->GetNumInstances();

+ Output << format("%8u (%6.2f%%): ", Count,

+ (double) Count/Total*100.0);

+ BlockPos->second->GetIndexedAbbrev(Index)->Print(Output);

+ }

+ Output << '\n';

+ }

+// Read in bitcode, analyze data, and figure out set of abbreviations

+// to use, from memory buffer MemBuf containing the input bitcode file.

+static bool AnalyzeBitcode(std::unique_ptr<MemoryBuffer> &MemBuf,

+ BlockAbbrevsMapType &BlockAbbrevsMap) {

+ // TODO(kschimpf): The current code only extracts abbreviations

+ // defined in the bitcode file. This code needs to be updated to

+ // collect data distributions and figure out better (global)

+ // abbreviations to use.

+ const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();

+ const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();

+ // First read header and verify it is good.

+ NaClBitcodeHeader Header;

+ if (Header.Read(BufPtr, EndBufPtr) || !Header.IsSupported())

+ return Error("Invalid PNaCl bitcode header");

+ // Create a bitstream reader to read the bitcode file.

+ NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);

+ NaClBitstreamCursor Stream(StreamFile);

+ // Parse the the bitcode file.

+ NaClAnalyzeParser Parser(Stream, BlockAbbrevsMap);

+ while (!Stream.AtEndOfStream()) {

+ if (Parser.Parse()) return true;

+ }

+ if (ShowAbbreviationFrequencies || ShowValueDistributions) {

+ std::error_code EC;

+ raw_fd_ostream Output(OutputFilename, EC, sys::fs::F_None);

+ if (EC) {

+ errs() << EC.message() << "\n";

+ exit(1);

+ }

+ if (ShowAbbreviationFrequencies)

+ DisplayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap);

+ if (ShowValueDistributions)

+ Parser.BlockDist.Print(Output);

+ }

+ AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap);

+ return false;

+/// Parses the input bitcode file and generates the corresponding

+/// compressed bitcode file, by replacing abbreviations in the input

+/// file with the corresponding abbreviations defined in

+/// BlockAbbrevsMap.

+class NaClBitcodeCopyParser : public NaClBitcodeParser {

+public:

+ // Top-level constructor to build the appropriate block parser

+ // using the given BlockAbbrevsMap to define abbreviations.

+ NaClBitcodeCopyParser(NaClBitstreamCursor &Cursor,

+ BlockAbbrevsMapType &BlockAbbrevsMap,

+ NaClBitstreamWriter &Writer)

+ : NaClBitcodeParser(Cursor),

+ BlockAbbrevsMap(BlockAbbrevsMap),

+ Writer(Writer),

+ FoundFirstBlockInfo(false)

+ {}

+ virtual ~NaClBitcodeCopyParser() {}

+ virtual bool Error(const std::string &Message) {

+ // Use local error routine so that all errors are treated uniformly.

+ return ::Error(Message);

+ }

+ virtual bool ParseBlock(unsigned BlockID);

+ // The abbreviations to use for the copied bitcode.

+ BlockAbbrevsMapType &BlockAbbrevsMap;

+ // The bitstream to copy the compressed bitcode into.

+ NaClBitstreamWriter &Writer;

+ // True if we have already found the first block info block.

+ // Used to make sure we don't use abbreviations until we

+ // have put them into the bitcode file.

+ bool FoundFirstBlockInfo;

+};

+class NaClBlockCopyParser : public NaClBitcodeParser {

+public:

+ // Top-level constructor to build the appropriate block parser.

+ NaClBlockCopyParser(unsigned BlockID,

+ NaClBitcodeCopyParser *Context)

+ : NaClBitcodeParser(BlockID, Context),

+ Context(Context),

+ BlockAbbreviations(0)

+ {}

+ virtual ~NaClBlockCopyParser() {}

+protected:

+ // The context defining state associated with the block parser.

+ NaClBitcodeCopyParser *Context;

+ // The block abbreviations defined for this block (initialized by

+ // EnterBlock).

+ BlockAbbrevs *BlockAbbreviations;

+ /// Constructor to parse nested blocks. Creates a block parser to

+ /// parse in a block with the given BlockID, and write the block

+ /// back out using the abbreviations in BlockAbbrevsMap.

+ NaClBlockCopyParser(unsigned BlockID,

+ NaClBlockCopyParser *EnclosingParser)

+ : NaClBitcodeParser(BlockID, EnclosingParser),

+ Context(EnclosingParser->Context)

+ {}

+ virtual bool Error(const std::string &Message) {

+ // Use local error routine so that all errors are treated uniformly.

+ return ::Error(Message);

+ }

+ /// Returns the set of (global) block abbreviations defined for the

+ /// given block ID.

+ BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {

+ BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];

+ if (Abbrevs == 0) {

+ Abbrevs = new BlockAbbrevs(BlockID);

+ Context->BlockAbbrevsMap[BlockID] = Abbrevs;

+ }

+ return Abbrevs;

+ }

+ virtual bool ParseBlock(unsigned BlockID) {

+ NaClBlockCopyParser Parser(BlockID, this);

+ return Parser.ParseThisBlock();

+ }

+ virtual void EnterBlock(unsigned NumWords) {

+ unsigned BlockID = GetBlockID();

+ BlockAbbreviations = GetGlobalAbbrevs(BlockID);

+ // Enter the subblock.

+ NaClBitcodeSelectorAbbrev

+ Selector(BlockAbbreviations->GetNumberAbbreviations()-1);

+ if (RemoveAbbreviations) Selector = NaClBitcodeSelectorAbbrev();

+ Context->Writer.EnterSubblock(BlockID, Selector);

+ // Note: We must dump module abbreviations as local

+ // abbreviations, because they are in a yet to be

+ // dumped BlockInfoBlock.

+ if (!RemoveAbbreviations && BlockID == naclbitc::MODULE_BLOCK_ID) {

+ BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID);

+ for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) {

+ Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy());

+ }

+ virtual void ExitBlock() {

+ Context->Writer.ExitBlock();

+ }

+ virtual void ProcessBlockInfo() {

+ assert(!Context->FoundFirstBlockInfo &&

+ "Input bitcode has more that one BlockInfoBlock");

+ Context->FoundFirstBlockInfo = true;

+ // Generate global abbreviations within a blockinfo block.

+ Context->Writer.EnterBlockInfoBlock();

+ if (!RemoveAbbreviations) {

+ for (BlockAbbrevsMapType::const_iterator

+ Iter = Context->BlockAbbrevsMap.begin(),

+ IterEnd = Context->BlockAbbrevsMap.end();

+ Iter != IterEnd; ++Iter) {

+ unsigned BlockID = Iter->first;

+ // Don't emit module abbreviations, since they have been

+ // emitted as local abbreviations.

+ if (BlockID == naclbitc::MODULE_BLOCK_ID) continue;

+ BlockAbbrevs *Abbrevs = Iter->second;

+ if (Abbrevs == 0) continue;

+ for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation();

+ i < Abbrevs->GetNumberAbbreviations(); ++i) {

+ NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i);

+ Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev);

+ }

+ Context->Writer.ExitBlock();

+ }

+ virtual void ProcessRecord() {

+ const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();

+ if (RemoveAbbreviations) {

+ Context->Writer.EmitRecord(Record.GetCode(), Values, 0);

+ return;

+ }

+ // Find best fitting abbreviation to use, and print out the record

+ // using that abbreviations.

+ unsigned AbbrevIndex =

+ BlockAbbreviations->GetRecordAbbrevIndex(Record.GetRecordData());

+ if (AbbrevIndex == naclbitc::UNABBREV_RECORD) {

+ Context->Writer.EmitRecord(Record.GetCode(), Values, 0);

+ } else {

+ Context->Writer.EmitRecord(Record.GetCode(), Values, AbbrevIndex);

+ }

+};

+bool NaClBitcodeCopyParser::ParseBlock(unsigned BlockID) {

+ NaClBlockCopyParser Parser(BlockID, this);

+ return Parser.ParseThisBlock();

+// Read in bitcode, and write it back out using the abbreviations in

+// BlockAbbrevsMap, from memory buffer MemBuf containing the input

+// bitcode file.

+static bool CopyBitcode(std::unique_ptr<MemoryBuffer> &MemBuf,

+ BlockAbbrevsMapType &BlockAbbrevsMap) {

+ const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();

+ const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();

+ // Read header. No verification is needed since AnalyzeBitcode has

+ // already checked it.

+ NaClBitcodeHeader Header;

+ if (Header.Read(BufPtr, EndBufPtr))

+ return Error("Invalid PNaCl bitcode header");

+ // Create the bitcode reader.

+ NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);

+ NaClBitstreamCursor Stream(StreamFile);

+ // Create the bitcode writer.

+ SmallVector<char, 0> OutputBuffer;

+ OutputBuffer.reserve(256*1024);

+ NaClBitstreamWriter StreamWriter(OutputBuffer);

+ // Emit the file header.

+ NaClWriteHeader(Header, StreamWriter);

+ // Set up the parser.

+ NaClBitcodeCopyParser Parser(Stream, BlockAbbrevsMap, StreamWriter);

+ // Parse the bitcode and copy.

+ while (!Stream.AtEndOfStream()) {

+ if (Parser.Parse()) return true;

+ }

+ // Write out the copied results.

+ std::error_code EC;

+ std::unique_ptr<tool_output_file> OutFile(

+ new tool_output_file(OutputFilename.c_str(), EC, sys::fs::F_None));

+ if (EC)

+ return Error(EC.message());

+ // Write the generated bitstream to "Out".

+ OutFile->os().write((char*)&OutputBuffer.front(),

+ OutputBuffer.size());

+ OutFile->keep();

+ return false;

+// Build fast lookup abbreviation maps for each of the abbreviation blocks

+// defined in AbbrevsMap.

+static void BuildAbbrevLookupMaps(BlockAbbrevsMapType &AbbrevsMap) {

+ for (BlockAbbrevsMapType::const_iterator

+ Iter = AbbrevsMap.begin(),

+ IterEnd = AbbrevsMap.end();

+ Iter != IterEnd; ++Iter) {

+ Iter->second->BuildAbbrevLookupSizeMap();

+ }

+} // namespace

+int main(int argc, char **argv) {

+ // Print a stack trace if we signal out.

+ sys::PrintStackTraceOnErrorSignal();

+ PrettyStackTraceProgram X(argc, argv);

+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.

+ cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n");

+ std::unique_ptr<MemoryBuffer> MemBuf;

+ if (ReadAndBuffer(MemBuf)) return 1;

+ BlockAbbrevsMapType BlockAbbrevsMap;

+ if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1;

+ if (ShowAbbreviationFrequencies || ShowValueDistributions) {

+ return 0;

+ }

+ BuildAbbrevLookupMaps(BlockAbbrevsMap);

+ if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1;

+ return 0;

« no previous file with comments | « tools/pnacl-bccompress/Makefile ('k') | tools/pnacl-bcdis/CMakeLists.txt » ('j') | no next file with comments »