| Index: tools/pnacl-bccompress/pnacl-bccompress.cpp
|
| diff --git a/tools/pnacl-bccompress/pnacl-bccompress.cpp b/tools/pnacl-bccompress/pnacl-bccompress.cpp
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..193a81f13352fafff5f99c25cc507039b98b7d45
|
| --- /dev/null
|
| +++ b/tools/pnacl-bccompress/pnacl-bccompress.cpp
|
| @@ -0,0 +1,1517 @@
|
| +//===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===//
|
| +//
|
| +// The LLVM Compiler Infrastructure
|
| +//
|
| +// This file is distributed under the University of Illinois Open Source
|
| +// License. See LICENSE.TXT for details.
|
| +//
|
| +//===----------------------------------------------------------------------===//
|
| +//
|
| +// This tool may be invoked in the following manner:
|
| +// pnacl-bccompress [options] bcin.pexe -o bcout.pexe
|
| +// - Read frozen PNaCl bitcode from the bcin.pexe and introduce
|
| +// abbreviations to compress it into bcout.pexe.
|
| +//
|
| +// Options:
|
| +// --help - Output information about command line switches
|
| +//
|
| +// This tool analyzes the data in bcin.pexe, and determines what
|
| +// abbreviations can be added to compress the bitcode file. The result
|
| +// is written to bcout.pexe.
|
| +//
|
| +// A bitcode file has two types of abbreviations. The first are Global
|
| +// abbreviations that apply to all instances of a particular type of
|
| +// block. These abbreviations appear in the BlockInfo block of the
|
| +// bitcode file.
|
| +//
|
| +// The second type of abbreviations are local to a particular instance
|
| +// of a block.
|
| +//
|
| +// In pnacl-bccompress, for simplicity, we will only add global
|
| +// abbreviations. Local abbreviations are converted to corresponding
|
| +// global abbreviations, so that they can be added as global
|
| +// abbreviations.
|
| +//
|
| +// The process of compressing is done by reading the input file
|
| +// twice. In the first round, the records are read and analyzed,
|
| +// generating a set of (global) abbreviations to use to generate the
|
| +// compressed output file. Then, the input file is read again, and for
|
| +// each record, the best fitting global abbreviation is found (or it
|
| +// figures out that leaving the record unabbreviated is best) and
|
| +// writes the record out accordingly.
|
| +//
|
| +// TODO(kschimpf): The current implementation does a trivial solution
|
| +// for the first round. It just converts all abbreviations (local and
|
| +// global) into global abbreviations. Future refinements of this file
|
| +// will generate better (and more intelligent) global abbreviations,
|
| +// based on the records found on the first read of the bitcode file.
|
| +//
|
| +//===----------------------------------------------------------------------===//
|
| +
|
| +#include "llvm/ADT/DenseMap.h"
|
| +#include "llvm/ADT/DenseSet.h"
|
| +#include "llvm/ADT/SmallVector.h"
|
| +#include "llvm/Bitcode/NaCl/AbbrevTrieNode.h"
|
| +#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"
|
| +#include "llvm/Bitcode/NaCl/NaClBitcodeParser.h"
|
| +#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h"
|
| +#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h"
|
| +#include "llvm/Bitcode/NaCl/NaClCompressBlockDist.h"
|
| +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
|
| +#include "llvm/Support/CommandLine.h"
|
| +#include "llvm/Support/FileSystem.h"
|
| +#include "llvm/Support/Format.h"
|
| +#include "llvm/Support/ManagedStatic.h"
|
| +#include "llvm/Support/MemoryBuffer.h"
|
| +#include "llvm/Support/PrettyStackTrace.h"
|
| +#include "llvm/Support/Signals.h"
|
| +#include "llvm/Support/ToolOutputFile.h"
|
| +#include "llvm/Support/raw_ostream.h"
|
| +#include <set>
|
| +#include <map>
|
| +#include <system_error>
|
| +
|
| +namespace {
|
| +
|
| +using namespace llvm;
|
| +
|
| +static cl::opt<bool>
|
| +TraceGeneratedAbbreviations(
|
| + "abbreviations",
|
| + cl::desc("Trace abbreviations added to compressed file"),
|
| + cl::init(false));
|
| +
|
| +static cl::opt<std::string>
|
| + InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
|
| +
|
| +
|
| +static cl::opt<std::string>
|
| +OutputFilename("o", cl::desc("Specify output filename"),
|
| + cl::value_desc("filename"), cl::init("-"));
|
| +
|
| +static cl::opt<bool>
|
| +ShowValueDistributions(
|
| + "show-distributions",
|
| + cl::desc("Show collected value distributions in bitcode records. "
|
| + "Turns off compression."),
|
| + cl::init(false));
|
| +
|
| +static cl::opt<bool>
|
| +ShowAbbrevLookupTries(
|
| + "show-lookup-tries",
|
| + cl::desc("Show lookup tries used to minimize search for \n"
|
| + "matching abbreviations."),
|
| + cl::init(false));
|
| +
|
| +static cl::opt<bool>
|
| +ShowAbbreviationFrequencies(
|
| + "show-abbreviation-frequencies",
|
| + cl::desc("Show how often each abbreviation is used. "
|
| + "Turns off compression."),
|
| + cl::init(false));
|
| +
|
| +// Note: When this flag is true, we still generate new abbreviations,
|
| +// because we don't want to add the complexity of turning it off.
|
| +// Rather, we simply make sure abbreviations are ignored when writing
|
| +// out the final copy.
|
| +static cl::opt<bool>
|
| +RemoveAbbreviations(
|
| + "remove-abbreviations",
|
| + cl::desc("Remove abbreviations from input bitcode file."),
|
| + cl::init(false));
|
| +
|
| +/// Error - All bitcode analysis errors go through this function,
|
| +/// making this a good place to breakpoint if debugging.
|
| +static bool Error(const std::string &Err) {
|
| + errs() << Err << "\n";
|
| + return true;
|
| +}
|
| +
|
| +// Prints out the abbreviation in readable form to the given Stream.
|
| +static void PrintAbbrev(raw_ostream &Stream,
|
| + unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
|
| + Stream << "Abbrev(block " << BlockID << "): ";
|
| + Abbrev->Print(Stream);
|
| +}
|
| +
|
| +// Reads the input file into the given buffer.
|
| +static bool ReadAndBuffer(std::unique_ptr<MemoryBuffer> &MemBuf) {
|
| + ErrorOr<std::unique_ptr<MemoryBuffer>> ErrOrFile =
|
| + MemoryBuffer::getFileOrSTDIN(InputFilename);
|
| + if (std::error_code EC = ErrOrFile.getError())
|
| + return Error("Error reading '" + InputFilename + "': " + EC.message());
|
| +
|
| + MemBuf.reset(ErrOrFile.get().release());
|
| + if (MemBuf->getBufferSize() % 4 != 0)
|
| + return Error("Bitcode stream should be a multiple of 4 bytes in length");
|
| + return false;
|
| +}
|
| +
|
| +/// type map from bitstream abbreviation index to corresponding
|
| +/// internal abbreviation index.
|
| +typedef std::map<unsigned, unsigned> BitstreamToInternalAbbrevMapType;
|
| +
|
| +/// Defines a mapping from bitstream abbreviation indices, to
|
| +/// corresponding internal abbreviation indices.
|
| +class AbbrevBitstreamToInternalMap {
|
| +public:
|
| +
|
| + AbbrevBitstreamToInternalMap()
|
| + : NextBitstreamAbbrevIndex(0) {}
|
| +
|
| + /// Returns the bitstream abbreviaion index that will be associated
|
| + /// with the next internal abbreviation index.
|
| + unsigned GetNextBitstreamAbbrevIndex() {
|
| + return NextBitstreamAbbrevIndex;
|
| + }
|
| +
|
| + /// Changes the next bitstream abbreviation index to the given value.
|
| + void SetNextBitstreamAbbrevIndex(unsigned NextIndex) {
|
| + NextBitstreamAbbrevIndex = NextIndex;
|
| + }
|
| +
|
| + /// Returns true if there is an internal abbreviation index for the
|
| + /// given bitstream abbreviation.
|
| + bool DefinesBitstreamAbbrevIndex(unsigned Index) {
|
| + return BitstreamToInternalAbbrevMap.find(Index) !=
|
| + BitstreamToInternalAbbrevMap.end();
|
| + }
|
| +
|
| + /// Returns the internal abbreviation index for the given bitstream
|
| + /// abbreviation index.
|
| + unsigned GetInternalAbbrevIndex(unsigned Index) {
|
| + return BitstreamToInternalAbbrevMap.at(Index);
|
| + }
|
| +
|
| + /// Installs the given internal abbreviation index using the next
|
| + /// available bitstream abbreviation index.
|
| + void InstallNewBitstreamAbbrevIndex(unsigned InternalIndex) {
|
| + BitstreamToInternalAbbrevMap[NextBitstreamAbbrevIndex++] = InternalIndex;
|
| + }
|
| +
|
| +private:
|
| + // The index of the next bitstream abbreviation to be defined.
|
| + unsigned NextBitstreamAbbrevIndex;
|
| + // Map from bitstream abbreviation index to corresponding internal
|
| + // abbreviation index.
|
| + BitstreamToInternalAbbrevMapType BitstreamToInternalAbbrevMap;
|
| +};
|
| +
|
| +/// Defines the list of abbreviations associated with a block.
|
| +class BlockAbbrevs {
|
| +public:
|
| + // Vector to hold the (ordered) list of abbreviations.
|
| + typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector;
|
| +
|
| + BlockAbbrevs(unsigned BlockID)
|
| + : BlockID(BlockID) {
|
| + // backfill internal indices that don't correspond to bitstream
|
| + // application abbreviations, so that added abbreviations have
|
| + // valid abbreviation indices.
|
| + for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) {
|
| + // Make all non-application abbreviations look like the default
|
| + // abbreviation.
|
| + NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
|
| + Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
|
| + Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6));
|
| + Abbrevs.push_back(Abbrev);
|
| + }
|
| + GlobalAbbrevBitstreamToInternalMap.
|
| + SetNextBitstreamAbbrevIndex(Abbrevs.size());
|
| + }
|
| +
|
| + ~BlockAbbrevs() {
|
| + for (AbbrevVector::const_iterator
|
| + Iter = Abbrevs.begin(), IterEnd = Abbrevs.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + (*Iter)->dropRef();
|
| + }
|
| + for (AbbrevLookupSizeMap::const_iterator
|
| + Iter = LookupMap.begin(), IterEnd = LookupMap.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + delete Iter->second;
|
| + }
|
| + }
|
| +
|
| + // Constant used to denote that a given abbreviation is not in the
|
| + // set of abbreviations defined by the block.
|
| + static const int NO_SUCH_ABBREVIATION = -1;
|
| +
|
| + // Returns the index to the corresponding application abbreviation,
|
| + // if it exists. Otherwise return NO_SUCH_ABBREVIATION;
|
| + int FindAbbreviation(const NaClBitCodeAbbrev *Abbrev) const {
|
| + for (unsigned i = GetFirstApplicationAbbreviation();
|
| + i < GetNumberAbbreviations(); ++i) {
|
| + if (*Abbrevs[i] == *Abbrev) return i;
|
| + }
|
| + return NO_SUCH_ABBREVIATION;
|
| + }
|
| +
|
| + /// Adds the given abbreviation to the set of global abbreviations
|
| + /// defined for the block. Guarantees that duplicate abbreviations
|
| + /// are not added to the block. Note: Code takes ownership of
|
| + /// the given abbreviation. Returns true if new abbreviation.
|
| + /// Updates Index to the index where the abbreviation is located
|
| + /// in the set of abbreviations.
|
| + bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) {
|
| + Index = FindAbbreviation(Abbrev);
|
| + if (Index != NO_SUCH_ABBREVIATION) {
|
| + // Already defined, don't install.
|
| + Abbrev->dropRef();
|
| + return false;
|
| + }
|
| +
|
| + // New abbreviation. Add.
|
| + Index = Abbrevs.size();
|
| + Abbrevs.push_back(Abbrev);
|
| + return true;
|
| + }
|
| +
|
| + /// Adds the given abbreviation to the set of global abbreviations
|
| + /// defined for the block. Guarantees that duplicate abbreviations
|
| + /// are not added to the block. Note: Code takes ownership of
|
| + /// the given abbreviation. Returns true if new abbreviation.
|
| + bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) {
|
| + int Index;
|
| + return AddAbbreviation(Abbrev, Index);
|
| + }
|
| +
|
| + /// The block ID associated with the block.
|
| + unsigned GetBlockID() const {
|
| + return BlockID;
|
| + }
|
| +
|
| + /// Returns the index of the frist application abbreviation for the
|
| + /// block.
|
| + unsigned GetFirstApplicationAbbreviation() const {
|
| + return naclbitc::FIRST_APPLICATION_ABBREV;
|
| + }
|
| +
|
| + // Returns the number of abbreviations associated with the block.
|
| + unsigned GetNumberAbbreviations() const {
|
| + return Abbrevs.size();
|
| + }
|
| +
|
| + /// Returns the abbreviation associated with the given abbreviation
|
| + /// index.
|
| + NaClBitCodeAbbrev *GetIndexedAbbrev(unsigned index) {
|
| + if (index >= Abbrevs.size()) return 0;
|
| + return Abbrevs[index];
|
| + }
|
| +
|
| + // Builds the corresponding fast lookup map for finding abbreviations
|
| + // that applies to abbreviations in the block
|
| + void BuildAbbrevLookupSizeMap() {
|
| + NaClBuildAbbrevLookupMap(GetLookupMap(),
|
| + GetAbbrevs(),
|
| + GetFirstApplicationAbbreviation());
|
| + if (ShowAbbrevLookupTries) PrintLookupMap(errs());
|
| + }
|
| +
|
| + AbbrevBitstreamToInternalMap &GetGlobalAbbrevBitstreamToInternalMap() {
|
| + return GlobalAbbrevBitstreamToInternalMap;
|
| + }
|
| +
|
| + AbbrevLookupSizeMap &GetLookupMap() {
|
| + return LookupMap;
|
| + }
|
| +
|
| + // Returns lower level vector of abbreviations.
|
| + const AbbrevVector &GetAbbrevs() const {
|
| + return Abbrevs;
|
| + }
|
| +
|
| + // Returns the abbreviation (index) to use for the corresponding
|
| + // record, based on the abbreviations of this block. Note: Assumes
|
| + // that BuildAbbrevLookupSizeMap has already been called.
|
| + unsigned GetRecordAbbrevIndex(const NaClBitcodeRecordData &Record) {
|
| + unsigned BestIndex = 0; // Ignored unless found candidate.
|
| + unsigned BestScore = 0; // Number of bits associated with BestIndex.
|
| + bool FoundCandidate = false;
|
| + NaClBitcodeValues Values(Record);
|
| + size_t Size = Values.size();
|
| +
|
| + if (Size > NaClValueIndexCutoff) Size = NaClValueIndexCutoff+1;
|
| + AbbrevLookupSizeMap::const_iterator Pos = LookupMap.find(Size);
|
| + if (Pos != LookupMap.end()) {
|
| + if (const AbbrevTrieNode *Node = Pos->second) {
|
| + if (const AbbrevTrieNode *MatchNode =
|
| + Node->MatchRecord(Record)) {
|
| + const std::set<AbbrevIndexPair> &Abbreviations =
|
| + MatchNode->GetAbbreviations();
|
| + for (std::set<AbbrevIndexPair>::const_iterator
|
| + Iter = Abbreviations.begin(),
|
| + IterEnd = Abbreviations.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + uint64_t NumBits = 0;
|
| + if (CanUseAbbreviation(Values, Iter->second, NumBits)) {
|
| + if (!FoundCandidate || NumBits < BestScore) {
|
| + // Use this as candidate.
|
| + BestIndex = Iter->first;
|
| + BestScore = NumBits;
|
| + FoundCandidate = true;
|
| + }
|
| + }
|
| + }
|
| + }
|
| + }
|
| + }
|
| + if (FoundCandidate && BestScore <= UnabbreviatedSize(Record)) {
|
| + return BestIndex;
|
| + }
|
| + return naclbitc::UNABBREV_RECORD;
|
| + }
|
| +
|
| + // Computes the number of bits that will be generated by the
|
| + // corresponding read record, if no abbreviation is used.
|
| + static uint64_t UnabbreviatedSize(const NaClBitcodeRecordData &Record) {
|
| + uint64_t NumBits = MatchVBRBits(Record.Code, DefaultVBRBits);
|
| + size_t NumValues = Record.Values.size();
|
| + NumBits += MatchVBRBits(NumValues, DefaultVBRBits);
|
| + for (size_t Index = 0; Index < NumValues; ++Index) {
|
| + NumBits += MatchVBRBits(Record.Values[Index], DefaultVBRBits);
|
| + }
|
| + return NumBits;
|
| + }
|
| +
|
| + // Returns true if the given abbreviation can be used to represent the
|
| + // record. Sets NumBits to the number of bits the abbreviation will
|
| + // generate. Note: Value of NumBits is undefined if this function
|
| + // return false.
|
| + static bool CanUseAbbreviation(NaClBitcodeValues &Values,
|
| + NaClBitCodeAbbrev *Abbrev, uint64_t &NumBits) {
|
| + NumBits = 0;
|
| + unsigned OpIndex = 0;
|
| + unsigned OpIndexEnd = Abbrev->getNumOperandInfos();
|
| + size_t ValueIndex = 0;
|
| + size_t ValueIndexEnd = Values.size();
|
| + while (ValueIndex < ValueIndexEnd && OpIndex < OpIndexEnd) {
|
| + const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(OpIndex);
|
| + switch (Op.getEncoding()) {
|
| + case NaClBitCodeAbbrevOp::Literal:
|
| + if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {
|
| + ++ValueIndex;
|
| + ++OpIndex;
|
| + continue;
|
| + } else {
|
| + return false;
|
| + }
|
| + case NaClBitCodeAbbrevOp::Array: {
|
| + assert(OpIndex+2 == OpIndexEnd);
|
| + const NaClBitCodeAbbrevOp &ElmtOp =
|
| + Abbrev->getOperandInfo(OpIndex+1);
|
| +
|
| + // Add size of array.
|
| + NumBits += MatchVBRBits(Values.size()-ValueIndex, DefaultVBRBits);
|
| +
|
| + // Add size of each field.
|
| + for (; ValueIndex != ValueIndexEnd; ++ValueIndex) {
|
| + uint64_t FieldBits=0;
|
| + if (!CanUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) {
|
| + return false;
|
| + }
|
| + NumBits += FieldBits;
|
| + }
|
| + return true;
|
| + }
|
| + default: {
|
| + if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {
|
| + ++ValueIndex;
|
| + ++OpIndex;
|
| + break;
|
| + }
|
| + return false;
|
| + }
|
| + }
|
| + }
|
| + return ValueIndex == ValueIndexEnd && OpIndex == OpIndexEnd;
|
| + }
|
| +
|
| + // Returns true if the given abbreviation Op defines a single value,
|
| + // and can be applied to the given Val. Adds the number of bits the
|
| + // abbreviation Op will generate to NumBits if Op applies.
|
| + static bool CanUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op,
|
| + uint64_t Val,
|
| + uint64_t &NumBits) {
|
| + switch (Op.getEncoding()) {
|
| + case NaClBitCodeAbbrevOp::Literal:
|
| + return Val == Op.getValue();
|
| + case NaClBitCodeAbbrevOp::Array:
|
| + return false;
|
| + case NaClBitCodeAbbrevOp::Fixed: {
|
| + uint64_t Width = Op.getValue();
|
| + if (!MatchFixedBits(Val, Width))
|
| + return false;
|
| + NumBits += Width;
|
| + return true;
|
| + }
|
| + case NaClBitCodeAbbrevOp::VBR:
|
| + if (unsigned Width = MatchVBRBits(Val, Op.getValue())) {
|
| + NumBits += Width;
|
| + return true;
|
| + } else {
|
| + return false;
|
| + }
|
| + case NaClBitCodeAbbrevOp::Char6:
|
| + if (!NaClBitCodeAbbrevOp::isChar6(Val)) return false;
|
| + NumBits += 6;
|
| + return true;
|
| + }
|
| + llvm_unreachable("unhandled NaClBitCodeAbbrevOp encoding");
|
| + }
|
| +
|
| + // Returns true if the given Val can be represented by abbreviation
|
| + // operand Fixed(Width).
|
| + static bool MatchFixedBits(uint64_t Val, unsigned Width) {
|
| + // Note: The reader only allows up to 32 bits for fixed values.
|
| + if (Val & Mask32) return false;
|
| + if (Val & ~(~0U >> (32-Width))) return false;
|
| + return true;
|
| + }
|
| +
|
| + // Returns the number of bits needed to represent Val by abbreviation
|
| + // operand VBR(Width). Note: Returns 0 if Val can't be represented
|
| + // by VBR(Width).
|
| + static unsigned MatchVBRBits(uint64_t Val, unsigned Width) {
|
| + if (Width == 0) return 0;
|
| + unsigned NumBits = 0;
|
| + while (1) {
|
| + // values emitted Width-1 bits at a time (plus a continue bit).
|
| + NumBits += Width;
|
| + if ((Val & (1U << (Width-1))) == 0)
|
| + return NumBits;
|
| + Val >>= Width-1;
|
| + }
|
| + }
|
| +
|
| +private:
|
| + // Defines the number of bits used to print VBR array field values.
|
| + static const unsigned DefaultVBRBits = 6;
|
| + // Masks out the top-32 bits of a uint64_t value.
|
| + static const uint64_t Mask32 = 0xFFFFFFFF00000000;
|
| + // The block ID for which abbreviations are being associated.
|
| + unsigned BlockID;
|
| + // The list of abbreviations defined for the block.
|
| + AbbrevVector Abbrevs;
|
| + // The mapping from global bitstream abbreviations to the corresponding
|
| + // block abbreviation index (in Abbrevs).
|
| + AbbrevBitstreamToInternalMap GlobalAbbrevBitstreamToInternalMap;
|
| + // A fast lookup map for finding the abbreviation that applies
|
| + // to a record.
|
| + AbbrevLookupSizeMap LookupMap;
|
| +
|
| + void PrintLookupMap(raw_ostream &Stream) {
|
| + Stream << "------------------------------\n";
|
| + Stream << "Block " << GetBlockID() << " abbreviation tries:\n";
|
| + bool IsFirstIteration = true;
|
| + for (AbbrevLookupSizeMap::const_iterator
|
| + Iter = LookupMap.begin(), IterEnd = LookupMap.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + if (IsFirstIteration)
|
| + IsFirstIteration = false;
|
| + else
|
| + Stream << "-----\n";
|
| + if (Iter->second) {
|
| + Stream << "Index " << Iter->first << ":\n";
|
| + Iter->second->Print(Stream, " ");
|
| + }
|
| + }
|
| + Stream << "------------------------------\n";
|
| + }
|
| +};
|
| +
|
| +/// Defines a map from block ID's to the corresponding abbreviation
|
| +/// map to use.
|
| +typedef DenseMap<unsigned, BlockAbbrevs*> BlockAbbrevsMapType;
|
| +
|
| +/// Parses the bitcode file, analyzes it, and generates the
|
| +/// corresponding lists of global abbreviations to use in the
|
| +/// generated (compressed) bitcode file.
|
| +class NaClAnalyzeParser : public NaClBitcodeParser {
|
| + NaClAnalyzeParser(const NaClAnalyzeParser&)
|
| + LLVM_DELETED_FUNCTION;
|
| + void operator=(const NaClAnalyzeParser&)
|
| + LLVM_DELETED_FUNCTION;
|
| +
|
| +public:
|
| + // Creates the analysis parser, which will fill the given
|
| + // BlockAbbrevsMap with appropriate abbreviations, after
|
| + // analyzing the bitcode file defined by Cursor.
|
| + NaClAnalyzeParser(NaClBitstreamCursor &Cursor,
|
| + BlockAbbrevsMapType &BlockAbbrevsMap)
|
| + : NaClBitcodeParser(Cursor),
|
| + BlockAbbrevsMap(BlockAbbrevsMap),
|
| + BlockDist(&NaClCompressBlockDistElement::Sentinel),
|
| + AbbrevListener(this)
|
| + {
|
| + SetListener(&AbbrevListener);
|
| + }
|
| +
|
| + virtual ~NaClAnalyzeParser() {}
|
| +
|
| + virtual bool Error(const std::string &Message) {
|
| + // Use local error routine so that all errors are treated uniformly.
|
| + return ::Error(Message);
|
| + }
|
| +
|
| + virtual bool ParseBlock(unsigned BlockID);
|
| +
|
| + // Mapping from block ID's to the corresponding list of abbreviations
|
| + // associated with that block.
|
| + BlockAbbrevsMapType &BlockAbbrevsMap;
|
| +
|
| + // Nested distribution capturing distribution of records in bitcode file.
|
| + NaClBitcodeBlockDist BlockDist;
|
| +
|
| + // Listener used to get abbreviations as they are read.
|
| + NaClBitcodeParserListener AbbrevListener;
|
| +};
|
| +
|
| +class NaClBlockAnalyzeParser : public NaClBitcodeParser {
|
| + NaClBlockAnalyzeParser(const NaClBlockAnalyzeParser&)
|
| + LLVM_DELETED_FUNCTION;
|
| + void operator=(NaClBlockAnalyzeParser&)
|
| + LLVM_DELETED_FUNCTION;
|
| +
|
| +public:
|
| + /// Top-level constructor to build the top-level block with the
|
| + /// given BlockID, and collect data (for compression) in that block.
|
| + NaClBlockAnalyzeParser(unsigned BlockID,
|
| + NaClAnalyzeParser *Context)
|
| + : NaClBitcodeParser(BlockID, Context), Context(Context) {
|
| + Init();
|
| + }
|
| +
|
| + virtual ~NaClBlockAnalyzeParser() {
|
| + Context->BlockDist.AddBlock(GetBlock());
|
| + }
|
| +
|
| +protected:
|
| + /// Nested constructor to parse a block within a block. Creates a
|
| + /// block parser to parse a block with the given BlockID, and
|
| + /// collect data (for compression) in that block.
|
| + NaClBlockAnalyzeParser(unsigned BlockID,
|
| + NaClBlockAnalyzeParser *EnclosingParser)
|
| + : NaClBitcodeParser(BlockID, EnclosingParser),
|
| + Context(EnclosingParser->Context) {
|
| + Init();
|
| + }
|
| +
|
| +public:
|
| + virtual bool Error(const std::string &Message) {
|
| + // Use local error routine so that all errors are treated uniformly.
|
| + return ::Error(Message);
|
| + }
|
| +
|
| + virtual bool ParseBlock(unsigned BlockID) {
|
| + NaClBlockAnalyzeParser Parser(BlockID, this);
|
| + return Parser.ParseThisBlock();
|
| + }
|
| +
|
| + virtual void ProcessRecord() {
|
| + // Before processing the record, we need to rename the abbreviation
|
| + // index, so that we can look it up in the set of block abbreviations
|
| + // being defined.
|
| + if (Record.UsedAnAbbreviation()) {
|
| + unsigned AbbrevIndex = Record.GetAbbreviationIndex();
|
| + if (LocalAbbrevBitstreamToInternalMap.
|
| + DefinesBitstreamAbbrevIndex(AbbrevIndex)) {
|
| + Record.SetAbbreviationIndex(
|
| + LocalAbbrevBitstreamToInternalMap.
|
| + GetInternalAbbrevIndex(AbbrevIndex));
|
| + } else {
|
| + AbbrevBitstreamToInternalMap &GlobalAbbrevBitstreamToInternalMap =
|
| + GlobalBlockAbbrevs->GetGlobalAbbrevBitstreamToInternalMap();
|
| + if (GlobalAbbrevBitstreamToInternalMap.
|
| + DefinesBitstreamAbbrevIndex(AbbrevIndex)) {
|
| + Record.SetAbbreviationIndex(
|
| + GlobalAbbrevBitstreamToInternalMap.
|
| + GetInternalAbbrevIndex(AbbrevIndex));
|
| + } else {
|
| + report_fatal_error("Bad abbreviation index in file");
|
| + }
|
| + }
|
| + }
|
| +
|
| + cast<NaClCompressBlockDistElement>(
|
| + Context->BlockDist.GetElement(Record.GetBlockID()))
|
| + ->GetAbbrevDist().AddRecord(Record);
|
| + }
|
| +
|
| + virtual void ProcessAbbreviation(unsigned BlockID,
|
| + NaClBitCodeAbbrev *Abbrev,
|
| + bool IsLocal) {
|
| + int Index;
|
| + AddAbbreviation(BlockID, Abbrev->Simplify(), Index);
|
| + if (IsLocal) {
|
| + LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index);
|
| + } else {
|
| + GetGlobalAbbrevs(BlockID)->GetGlobalAbbrevBitstreamToInternalMap().
|
| + InstallNewBitstreamAbbrevIndex(Index);
|
| + }
|
| + }
|
| +
|
| +protected:
|
| + // The context (i.e. top-level) parser.
|
| + NaClAnalyzeParser *Context;
|
| +
|
| + // The global block abbreviations associated with this block.
|
| + BlockAbbrevs *GlobalBlockAbbrevs;
|
| +
|
| + // The local abbreviations associated with this block.
|
| + AbbrevBitstreamToInternalMap LocalAbbrevBitstreamToInternalMap;
|
| +
|
| + /// Returns the set of (global) block abbreviations defined for the
|
| + /// given block ID.
|
| + BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {
|
| + BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];
|
| + if (Abbrevs == 0) {
|
| + Abbrevs = new BlockAbbrevs(BlockID);
|
| + Context->BlockAbbrevsMap[BlockID] = Abbrevs;
|
| + }
|
| + return Abbrevs;
|
| + }
|
| +
|
| + void SetGlobalAbbrevs(unsigned BlockID, BlockAbbrevs *Abbrevs) {
|
| + Context->BlockAbbrevsMap[BlockID] = Abbrevs;
|
| + }
|
| +
|
| + // Adds the abbreviation to the list of abbreviations for the given
|
| + // block.
|
| + void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev,
|
| + int &Index) {
|
| + // Get block abbreviations.
|
| + BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID);
|
| +
|
| + // Read abbreviation and add as a global abbreviation.
|
| + if (Abbrevs->AddAbbreviation(Abbrev, Index)
|
| + && TraceGeneratedAbbreviations) {
|
| + PrintAbbrev(errs(), BlockID, Abbrev);
|
| + }
|
| + }
|
| +
|
| + /// Finds the index to the corresponding internal block abbreviation
|
| + /// for the given abbreviation.
|
| + int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
|
| + return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev);
|
| + }
|
| +
|
| + void Init() {
|
| + GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID());
|
| + LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex(
|
| + GlobalBlockAbbrevs->
|
| + GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex());
|
| + }
|
| +};
|
| +
|
| +bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) {
|
| + NaClBlockAnalyzeParser Parser(BlockID, this);
|
| + return Parser.ParseThisBlock();
|
| +}
|
| +
|
| +/// Models the unrolling of an abbreviation into its sequence of
|
| +/// individual operators. That is, unrolling arrays to match the width
|
| +/// of the abbreviation.
|
| +///
|
| +/// For example, consider the abbreviation [Array(VBR(6))]. If the
|
| +/// distribution map has data for records of size 3, and the
|
| +/// distribution map suggests that a constant 4 appears as the second
|
| +/// element in the record, it is nontrivial to figure out how to
|
| +/// encorporate this into this abbrevation. Hence, we unroll the array
|
| +/// (3 times) to get [VBR(6), VBR(6), VBR(6), Array(VBR(6))]. To
|
| +/// update the second element to match the literal 4, we only need to
|
| +/// replace the second element in the unrolled abbreviation resulting
|
| +/// in [VBR(6), Lit(4), VBR(6), Array(VBR(6))].
|
| +///
|
| +/// After we have done appropriate substitutions, we can simplify the
|
| +/// unrolled abbreviation by calling method Restore.
|
| +///
|
| +/// Note: We unroll in the form that best matches the distribution
|
| +/// map. Hence, the code is stored as a separate operator. We also
|
| +/// keep the array abbreviation op, for untracked elements within the
|
| +/// distribution maps.
|
| +class UnrolledAbbreviation {
|
| + void operator=(const UnrolledAbbreviation&) LLVM_DELETED_FUNCTION;
|
| +public:
|
| + /// Unroll the given abbreviation, assuming it has the given size
|
| + /// (as specified in the distribution maps).
|
| + ///
|
| + /// If argument CanBeBigger is true, then we do not assume that we
|
| + /// can remove the trailing array when expanding, because the
|
| + /// actual size of the corresponding record using this abbreviation
|
| + /// may be bigger.
|
| + UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size,
|
| + bool CanBeBigger = false)
|
| + : CodeOp(0) {
|
| + unsigned NextOp = 0;
|
| + UnrollAbbrevOp(CodeOp, Abbrev, NextOp);
|
| + --Size;
|
| + for (unsigned i = 0; i < Size; ++i) {
|
| + // Create slot and then fill with appropriate operator.
|
| + AbbrevOps.push_back(CodeOp);
|
| + UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp);
|
| + }
|
| + if (CanBeBigger) {
|
| + for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) {
|
| + MoreOps.push_back(Abbrev->getOperandInfo(NextOp));
|
| + }
|
| + } else if (NextOp < Abbrev->getNumOperandInfos() &&
|
| + !Abbrev->getOperandInfo(NextOp).isArrayOp()) {
|
| + errs() << (Size+1) << ": ";
|
| + Abbrev->Print(errs());
|
| + llvm_unreachable("Malformed abbreviation/size pair");
|
| + }
|
| + }
|
| +
|
| + explicit UnrolledAbbreviation(const UnrolledAbbreviation &Abbrev)
|
| + : CodeOp(Abbrev.CodeOp),
|
| + AbbrevOps(Abbrev.AbbrevOps),
|
| + MoreOps(Abbrev.MoreOps) {
|
| + }
|
| +
|
| + /// Prints out the abbreviation modeled by the unrolled
|
| + /// abbreviation.
|
| + void Print(raw_ostream &Stream) const {
|
| + NaClBitCodeAbbrev *Abbrev = Restore(false);
|
| + Abbrev->Print(Stream);
|
| + Abbrev->dropRef();
|
| + }
|
| +
|
| + /// Converts the unrolled abbreviation back into a regular
|
| + /// abbreviation. If Simplify is true, we simplify the
|
| + /// unrolled abbreviation as well.
|
| + NaClBitCodeAbbrev *Restore(bool Simplify=true) const {
|
| + NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
|
| + Abbrev->Add(CodeOp);
|
| + for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
|
| + Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + Abbrev->Add(*Iter);
|
| + }
|
| + for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
|
| + Iter = MoreOps.begin(), IterEnd = MoreOps.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + Abbrev->Add(*Iter);
|
| + }
|
| + if (Simplify) {
|
| + NaClBitCodeAbbrev *SimpAbbrev = Abbrev->Simplify();
|
| + Abbrev->dropRef();
|
| + return SimpAbbrev;
|
| + } else {
|
| + return Abbrev;
|
| + }
|
| + }
|
| +
|
| + // The abbreviation used for the record code.
|
| + NaClBitCodeAbbrevOp CodeOp;
|
| +
|
| + // The abbreviations used for each tracked value index.
|
| + std::vector<NaClBitCodeAbbrevOp> AbbrevOps;
|
| +
|
| +private:
|
| + // Any remaining abbreviation operators not part of the unrolling.
|
| + std::vector<NaClBitCodeAbbrevOp> MoreOps;
|
| +
|
| + // Extracts out the next abbreviation operator from the abbreviation
|
| + // Abbrev, given the index NextOp, and assigns it to AbbrevOp
|
| + void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp,
|
| + NaClBitCodeAbbrev *Abbrev,
|
| + unsigned &NextOp) {
|
| + assert(NextOp < Abbrev->getNumOperandInfos());
|
| + const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp);
|
| + if (Op.isArrayOp()) {
|
| + // Do not advance. The array operator assumes that all remaining
|
| + // elements should match its argument.
|
| + AbbrevOp = Abbrev->getOperandInfo(NextOp+1);
|
| + } else {
|
| + AbbrevOp = Op;
|
| + NextOp++;
|
| + }
|
| + }
|
| +};
|
| +
|
| +/// Models a candidate block abbreviation, which is a blockID, and the
|
| +/// corresponding abbreviation to be considered for addition. Note:
|
| +/// Constructors and assignment take ownership of the abbreviation.
|
| +class CandBlockAbbrev {
|
| +public:
|
| + CandBlockAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbrev)
|
| + : BlockID(BlockID), Abbrev(Abbrev) {
|
| + }
|
| +
|
| + CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev)
|
| + : BlockID(BlockAbbrev.BlockID),
|
| + Abbrev(BlockAbbrev.Abbrev) {
|
| + Abbrev->addRef();
|
| + }
|
| +
|
| + void operator=(const CandBlockAbbrev &BlockAbbrev) {
|
| + Abbrev->dropRef();
|
| + BlockID = BlockAbbrev.BlockID;
|
| + Abbrev = BlockAbbrev.Abbrev;
|
| + Abbrev->addRef();
|
| + }
|
| +
|
| + ~CandBlockAbbrev() {
|
| + Abbrev->dropRef();
|
| + }
|
| +
|
| + /// The block ID of the candidate abbreviation.
|
| + unsigned GetBlockID() const {
|
| + return BlockID;
|
| + }
|
| +
|
| + /// The abbreviation of the candidate abbreviation.
|
| + const NaClBitCodeAbbrev *GetAbbrev() const {
|
| + return Abbrev;
|
| + }
|
| +
|
| + /// orders this against the candidate abbreviation.
|
| + int Compare(const CandBlockAbbrev &CandAbbrev) const {
|
| + unsigned diff = BlockID - CandAbbrev.BlockID;
|
| + if (diff) return diff;
|
| + return Abbrev->Compare(*CandAbbrev.Abbrev);
|
| + }
|
| +
|
| + /// Prints the candidate abbreviation to the given stream.
|
| + void Print(raw_ostream &Stream) const {
|
| + PrintAbbrev(Stream, BlockID, Abbrev);
|
| + }
|
| +
|
| +private:
|
| + // The block the abbreviation applies to.
|
| + unsigned BlockID;
|
| + // The candidate abbreviation.
|
| + NaClBitCodeAbbrev *Abbrev;
|
| +};
|
| +
|
| +static inline bool operator<(const CandBlockAbbrev &A1,
|
| + const CandBlockAbbrev &A2) {
|
| + return A1.Compare(A2) < 0;
|
| +}
|
| +
|
| +/// Models the set of candidate abbreviations being considered, and
|
| +/// the number of abbreviations associated with each candidate
|
| +/// Abbreviation.
|
| +///
|
| +/// Note: Because we may have abbreviation refinements of A->B->C and
|
| +/// A->D->C, we need to accumulate instance counts in such cases.
|
| +class CandidateAbbrevs {
|
| +public:
|
| + // Map from candidate abbreviations to the corresponding number of
|
| + // instances.
|
| + typedef std::map<CandBlockAbbrev, unsigned> AbbrevCountMap;
|
| + typedef AbbrevCountMap::const_iterator const_iterator;
|
| +
|
| + /// Creates an empty set of candidate abbreviations, to be
|
| + /// (potentially) added to the given set of block abbreviations.
|
| + /// Argument is the (global) block abbreviations map, which is
|
| + /// used to determine if it already exists.
|
| + CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap)
|
| + : BlockAbbrevsMap(BlockAbbrevsMap)
|
| + {}
|
| +
|
| + /// Adds the given (unrolled) abbreviation as a candidate
|
| + /// abbreviation to the given block. NumInstances is the number of
|
| + /// instances expected to use this candidate abbreviation. Returns
|
| + /// true if the corresponding candidate abbreviation is added to this
|
| + /// set of candidate abbreviations.
|
| + bool Add(unsigned BlockID,
|
| + UnrolledAbbreviation &UnrolledAbbrev,
|
| + unsigned NumInstances);
|
| +
|
| + /// Returns the list of candidate abbreviations in this set.
|
| + const AbbrevCountMap &GetAbbrevsMap() const {
|
| + return AbbrevsMap;
|
| + }
|
| +
|
| + /// Prints out the current contents of this set.
|
| + void Print(raw_ostream &Stream, const char *Title = "Candidates") const {
|
| + Stream << "-- " << Title << ": \n";
|
| + for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + Stream << format("%12u", Iter->second) << ": ";
|
| + Iter->first.Print(Stream);
|
| + }
|
| + Stream << "--\n";
|
| + }
|
| +
|
| +private:
|
| + // The set of abbreviations and corresponding number instances.
|
| + AbbrevCountMap AbbrevsMap;
|
| +
|
| + // The map of (global) abbreviations already associated with each block.
|
| + BlockAbbrevsMapType &BlockAbbrevsMap;
|
| +};
|
| +
|
| +bool CandidateAbbrevs::Add(unsigned BlockID,
|
| + UnrolledAbbreviation &UnrolledAbbrev,
|
| + unsigned NumInstances) {
|
| + // Drop if it corresponds to an existing global abbreviation.
|
| + NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.Restore();
|
| + if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) {
|
| + if (Abbrevs->FindAbbreviation(Abbrev) !=
|
| + BlockAbbrevs::NO_SUCH_ABBREVIATION) {
|
| + Abbrev->dropRef();
|
| + return false;
|
| + }
|
| + }
|
| +
|
| + CandBlockAbbrev CandAbbrev(BlockID, Abbrev);
|
| + AbbrevCountMap::iterator Pos = AbbrevsMap.find(CandAbbrev);
|
| + if (Pos == AbbrevsMap.end()) {
|
| + AbbrevsMap[CandAbbrev] = NumInstances;
|
| + } else {
|
| + Pos->second += NumInstances;
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +// Look for new abbreviations in block BlockID, considering it was
|
| +// read with the given abbreviation Abbrev, and considering changing
|
| +// the abbreviation opererator for value Index. ValueDist is how
|
| +// values at Index are distributed. Any found abbreviations are added
|
| +// to the candidate abbreviations CandAbbrevs. Returns true only if we
|
| +// have added new candidate abbreviations to CandAbbrevs.
|
| +static bool AddNewAbbreviations(unsigned BlockID,
|
| + const UnrolledAbbreviation &Abbrev,
|
| + unsigned Index,
|
| + NaClBitcodeValueDist &ValueDist,
|
| + CandidateAbbrevs &CandAbbrevs) {
|
| + // TODO(kschimpf): Add code to try and find a better encoding for
|
| + // the values, based on the distribution.
|
| +
|
| + // If this index is already a literal abbreviation, no improvements can
|
| + // be made.
|
| + const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index];
|
| + if (Op.isLiteral()) return false;
|
| +
|
| + // Search based on sorted distribution, which sorts by number of
|
| + // instances. Start by trying to find possible constants to use.
|
| + const NaClBitcodeDist::Distribution *
|
| + Distribution = ValueDist.GetDistribution();
|
| + for (NaClBitcodeDist::Distribution::const_iterator
|
| + Iter = Distribution->begin(), IterEnd = Distribution->end();
|
| + Iter != IterEnd; ++Iter) {
|
| + NaClValueRangeType Range = GetNaClValueRange(Iter->second);
|
| + if (Range.first != Range.second) continue; // not a constant.
|
| +
|
| + // Defines a constant. Try as new candidate range. In addition,
|
| + // don't try any more constant values, since this is the one with
|
| + // the greatest number of instances.
|
| + NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first);
|
| + UnrolledAbbreviation CandAbbrev(Abbrev);
|
| + CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first);
|
| + return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances());
|
| + }
|
| + return false;
|
| +}
|
| +
|
| +// Look for new abbreviations in block BlockID, considering it was
|
| +// read with the given abbreviation Abbrev. IndexDist is the
|
| +// corresponding distribution of value indices associated with the
|
| +// abbreviation. Any found abbreviations are added to the candidate
|
| +// abbreviations CandAbbrevs.
|
| +static void AddNewAbbreviations(unsigned BlockID,
|
| + const UnrolledAbbreviation &Abbrev,
|
| + NaClBitcodeDist &IndexDist,
|
| + CandidateAbbrevs &CandAbbrevs) {
|
| + // Search based on sorted distribution, which sorts based on heuristic
|
| + // of best index to fix first.
|
| + const NaClBitcodeDist::Distribution *
|
| + IndexDistribution = IndexDist.GetDistribution();
|
| + for (NaClBitcodeDist::Distribution::const_iterator
|
| + IndexIter = IndexDistribution->begin(),
|
| + IndexIterEnd = IndexDistribution->end();
|
| + IndexIter != IndexIterEnd; ++IndexIter) {
|
| + unsigned Index = static_cast<unsigned>(IndexIter->second);
|
| + if (AddNewAbbreviations(
|
| + BlockID, Abbrev, Index,
|
| + cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index))
|
| + ->GetValueDist(),
|
| + CandAbbrevs)) {
|
| + return;
|
| + }
|
| + }
|
| +}
|
| +
|
| +// Look for new abbreviations in block BlockID, considering it was
|
| +// read with the given abbreviation Abbrev, and the given record Code.
|
| +// SizeDist is the corresponding distribution of sizes associated with
|
| +// the abbreviation. Any found abbreviations are added to the
|
| +// candidate abbreviations CandAbbrevs.
|
| +static void AddNewAbbreviations(unsigned BlockID,
|
| + NaClBitCodeAbbrev *Abbrev,
|
| + unsigned Code,
|
| + NaClBitcodeDist &SizeDist,
|
| + CandidateAbbrevs &CandAbbrevs) {
|
| + const NaClBitcodeDist::Distribution *
|
| + SizeDistribution = SizeDist.GetDistribution();
|
| + for (NaClBitcodeDist::Distribution::const_iterator
|
| + SizeIter = SizeDistribution->begin(),
|
| + SizeIterEnd = SizeDistribution->end();
|
| + SizeIter != SizeIterEnd; ++SizeIter) {
|
| + unsigned Size = static_cast<unsigned>(SizeIter->second);
|
| + UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1 /* Add code! */,
|
| + Size >= NaClValueIndexCutoff);
|
| + if (!UnrolledAbbrev.CodeOp.isLiteral()) {
|
| + // Try making the code a literal.
|
| + UnrolledAbbreviation CandAbbrev(UnrolledAbbrev);
|
| + CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code);
|
| + CandAbbrevs.Add(BlockID, CandAbbrev,
|
| + SizeDist.at(Size)->GetNumInstances());
|
| + }
|
| + // Now process value indices to find candidate abbreviations.
|
| + AddNewAbbreviations(
|
| + BlockID, UnrolledAbbrev,
|
| + cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size))
|
| + ->GetValueIndexDist(),
|
| + CandAbbrevs);
|
| + }
|
| +}
|
| +
|
| +// Look for new abbreviations in block BlockID. Abbrevs is the map of
|
| +// read (globally defined) abbreviations associated with the
|
| +// BlockID. AbbrevDist is the distribution map of abbreviations
|
| +// associated with BlockID. Any found abbreviations are added to the
|
| +// candidate abbreviations CandAbbrevs.
|
| +static void AddNewAbbreviations(unsigned BlockID,
|
| + BlockAbbrevs *Abbrevs,
|
| + NaClBitcodeDist &AbbrevDist,
|
| + CandidateAbbrevs &CandAbbrevs) {
|
| + const NaClBitcodeDist::Distribution *
|
| + AbbrevDistribution = AbbrevDist.GetDistribution();
|
| + for (NaClBitcodeDist::Distribution::const_iterator
|
| + AbbrevIter = AbbrevDistribution->begin(),
|
| + AbbrevIterEnd = AbbrevDistribution->end();
|
| + AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
|
| + NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second;
|
| + NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex);
|
| + NaClBitcodeAbbrevDistElement *AbbrevElmt =
|
| + cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex));
|
| + NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist();
|
| +
|
| + const NaClBitcodeDist::Distribution *
|
| + CodeDistribution = CodeDist.GetDistribution();
|
| + for (NaClBitcodeDist::Distribution::const_iterator
|
| + CodeIter = CodeDistribution->begin(),
|
| + CodeIterEnd = CodeDistribution->end();
|
| + CodeIter != CodeIterEnd; ++CodeIter) {
|
| + unsigned Code = static_cast<unsigned>(CodeIter->second);
|
| + AddNewAbbreviations(
|
| + BlockID,
|
| + Abbrev,
|
| + Code,
|
| + cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second))
|
| + ->GetSizeDist(),
|
| + CandAbbrevs);
|
| + }
|
| + }
|
| +}
|
| +
|
| +typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType;
|
| +
|
| +// Look for new abbreviations in the given block distribution map
|
| +// BlockDist. BlockAbbrevsMap contains the set of read global
|
| +// abbreviations. Adds found candidate abbreviations to the set of
|
| +// known global abbreviations.
|
| +static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist,
|
| + BlockAbbrevsMapType &BlockAbbrevsMap) {
|
| + CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap);
|
| + // Start by collecting candidate abbreviations.
|
| + const NaClBitcodeDist::Distribution *
|
| + Distribution = BlockDist.GetDistribution();
|
| + for (NaClBitcodeDist::Distribution::const_iterator
|
| + Iter = Distribution->begin(), IterEnd = Distribution->end();
|
| + Iter != IterEnd; ++Iter) {
|
| + unsigned BlockID = static_cast<unsigned>(Iter->second);
|
| + AddNewAbbreviations(
|
| + BlockID,
|
| + BlockAbbrevsMap[BlockID],
|
| + cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))
|
| + ->GetAbbrevDist(),
|
| + CandAbbrevs);
|
| + }
|
| + // Install candidate abbreviations.
|
| + //
|
| + // Sort the candidate abbreviations by number of instances, so that
|
| + // if multiple abbreviations apply, the one with the largest number
|
| + // of instances will be chosen when compressing a file.
|
| + //
|
| + // For example, we may have just generated two abbreviations. The
|
| + // first has replaced the 3rd element with the constant 4 while the
|
| + // second replaced the 4th element with the constant 5. The first
|
| + // abbreviation can apply to 300 records while the second can apply
|
| + // to 1000 records. Assuming that both abbreviations shrink the
|
| + // record by the same number of bits, we clearly want the tool to
|
| + // choose the second abbreviation when selecting the abbreviation
|
| + // index to choose (via method GetRecordAbbrevIndex).
|
| + //
|
| + // Selecting the second is important in that abbreviation are
|
| + // refined by successive calls to this tool. We do not want to
|
| + // restrict downstream refinements prematurely. Hence, we want the
|
| + // tool to choose the abbreviation with the most candidates first.
|
| + //
|
| + // Since method GetRecordAbbrevIndex chooses the first abbreviation
|
| + // that generates the least number of bits, we clearly want to make
|
| + // sure that the second abbreviation occurs before the first.
|
| + std::vector<CountedAbbrevType> Candidates;
|
| + for (CandidateAbbrevs::const_iterator
|
| + Iter = CandAbbrevs.GetAbbrevsMap().begin(),
|
| + IterEnd = CandAbbrevs.GetAbbrevsMap().end();
|
| + Iter != IterEnd; ++Iter) {
|
| + Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first));
|
| + }
|
| + std::sort(Candidates.begin(), Candidates.end());
|
| + std::vector<CountedAbbrevType>::const_reverse_iterator
|
| + Iter = Candidates.rbegin(), IterEnd = Candidates.rend();
|
| + if (Iter == IterEnd) return;
|
| +
|
| + if (TraceGeneratedAbbreviations) {
|
| + errs() << "-- New abbrevations:\n";
|
| + }
|
| + unsigned Min = (Iter->first >> 2);
|
| + for (; Iter != IterEnd; ++Iter) {
|
| + if (Iter->first < Min) break;
|
| + unsigned BlockID = Iter->second.GetBlockID();
|
| + BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID];
|
| + NaClBitCodeAbbrev *Abbrev = Iter->second.GetAbbrev()->Copy();
|
| + if (TraceGeneratedAbbreviations) {
|
| + errs() <<format("%12u", Iter->first) << ": ";
|
| + PrintAbbrev(errs(), BlockID, Abbrev);
|
| + }
|
| + Abbrevs->AddAbbreviation(Abbrev);
|
| + }
|
| + if (TraceGeneratedAbbreviations) {
|
| + errs() << "--\n";
|
| + }
|
| +}
|
| +
|
| +// Walks the block distribution (BlockDist), sorting entries based
|
| +// on the distribution of blocks and abbreviations, and then
|
| +// prints out the frequency of each abbreviation used.
|
| +static void DisplayAbbreviationFrequencies(
|
| + raw_ostream &Output,
|
| + const NaClBitcodeBlockDist &BlockDist,
|
| + const BlockAbbrevsMapType &BlockAbbrevsMap) {
|
| + const NaClBitcodeDist::Distribution *BlockDistribution =
|
| + BlockDist.GetDistribution();
|
| + for (NaClBitcodeDist::Distribution::const_iterator
|
| + BlockIter = BlockDistribution->begin(),
|
| + BlockIterEnd = BlockDistribution->end();
|
| + BlockIter != BlockIterEnd; ++BlockIter) {
|
| + unsigned BlockID = static_cast<unsigned>(BlockIter->second);
|
| + BlockAbbrevsMapType::const_iterator BlockPos = BlockAbbrevsMap.find(BlockID);
|
| + if (BlockPos == BlockAbbrevsMap.end()) continue;
|
| + Output << "Block " << BlockID << "\n";
|
| + if (NaClCompressBlockDistElement *BlockElement =
|
| + dyn_cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))) {
|
| + NaClBitcodeDist &AbbrevDist = BlockElement->GetAbbrevDist();
|
| + const NaClBitcodeDist::Distribution *AbbrevDistribution =
|
| + AbbrevDist.GetDistribution();
|
| + unsigned Total = AbbrevDist.GetTotal();
|
| + for (NaClBitcodeDist::Distribution::const_iterator
|
| + AbbrevIter = AbbrevDistribution->begin(),
|
| + AbbrevIterEnd = AbbrevDistribution->end();
|
| + AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
|
| + unsigned Index = static_cast<unsigned>(AbbrevIter->second);
|
| + unsigned Count = AbbrevDist.at(Index)->GetNumInstances();
|
| + Output << format("%8u (%6.2f%%): ", Count,
|
| + (double) Count/Total*100.0);
|
| + BlockPos->second->GetIndexedAbbrev(Index)->Print(Output);
|
| + }
|
| + }
|
| + Output << '\n';
|
| + }
|
| +}
|
| +
|
| +// Read in bitcode, analyze data, and figure out set of abbreviations
|
| +// to use, from memory buffer MemBuf containing the input bitcode file.
|
| +static bool AnalyzeBitcode(std::unique_ptr<MemoryBuffer> &MemBuf,
|
| + BlockAbbrevsMapType &BlockAbbrevsMap) {
|
| + // TODO(kschimpf): The current code only extracts abbreviations
|
| + // defined in the bitcode file. This code needs to be updated to
|
| + // collect data distributions and figure out better (global)
|
| + // abbreviations to use.
|
| +
|
| + const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
|
| + const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
|
| +
|
| + // First read header and verify it is good.
|
| + NaClBitcodeHeader Header;
|
| + if (Header.Read(BufPtr, EndBufPtr) || !Header.IsSupported())
|
| + return Error("Invalid PNaCl bitcode header");
|
| +
|
| + // Create a bitstream reader to read the bitcode file.
|
| + NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
|
| + NaClBitstreamCursor Stream(StreamFile);
|
| +
|
| + // Parse the the bitcode file.
|
| + NaClAnalyzeParser Parser(Stream, BlockAbbrevsMap);
|
| + while (!Stream.AtEndOfStream()) {
|
| + if (Parser.Parse()) return true;
|
| + }
|
| +
|
| + if (ShowAbbreviationFrequencies || ShowValueDistributions) {
|
| + std::error_code EC;
|
| + raw_fd_ostream Output(OutputFilename, EC, sys::fs::F_None);
|
| + if (EC) {
|
| + errs() << EC.message() << "\n";
|
| + exit(1);
|
| + }
|
| + if (ShowAbbreviationFrequencies)
|
| + DisplayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap);
|
| + if (ShowValueDistributions)
|
| + Parser.BlockDist.Print(Output);
|
| + }
|
| +
|
| + AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap);
|
| + return false;
|
| +}
|
| +
|
| +/// Parses the input bitcode file and generates the corresponding
|
| +/// compressed bitcode file, by replacing abbreviations in the input
|
| +/// file with the corresponding abbreviations defined in
|
| +/// BlockAbbrevsMap.
|
| +class NaClBitcodeCopyParser : public NaClBitcodeParser {
|
| +public:
|
| + // Top-level constructor to build the appropriate block parser
|
| + // using the given BlockAbbrevsMap to define abbreviations.
|
| + NaClBitcodeCopyParser(NaClBitstreamCursor &Cursor,
|
| + BlockAbbrevsMapType &BlockAbbrevsMap,
|
| + NaClBitstreamWriter &Writer)
|
| + : NaClBitcodeParser(Cursor),
|
| + BlockAbbrevsMap(BlockAbbrevsMap),
|
| + Writer(Writer),
|
| + FoundFirstBlockInfo(false)
|
| + {}
|
| +
|
| + virtual ~NaClBitcodeCopyParser() {}
|
| +
|
| + virtual bool Error(const std::string &Message) {
|
| + // Use local error routine so that all errors are treated uniformly.
|
| + return ::Error(Message);
|
| + }
|
| +
|
| + virtual bool ParseBlock(unsigned BlockID);
|
| +
|
| + // The abbreviations to use for the copied bitcode.
|
| + BlockAbbrevsMapType &BlockAbbrevsMap;
|
| +
|
| + // The bitstream to copy the compressed bitcode into.
|
| + NaClBitstreamWriter &Writer;
|
| +
|
| + // True if we have already found the first block info block.
|
| + // Used to make sure we don't use abbreviations until we
|
| + // have put them into the bitcode file.
|
| + bool FoundFirstBlockInfo;
|
| +};
|
| +
|
| +class NaClBlockCopyParser : public NaClBitcodeParser {
|
| +public:
|
| + // Top-level constructor to build the appropriate block parser.
|
| + NaClBlockCopyParser(unsigned BlockID,
|
| + NaClBitcodeCopyParser *Context)
|
| + : NaClBitcodeParser(BlockID, Context),
|
| + Context(Context),
|
| + BlockAbbreviations(0)
|
| + {}
|
| +
|
| + virtual ~NaClBlockCopyParser() {}
|
| +
|
| +protected:
|
| + // The context defining state associated with the block parser.
|
| + NaClBitcodeCopyParser *Context;
|
| +
|
| + // The block abbreviations defined for this block (initialized by
|
| + // EnterBlock).
|
| + BlockAbbrevs *BlockAbbreviations;
|
| +
|
| + /// Constructor to parse nested blocks. Creates a block parser to
|
| + /// parse in a block with the given BlockID, and write the block
|
| + /// back out using the abbreviations in BlockAbbrevsMap.
|
| + NaClBlockCopyParser(unsigned BlockID,
|
| + NaClBlockCopyParser *EnclosingParser)
|
| + : NaClBitcodeParser(BlockID, EnclosingParser),
|
| + Context(EnclosingParser->Context)
|
| + {}
|
| +
|
| + virtual bool Error(const std::string &Message) {
|
| + // Use local error routine so that all errors are treated uniformly.
|
| + return ::Error(Message);
|
| + }
|
| +
|
| + /// Returns the set of (global) block abbreviations defined for the
|
| + /// given block ID.
|
| + BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {
|
| + BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];
|
| + if (Abbrevs == 0) {
|
| + Abbrevs = new BlockAbbrevs(BlockID);
|
| + Context->BlockAbbrevsMap[BlockID] = Abbrevs;
|
| + }
|
| + return Abbrevs;
|
| + }
|
| +
|
| + virtual bool ParseBlock(unsigned BlockID) {
|
| + NaClBlockCopyParser Parser(BlockID, this);
|
| + return Parser.ParseThisBlock();
|
| + }
|
| +
|
| + virtual void EnterBlock(unsigned NumWords) {
|
| + unsigned BlockID = GetBlockID();
|
| + BlockAbbreviations = GetGlobalAbbrevs(BlockID);
|
| +
|
| + // Enter the subblock.
|
| + NaClBitcodeSelectorAbbrev
|
| + Selector(BlockAbbreviations->GetNumberAbbreviations()-1);
|
| + if (RemoveAbbreviations) Selector = NaClBitcodeSelectorAbbrev();
|
| + Context->Writer.EnterSubblock(BlockID, Selector);
|
| +
|
| + // Note: We must dump module abbreviations as local
|
| + // abbreviations, because they are in a yet to be
|
| + // dumped BlockInfoBlock.
|
| + if (!RemoveAbbreviations && BlockID == naclbitc::MODULE_BLOCK_ID) {
|
| + BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID);
|
| + for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) {
|
| + Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy());
|
| + }
|
| + }
|
| + }
|
| +
|
| + virtual void ExitBlock() {
|
| + Context->Writer.ExitBlock();
|
| + }
|
| +
|
| + virtual void ProcessBlockInfo() {
|
| + assert(!Context->FoundFirstBlockInfo &&
|
| + "Input bitcode has more that one BlockInfoBlock");
|
| + Context->FoundFirstBlockInfo = true;
|
| +
|
| + // Generate global abbreviations within a blockinfo block.
|
| + Context->Writer.EnterBlockInfoBlock();
|
| + if (!RemoveAbbreviations) {
|
| + for (BlockAbbrevsMapType::const_iterator
|
| + Iter = Context->BlockAbbrevsMap.begin(),
|
| + IterEnd = Context->BlockAbbrevsMap.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + unsigned BlockID = Iter->first;
|
| + // Don't emit module abbreviations, since they have been
|
| + // emitted as local abbreviations.
|
| + if (BlockID == naclbitc::MODULE_BLOCK_ID) continue;
|
| +
|
| + BlockAbbrevs *Abbrevs = Iter->second;
|
| + if (Abbrevs == 0) continue;
|
| + for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation();
|
| + i < Abbrevs->GetNumberAbbreviations(); ++i) {
|
| + NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i);
|
| + Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev);
|
| + }
|
| + }
|
| + }
|
| + Context->Writer.ExitBlock();
|
| + }
|
| +
|
| + virtual void ProcessRecord() {
|
| + const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
|
| + if (RemoveAbbreviations) {
|
| + Context->Writer.EmitRecord(Record.GetCode(), Values, 0);
|
| + return;
|
| + }
|
| + // Find best fitting abbreviation to use, and print out the record
|
| + // using that abbreviations.
|
| + unsigned AbbrevIndex =
|
| + BlockAbbreviations->GetRecordAbbrevIndex(Record.GetRecordData());
|
| + if (AbbrevIndex == naclbitc::UNABBREV_RECORD) {
|
| + Context->Writer.EmitRecord(Record.GetCode(), Values, 0);
|
| + } else {
|
| + Context->Writer.EmitRecord(Record.GetCode(), Values, AbbrevIndex);
|
| + }
|
| + }
|
| +};
|
| +
|
| +bool NaClBitcodeCopyParser::ParseBlock(unsigned BlockID) {
|
| + NaClBlockCopyParser Parser(BlockID, this);
|
| + return Parser.ParseThisBlock();
|
| +}
|
| +
|
| +// Read in bitcode, and write it back out using the abbreviations in
|
| +// BlockAbbrevsMap, from memory buffer MemBuf containing the input
|
| +// bitcode file.
|
| +static bool CopyBitcode(std::unique_ptr<MemoryBuffer> &MemBuf,
|
| + BlockAbbrevsMapType &BlockAbbrevsMap) {
|
| +
|
| + const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
|
| + const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
|
| +
|
| + // Read header. No verification is needed since AnalyzeBitcode has
|
| + // already checked it.
|
| + NaClBitcodeHeader Header;
|
| + if (Header.Read(BufPtr, EndBufPtr))
|
| + return Error("Invalid PNaCl bitcode header");
|
| +
|
| + // Create the bitcode reader.
|
| + NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
|
| + NaClBitstreamCursor Stream(StreamFile);
|
| +
|
| + // Create the bitcode writer.
|
| + SmallVector<char, 0> OutputBuffer;
|
| + OutputBuffer.reserve(256*1024);
|
| + NaClBitstreamWriter StreamWriter(OutputBuffer);
|
| +
|
| + // Emit the file header.
|
| + NaClWriteHeader(Header, StreamWriter);
|
| +
|
| + // Set up the parser.
|
| + NaClBitcodeCopyParser Parser(Stream, BlockAbbrevsMap, StreamWriter);
|
| +
|
| + // Parse the bitcode and copy.
|
| + while (!Stream.AtEndOfStream()) {
|
| + if (Parser.Parse()) return true;
|
| + }
|
| +
|
| + // Write out the copied results.
|
| + std::error_code EC;
|
| + std::unique_ptr<tool_output_file> OutFile(
|
| + new tool_output_file(OutputFilename.c_str(), EC, sys::fs::F_None));
|
| + if (EC)
|
| + return Error(EC.message());
|
| +
|
| + // Write the generated bitstream to "Out".
|
| + OutFile->os().write((char*)&OutputBuffer.front(),
|
| + OutputBuffer.size());
|
| + OutFile->keep();
|
| +
|
| + return false;
|
| +}
|
| +
|
| +// Build fast lookup abbreviation maps for each of the abbreviation blocks
|
| +// defined in AbbrevsMap.
|
| +static void BuildAbbrevLookupMaps(BlockAbbrevsMapType &AbbrevsMap) {
|
| + for (BlockAbbrevsMapType::const_iterator
|
| + Iter = AbbrevsMap.begin(),
|
| + IterEnd = AbbrevsMap.end();
|
| + Iter != IterEnd; ++Iter) {
|
| + Iter->second->BuildAbbrevLookupSizeMap();
|
| + }
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +int main(int argc, char **argv) {
|
| + // Print a stack trace if we signal out.
|
| + sys::PrintStackTraceOnErrorSignal();
|
| + PrettyStackTraceProgram X(argc, argv);
|
| + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
|
| + cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n");
|
| +
|
| + std::unique_ptr<MemoryBuffer> MemBuf;
|
| + if (ReadAndBuffer(MemBuf)) return 1;
|
| + BlockAbbrevsMapType BlockAbbrevsMap;
|
| + if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1;
|
| + if (ShowAbbreviationFrequencies || ShowValueDistributions) {
|
| + return 0;
|
| + }
|
| + BuildAbbrevLookupMaps(BlockAbbrevsMap);
|
| + if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1;
|
| + return 0;
|
| +}
|
|
|