OLD | NEW |
(Empty) | |
| 1 //===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===// |
| 2 // |
| 3 // The LLVM Compiler Infrastructure |
| 4 // |
| 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. |
| 7 // |
| 8 //===----------------------------------------------------------------------===// |
| 9 // |
| 10 // This tool may be invoked in the following manner: |
| 11 // pnacl-bccompress [options] bcin.pexe -o bcout.pexe |
| 12 // - Read frozen PNaCl bitcode from the bcin.pexe and introduce |
| 13 // abbreviations to compress it into bcout.pexe. |
| 14 // |
| 15 // Options: |
| 16 // --help - Output information about command line switches |
| 17 // |
| 18 // This tool analyzes the data in bcin.pexe, and determines what |
| 19 // abbreviations can be added to compress the bitcode file. The result |
| 20 // is written to bcout.pexe. |
| 21 // |
| 22 // A bitcode file has two types of abbreviations. The first are Global |
| 23 // abbreviations that apply to all instances of a particular type of |
| 24 // block. These abbreviations appear in the BlockInfo block of the |
| 25 // bitcode file. |
| 26 // |
| 27 // The second type of abbreviations are local to a particular instance |
| 28 // of a block. |
| 29 // |
| 30 // In pnacl-bccompress, for simplicity, we will only add global |
| 31 // abbreviations. Local abbreviations are converted to corresponding |
| 32 // global abbreviations, so that they can be added as global |
| 33 // abbreviations. |
| 34 // |
| 35 // The process of compressing is done by reading the input file |
| 36 // twice. In the first round, the records are read and analyzed, |
| 37 // generating a set of (global) abbreviations to use to generate the |
| 38 // compressed output file. Then, the input file is read again, and for |
| 39 // each record, the best fitting global abbreviation is found (or it |
| 40 // figures out that leaving the record unabbreviated is best) and |
| 41 // writes the record out accordingly. |
| 42 // |
| 43 // TODO(kschimpf): The current implementation does a trivial solution |
| 44 // for the first round. It just converts all abbreviations (local and |
| 45 // global) into global abbreviations. Future refinements of this file |
| 46 // will generate better (and more intelligent) global abbreviations, |
| 47 // based on the records found on the first read of the bitcode file. |
| 48 // |
| 49 //===----------------------------------------------------------------------===// |
| 50 |
| 51 #include "llvm/ADT/DenseMap.h" |
| 52 #include "llvm/ADT/DenseSet.h" |
| 53 #include "llvm/ADT/SmallVector.h" |
| 54 #include "llvm/Bitcode/NaCl/AbbrevTrieNode.h" |
| 55 #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" |
| 56 #include "llvm/Bitcode/NaCl/NaClBitcodeParser.h" |
| 57 #include "llvm/Bitcode/NaCl/NaClBitstreamReader.h" |
| 58 #include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h" |
| 59 #include "llvm/Bitcode/NaCl/NaClCompressBlockDist.h" |
| 60 #include "llvm/Bitcode/NaCl/NaClReaderWriter.h" |
| 61 #include "llvm/Support/CommandLine.h" |
| 62 #include "llvm/Support/FileSystem.h" |
| 63 #include "llvm/Support/Format.h" |
| 64 #include "llvm/Support/ManagedStatic.h" |
| 65 #include "llvm/Support/MemoryBuffer.h" |
| 66 #include "llvm/Support/PrettyStackTrace.h" |
| 67 #include "llvm/Support/Signals.h" |
| 68 #include "llvm/Support/ToolOutputFile.h" |
| 69 #include "llvm/Support/raw_ostream.h" |
| 70 #include <set> |
| 71 #include <map> |
| 72 #include <system_error> |
| 73 |
| 74 namespace { |
| 75 |
| 76 using namespace llvm; |
| 77 |
| 78 static cl::opt<bool> |
| 79 TraceGeneratedAbbreviations( |
| 80 "abbreviations", |
| 81 cl::desc("Trace abbreviations added to compressed file"), |
| 82 cl::init(false)); |
| 83 |
| 84 static cl::opt<std::string> |
| 85 InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); |
| 86 |
| 87 |
| 88 static cl::opt<std::string> |
| 89 OutputFilename("o", cl::desc("Specify output filename"), |
| 90 cl::value_desc("filename"), cl::init("-")); |
| 91 |
| 92 static cl::opt<bool> |
| 93 ShowValueDistributions( |
| 94 "show-distributions", |
| 95 cl::desc("Show collected value distributions in bitcode records. " |
| 96 "Turns off compression."), |
| 97 cl::init(false)); |
| 98 |
| 99 static cl::opt<bool> |
| 100 ShowAbbrevLookupTries( |
| 101 "show-lookup-tries", |
| 102 cl::desc("Show lookup tries used to minimize search for \n" |
| 103 "matching abbreviations."), |
| 104 cl::init(false)); |
| 105 |
| 106 static cl::opt<bool> |
| 107 ShowAbbreviationFrequencies( |
| 108 "show-abbreviation-frequencies", |
| 109 cl::desc("Show how often each abbreviation is used. " |
| 110 "Turns off compression."), |
| 111 cl::init(false)); |
| 112 |
| 113 // Note: When this flag is true, we still generate new abbreviations, |
| 114 // because we don't want to add the complexity of turning it off. |
| 115 // Rather, we simply make sure abbreviations are ignored when writing |
| 116 // out the final copy. |
| 117 static cl::opt<bool> |
| 118 RemoveAbbreviations( |
| 119 "remove-abbreviations", |
| 120 cl::desc("Remove abbreviations from input bitcode file."), |
| 121 cl::init(false)); |
| 122 |
| 123 /// Error - All bitcode analysis errors go through this function, |
| 124 /// making this a good place to breakpoint if debugging. |
| 125 static bool Error(const std::string &Err) { |
| 126 errs() << Err << "\n"; |
| 127 return true; |
| 128 } |
| 129 |
| 130 // Prints out the abbreviation in readable form to the given Stream. |
| 131 static void PrintAbbrev(raw_ostream &Stream, |
| 132 unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { |
| 133 Stream << "Abbrev(block " << BlockID << "): "; |
| 134 Abbrev->Print(Stream); |
| 135 } |
| 136 |
| 137 // Reads the input file into the given buffer. |
| 138 static bool ReadAndBuffer(std::unique_ptr<MemoryBuffer> &MemBuf) { |
| 139 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrOrFile = |
| 140 MemoryBuffer::getFileOrSTDIN(InputFilename); |
| 141 if (std::error_code EC = ErrOrFile.getError()) |
| 142 return Error("Error reading '" + InputFilename + "': " + EC.message()); |
| 143 |
| 144 MemBuf.reset(ErrOrFile.get().release()); |
| 145 if (MemBuf->getBufferSize() % 4 != 0) |
| 146 return Error("Bitcode stream should be a multiple of 4 bytes in length"); |
| 147 return false; |
| 148 } |
| 149 |
| 150 /// type map from bitstream abbreviation index to corresponding |
| 151 /// internal abbreviation index. |
| 152 typedef std::map<unsigned, unsigned> BitstreamToInternalAbbrevMapType; |
| 153 |
| 154 /// Defines a mapping from bitstream abbreviation indices, to |
| 155 /// corresponding internal abbreviation indices. |
| 156 class AbbrevBitstreamToInternalMap { |
| 157 public: |
| 158 |
| 159 AbbrevBitstreamToInternalMap() |
| 160 : NextBitstreamAbbrevIndex(0) {} |
| 161 |
| 162 /// Returns the bitstream abbreviaion index that will be associated |
| 163 /// with the next internal abbreviation index. |
| 164 unsigned GetNextBitstreamAbbrevIndex() { |
| 165 return NextBitstreamAbbrevIndex; |
| 166 } |
| 167 |
| 168 /// Changes the next bitstream abbreviation index to the given value. |
| 169 void SetNextBitstreamAbbrevIndex(unsigned NextIndex) { |
| 170 NextBitstreamAbbrevIndex = NextIndex; |
| 171 } |
| 172 |
| 173 /// Returns true if there is an internal abbreviation index for the |
| 174 /// given bitstream abbreviation. |
| 175 bool DefinesBitstreamAbbrevIndex(unsigned Index) { |
| 176 return BitstreamToInternalAbbrevMap.find(Index) != |
| 177 BitstreamToInternalAbbrevMap.end(); |
| 178 } |
| 179 |
| 180 /// Returns the internal abbreviation index for the given bitstream |
| 181 /// abbreviation index. |
| 182 unsigned GetInternalAbbrevIndex(unsigned Index) { |
| 183 return BitstreamToInternalAbbrevMap.at(Index); |
| 184 } |
| 185 |
| 186 /// Installs the given internal abbreviation index using the next |
| 187 /// available bitstream abbreviation index. |
| 188 void InstallNewBitstreamAbbrevIndex(unsigned InternalIndex) { |
| 189 BitstreamToInternalAbbrevMap[NextBitstreamAbbrevIndex++] = InternalIndex; |
| 190 } |
| 191 |
| 192 private: |
| 193 // The index of the next bitstream abbreviation to be defined. |
| 194 unsigned NextBitstreamAbbrevIndex; |
| 195 // Map from bitstream abbreviation index to corresponding internal |
| 196 // abbreviation index. |
| 197 BitstreamToInternalAbbrevMapType BitstreamToInternalAbbrevMap; |
| 198 }; |
| 199 |
| 200 /// Defines the list of abbreviations associated with a block. |
| 201 class BlockAbbrevs { |
| 202 public: |
| 203 // Vector to hold the (ordered) list of abbreviations. |
| 204 typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector; |
| 205 |
| 206 BlockAbbrevs(unsigned BlockID) |
| 207 : BlockID(BlockID) { |
| 208 // backfill internal indices that don't correspond to bitstream |
| 209 // application abbreviations, so that added abbreviations have |
| 210 // valid abbreviation indices. |
| 211 for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) { |
| 212 // Make all non-application abbreviations look like the default |
| 213 // abbreviation. |
| 214 NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev(); |
| 215 Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); |
| 216 Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); |
| 217 Abbrevs.push_back(Abbrev); |
| 218 } |
| 219 GlobalAbbrevBitstreamToInternalMap. |
| 220 SetNextBitstreamAbbrevIndex(Abbrevs.size()); |
| 221 } |
| 222 |
| 223 ~BlockAbbrevs() { |
| 224 for (AbbrevVector::const_iterator |
| 225 Iter = Abbrevs.begin(), IterEnd = Abbrevs.end(); |
| 226 Iter != IterEnd; ++Iter) { |
| 227 (*Iter)->dropRef(); |
| 228 } |
| 229 for (AbbrevLookupSizeMap::const_iterator |
| 230 Iter = LookupMap.begin(), IterEnd = LookupMap.end(); |
| 231 Iter != IterEnd; ++Iter) { |
| 232 delete Iter->second; |
| 233 } |
| 234 } |
| 235 |
| 236 // Constant used to denote that a given abbreviation is not in the |
| 237 // set of abbreviations defined by the block. |
| 238 static const int NO_SUCH_ABBREVIATION = -1; |
| 239 |
| 240 // Returns the index to the corresponding application abbreviation, |
| 241 // if it exists. Otherwise return NO_SUCH_ABBREVIATION; |
| 242 int FindAbbreviation(const NaClBitCodeAbbrev *Abbrev) const { |
| 243 for (unsigned i = GetFirstApplicationAbbreviation(); |
| 244 i < GetNumberAbbreviations(); ++i) { |
| 245 if (*Abbrevs[i] == *Abbrev) return i; |
| 246 } |
| 247 return NO_SUCH_ABBREVIATION; |
| 248 } |
| 249 |
| 250 /// Adds the given abbreviation to the set of global abbreviations |
| 251 /// defined for the block. Guarantees that duplicate abbreviations |
| 252 /// are not added to the block. Note: Code takes ownership of |
| 253 /// the given abbreviation. Returns true if new abbreviation. |
| 254 /// Updates Index to the index where the abbreviation is located |
| 255 /// in the set of abbreviations. |
| 256 bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) { |
| 257 Index = FindAbbreviation(Abbrev); |
| 258 if (Index != NO_SUCH_ABBREVIATION) { |
| 259 // Already defined, don't install. |
| 260 Abbrev->dropRef(); |
| 261 return false; |
| 262 } |
| 263 |
| 264 // New abbreviation. Add. |
| 265 Index = Abbrevs.size(); |
| 266 Abbrevs.push_back(Abbrev); |
| 267 return true; |
| 268 } |
| 269 |
| 270 /// Adds the given abbreviation to the set of global abbreviations |
| 271 /// defined for the block. Guarantees that duplicate abbreviations |
| 272 /// are not added to the block. Note: Code takes ownership of |
| 273 /// the given abbreviation. Returns true if new abbreviation. |
| 274 bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) { |
| 275 int Index; |
| 276 return AddAbbreviation(Abbrev, Index); |
| 277 } |
| 278 |
| 279 /// The block ID associated with the block. |
| 280 unsigned GetBlockID() const { |
| 281 return BlockID; |
| 282 } |
| 283 |
| 284 /// Returns the index of the frist application abbreviation for the |
| 285 /// block. |
| 286 unsigned GetFirstApplicationAbbreviation() const { |
| 287 return naclbitc::FIRST_APPLICATION_ABBREV; |
| 288 } |
| 289 |
| 290 // Returns the number of abbreviations associated with the block. |
| 291 unsigned GetNumberAbbreviations() const { |
| 292 return Abbrevs.size(); |
| 293 } |
| 294 |
| 295 /// Returns the abbreviation associated with the given abbreviation |
| 296 /// index. |
| 297 NaClBitCodeAbbrev *GetIndexedAbbrev(unsigned index) { |
| 298 if (index >= Abbrevs.size()) return 0; |
| 299 return Abbrevs[index]; |
| 300 } |
| 301 |
| 302 // Builds the corresponding fast lookup map for finding abbreviations |
| 303 // that applies to abbreviations in the block |
| 304 void BuildAbbrevLookupSizeMap() { |
| 305 NaClBuildAbbrevLookupMap(GetLookupMap(), |
| 306 GetAbbrevs(), |
| 307 GetFirstApplicationAbbreviation()); |
| 308 if (ShowAbbrevLookupTries) PrintLookupMap(errs()); |
| 309 } |
| 310 |
| 311 AbbrevBitstreamToInternalMap &GetGlobalAbbrevBitstreamToInternalMap() { |
| 312 return GlobalAbbrevBitstreamToInternalMap; |
| 313 } |
| 314 |
| 315 AbbrevLookupSizeMap &GetLookupMap() { |
| 316 return LookupMap; |
| 317 } |
| 318 |
| 319 // Returns lower level vector of abbreviations. |
| 320 const AbbrevVector &GetAbbrevs() const { |
| 321 return Abbrevs; |
| 322 } |
| 323 |
| 324 // Returns the abbreviation (index) to use for the corresponding |
| 325 // record, based on the abbreviations of this block. Note: Assumes |
| 326 // that BuildAbbrevLookupSizeMap has already been called. |
| 327 unsigned GetRecordAbbrevIndex(const NaClBitcodeRecordData &Record) { |
| 328 unsigned BestIndex = 0; // Ignored unless found candidate. |
| 329 unsigned BestScore = 0; // Number of bits associated with BestIndex. |
| 330 bool FoundCandidate = false; |
| 331 NaClBitcodeValues Values(Record); |
| 332 size_t Size = Values.size(); |
| 333 |
| 334 if (Size > NaClValueIndexCutoff) Size = NaClValueIndexCutoff+1; |
| 335 AbbrevLookupSizeMap::const_iterator Pos = LookupMap.find(Size); |
| 336 if (Pos != LookupMap.end()) { |
| 337 if (const AbbrevTrieNode *Node = Pos->second) { |
| 338 if (const AbbrevTrieNode *MatchNode = |
| 339 Node->MatchRecord(Record)) { |
| 340 const std::set<AbbrevIndexPair> &Abbreviations = |
| 341 MatchNode->GetAbbreviations(); |
| 342 for (std::set<AbbrevIndexPair>::const_iterator |
| 343 Iter = Abbreviations.begin(), |
| 344 IterEnd = Abbreviations.end(); |
| 345 Iter != IterEnd; ++Iter) { |
| 346 uint64_t NumBits = 0; |
| 347 if (CanUseAbbreviation(Values, Iter->second, NumBits)) { |
| 348 if (!FoundCandidate || NumBits < BestScore) { |
| 349 // Use this as candidate. |
| 350 BestIndex = Iter->first; |
| 351 BestScore = NumBits; |
| 352 FoundCandidate = true; |
| 353 } |
| 354 } |
| 355 } |
| 356 } |
| 357 } |
| 358 } |
| 359 if (FoundCandidate && BestScore <= UnabbreviatedSize(Record)) { |
| 360 return BestIndex; |
| 361 } |
| 362 return naclbitc::UNABBREV_RECORD; |
| 363 } |
| 364 |
| 365 // Computes the number of bits that will be generated by the |
| 366 // corresponding read record, if no abbreviation is used. |
| 367 static uint64_t UnabbreviatedSize(const NaClBitcodeRecordData &Record) { |
| 368 uint64_t NumBits = MatchVBRBits(Record.Code, DefaultVBRBits); |
| 369 size_t NumValues = Record.Values.size(); |
| 370 NumBits += MatchVBRBits(NumValues, DefaultVBRBits); |
| 371 for (size_t Index = 0; Index < NumValues; ++Index) { |
| 372 NumBits += MatchVBRBits(Record.Values[Index], DefaultVBRBits); |
| 373 } |
| 374 return NumBits; |
| 375 } |
| 376 |
| 377 // Returns true if the given abbreviation can be used to represent the |
| 378 // record. Sets NumBits to the number of bits the abbreviation will |
| 379 // generate. Note: Value of NumBits is undefined if this function |
| 380 // return false. |
| 381 static bool CanUseAbbreviation(NaClBitcodeValues &Values, |
| 382 NaClBitCodeAbbrev *Abbrev, uint64_t &NumBits) { |
| 383 NumBits = 0; |
| 384 unsigned OpIndex = 0; |
| 385 unsigned OpIndexEnd = Abbrev->getNumOperandInfos(); |
| 386 size_t ValueIndex = 0; |
| 387 size_t ValueIndexEnd = Values.size(); |
| 388 while (ValueIndex < ValueIndexEnd && OpIndex < OpIndexEnd) { |
| 389 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(OpIndex); |
| 390 switch (Op.getEncoding()) { |
| 391 case NaClBitCodeAbbrevOp::Literal: |
| 392 if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { |
| 393 ++ValueIndex; |
| 394 ++OpIndex; |
| 395 continue; |
| 396 } else { |
| 397 return false; |
| 398 } |
| 399 case NaClBitCodeAbbrevOp::Array: { |
| 400 assert(OpIndex+2 == OpIndexEnd); |
| 401 const NaClBitCodeAbbrevOp &ElmtOp = |
| 402 Abbrev->getOperandInfo(OpIndex+1); |
| 403 |
| 404 // Add size of array. |
| 405 NumBits += MatchVBRBits(Values.size()-ValueIndex, DefaultVBRBits); |
| 406 |
| 407 // Add size of each field. |
| 408 for (; ValueIndex != ValueIndexEnd; ++ValueIndex) { |
| 409 uint64_t FieldBits=0; |
| 410 if (!CanUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) { |
| 411 return false; |
| 412 } |
| 413 NumBits += FieldBits; |
| 414 } |
| 415 return true; |
| 416 } |
| 417 default: { |
| 418 if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { |
| 419 ++ValueIndex; |
| 420 ++OpIndex; |
| 421 break; |
| 422 } |
| 423 return false; |
| 424 } |
| 425 } |
| 426 } |
| 427 return ValueIndex == ValueIndexEnd && OpIndex == OpIndexEnd; |
| 428 } |
| 429 |
| 430 // Returns true if the given abbreviation Op defines a single value, |
| 431 // and can be applied to the given Val. Adds the number of bits the |
| 432 // abbreviation Op will generate to NumBits if Op applies. |
| 433 static bool CanUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op, |
| 434 uint64_t Val, |
| 435 uint64_t &NumBits) { |
| 436 switch (Op.getEncoding()) { |
| 437 case NaClBitCodeAbbrevOp::Literal: |
| 438 return Val == Op.getValue(); |
| 439 case NaClBitCodeAbbrevOp::Array: |
| 440 return false; |
| 441 case NaClBitCodeAbbrevOp::Fixed: { |
| 442 uint64_t Width = Op.getValue(); |
| 443 if (!MatchFixedBits(Val, Width)) |
| 444 return false; |
| 445 NumBits += Width; |
| 446 return true; |
| 447 } |
| 448 case NaClBitCodeAbbrevOp::VBR: |
| 449 if (unsigned Width = MatchVBRBits(Val, Op.getValue())) { |
| 450 NumBits += Width; |
| 451 return true; |
| 452 } else { |
| 453 return false; |
| 454 } |
| 455 case NaClBitCodeAbbrevOp::Char6: |
| 456 if (!NaClBitCodeAbbrevOp::isChar6(Val)) return false; |
| 457 NumBits += 6; |
| 458 return true; |
| 459 } |
| 460 llvm_unreachable("unhandled NaClBitCodeAbbrevOp encoding"); |
| 461 } |
| 462 |
| 463 // Returns true if the given Val can be represented by abbreviation |
| 464 // operand Fixed(Width). |
| 465 static bool MatchFixedBits(uint64_t Val, unsigned Width) { |
| 466 // Note: The reader only allows up to 32 bits for fixed values. |
| 467 if (Val & Mask32) return false; |
| 468 if (Val & ~(~0U >> (32-Width))) return false; |
| 469 return true; |
| 470 } |
| 471 |
| 472 // Returns the number of bits needed to represent Val by abbreviation |
| 473 // operand VBR(Width). Note: Returns 0 if Val can't be represented |
| 474 // by VBR(Width). |
| 475 static unsigned MatchVBRBits(uint64_t Val, unsigned Width) { |
| 476 if (Width == 0) return 0; |
| 477 unsigned NumBits = 0; |
| 478 while (1) { |
| 479 // values emitted Width-1 bits at a time (plus a continue bit). |
| 480 NumBits += Width; |
| 481 if ((Val & (1U << (Width-1))) == 0) |
| 482 return NumBits; |
| 483 Val >>= Width-1; |
| 484 } |
| 485 } |
| 486 |
| 487 private: |
| 488 // Defines the number of bits used to print VBR array field values. |
| 489 static const unsigned DefaultVBRBits = 6; |
| 490 // Masks out the top-32 bits of a uint64_t value. |
| 491 static const uint64_t Mask32 = 0xFFFFFFFF00000000; |
| 492 // The block ID for which abbreviations are being associated. |
| 493 unsigned BlockID; |
| 494 // The list of abbreviations defined for the block. |
| 495 AbbrevVector Abbrevs; |
| 496 // The mapping from global bitstream abbreviations to the corresponding |
| 497 // block abbreviation index (in Abbrevs). |
| 498 AbbrevBitstreamToInternalMap GlobalAbbrevBitstreamToInternalMap; |
| 499 // A fast lookup map for finding the abbreviation that applies |
| 500 // to a record. |
| 501 AbbrevLookupSizeMap LookupMap; |
| 502 |
| 503 void PrintLookupMap(raw_ostream &Stream) { |
| 504 Stream << "------------------------------\n"; |
| 505 Stream << "Block " << GetBlockID() << " abbreviation tries:\n"; |
| 506 bool IsFirstIteration = true; |
| 507 for (AbbrevLookupSizeMap::const_iterator |
| 508 Iter = LookupMap.begin(), IterEnd = LookupMap.end(); |
| 509 Iter != IterEnd; ++Iter) { |
| 510 if (IsFirstIteration) |
| 511 IsFirstIteration = false; |
| 512 else |
| 513 Stream << "-----\n"; |
| 514 if (Iter->second) { |
| 515 Stream << "Index " << Iter->first << ":\n"; |
| 516 Iter->second->Print(Stream, " "); |
| 517 } |
| 518 } |
| 519 Stream << "------------------------------\n"; |
| 520 } |
| 521 }; |
| 522 |
| 523 /// Defines a map from block ID's to the corresponding abbreviation |
| 524 /// map to use. |
| 525 typedef DenseMap<unsigned, BlockAbbrevs*> BlockAbbrevsMapType; |
| 526 |
| 527 /// Parses the bitcode file, analyzes it, and generates the |
| 528 /// corresponding lists of global abbreviations to use in the |
| 529 /// generated (compressed) bitcode file. |
| 530 class NaClAnalyzeParser : public NaClBitcodeParser { |
| 531 NaClAnalyzeParser(const NaClAnalyzeParser&) |
| 532 LLVM_DELETED_FUNCTION; |
| 533 void operator=(const NaClAnalyzeParser&) |
| 534 LLVM_DELETED_FUNCTION; |
| 535 |
| 536 public: |
| 537 // Creates the analysis parser, which will fill the given |
| 538 // BlockAbbrevsMap with appropriate abbreviations, after |
| 539 // analyzing the bitcode file defined by Cursor. |
| 540 NaClAnalyzeParser(NaClBitstreamCursor &Cursor, |
| 541 BlockAbbrevsMapType &BlockAbbrevsMap) |
| 542 : NaClBitcodeParser(Cursor), |
| 543 BlockAbbrevsMap(BlockAbbrevsMap), |
| 544 BlockDist(&NaClCompressBlockDistElement::Sentinel), |
| 545 AbbrevListener(this) |
| 546 { |
| 547 SetListener(&AbbrevListener); |
| 548 } |
| 549 |
| 550 virtual ~NaClAnalyzeParser() {} |
| 551 |
| 552 virtual bool Error(const std::string &Message) { |
| 553 // Use local error routine so that all errors are treated uniformly. |
| 554 return ::Error(Message); |
| 555 } |
| 556 |
| 557 virtual bool ParseBlock(unsigned BlockID); |
| 558 |
| 559 // Mapping from block ID's to the corresponding list of abbreviations |
| 560 // associated with that block. |
| 561 BlockAbbrevsMapType &BlockAbbrevsMap; |
| 562 |
| 563 // Nested distribution capturing distribution of records in bitcode file. |
| 564 NaClBitcodeBlockDist BlockDist; |
| 565 |
| 566 // Listener used to get abbreviations as they are read. |
| 567 NaClBitcodeParserListener AbbrevListener; |
| 568 }; |
| 569 |
| 570 class NaClBlockAnalyzeParser : public NaClBitcodeParser { |
| 571 NaClBlockAnalyzeParser(const NaClBlockAnalyzeParser&) |
| 572 LLVM_DELETED_FUNCTION; |
| 573 void operator=(NaClBlockAnalyzeParser&) |
| 574 LLVM_DELETED_FUNCTION; |
| 575 |
| 576 public: |
| 577 /// Top-level constructor to build the top-level block with the |
| 578 /// given BlockID, and collect data (for compression) in that block. |
| 579 NaClBlockAnalyzeParser(unsigned BlockID, |
| 580 NaClAnalyzeParser *Context) |
| 581 : NaClBitcodeParser(BlockID, Context), Context(Context) { |
| 582 Init(); |
| 583 } |
| 584 |
| 585 virtual ~NaClBlockAnalyzeParser() { |
| 586 Context->BlockDist.AddBlock(GetBlock()); |
| 587 } |
| 588 |
| 589 protected: |
| 590 /// Nested constructor to parse a block within a block. Creates a |
| 591 /// block parser to parse a block with the given BlockID, and |
| 592 /// collect data (for compression) in that block. |
| 593 NaClBlockAnalyzeParser(unsigned BlockID, |
| 594 NaClBlockAnalyzeParser *EnclosingParser) |
| 595 : NaClBitcodeParser(BlockID, EnclosingParser), |
| 596 Context(EnclosingParser->Context) { |
| 597 Init(); |
| 598 } |
| 599 |
| 600 public: |
| 601 virtual bool Error(const std::string &Message) { |
| 602 // Use local error routine so that all errors are treated uniformly. |
| 603 return ::Error(Message); |
| 604 } |
| 605 |
| 606 virtual bool ParseBlock(unsigned BlockID) { |
| 607 NaClBlockAnalyzeParser Parser(BlockID, this); |
| 608 return Parser.ParseThisBlock(); |
| 609 } |
| 610 |
| 611 virtual void ProcessRecord() { |
| 612 // Before processing the record, we need to rename the abbreviation |
| 613 // index, so that we can look it up in the set of block abbreviations |
| 614 // being defined. |
| 615 if (Record.UsedAnAbbreviation()) { |
| 616 unsigned AbbrevIndex = Record.GetAbbreviationIndex(); |
| 617 if (LocalAbbrevBitstreamToInternalMap. |
| 618 DefinesBitstreamAbbrevIndex(AbbrevIndex)) { |
| 619 Record.SetAbbreviationIndex( |
| 620 LocalAbbrevBitstreamToInternalMap. |
| 621 GetInternalAbbrevIndex(AbbrevIndex)); |
| 622 } else { |
| 623 AbbrevBitstreamToInternalMap &GlobalAbbrevBitstreamToInternalMap = |
| 624 GlobalBlockAbbrevs->GetGlobalAbbrevBitstreamToInternalMap(); |
| 625 if (GlobalAbbrevBitstreamToInternalMap. |
| 626 DefinesBitstreamAbbrevIndex(AbbrevIndex)) { |
| 627 Record.SetAbbreviationIndex( |
| 628 GlobalAbbrevBitstreamToInternalMap. |
| 629 GetInternalAbbrevIndex(AbbrevIndex)); |
| 630 } else { |
| 631 report_fatal_error("Bad abbreviation index in file"); |
| 632 } |
| 633 } |
| 634 } |
| 635 |
| 636 cast<NaClCompressBlockDistElement>( |
| 637 Context->BlockDist.GetElement(Record.GetBlockID())) |
| 638 ->GetAbbrevDist().AddRecord(Record); |
| 639 } |
| 640 |
| 641 virtual void ProcessAbbreviation(unsigned BlockID, |
| 642 NaClBitCodeAbbrev *Abbrev, |
| 643 bool IsLocal) { |
| 644 int Index; |
| 645 AddAbbreviation(BlockID, Abbrev->Simplify(), Index); |
| 646 if (IsLocal) { |
| 647 LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index); |
| 648 } else { |
| 649 GetGlobalAbbrevs(BlockID)->GetGlobalAbbrevBitstreamToInternalMap(). |
| 650 InstallNewBitstreamAbbrevIndex(Index); |
| 651 } |
| 652 } |
| 653 |
| 654 protected: |
| 655 // The context (i.e. top-level) parser. |
| 656 NaClAnalyzeParser *Context; |
| 657 |
| 658 // The global block abbreviations associated with this block. |
| 659 BlockAbbrevs *GlobalBlockAbbrevs; |
| 660 |
| 661 // The local abbreviations associated with this block. |
| 662 AbbrevBitstreamToInternalMap LocalAbbrevBitstreamToInternalMap; |
| 663 |
| 664 /// Returns the set of (global) block abbreviations defined for the |
| 665 /// given block ID. |
| 666 BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) { |
| 667 BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID]; |
| 668 if (Abbrevs == 0) { |
| 669 Abbrevs = new BlockAbbrevs(BlockID); |
| 670 Context->BlockAbbrevsMap[BlockID] = Abbrevs; |
| 671 } |
| 672 return Abbrevs; |
| 673 } |
| 674 |
| 675 void SetGlobalAbbrevs(unsigned BlockID, BlockAbbrevs *Abbrevs) { |
| 676 Context->BlockAbbrevsMap[BlockID] = Abbrevs; |
| 677 } |
| 678 |
| 679 // Adds the abbreviation to the list of abbreviations for the given |
| 680 // block. |
| 681 void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev, |
| 682 int &Index) { |
| 683 // Get block abbreviations. |
| 684 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID); |
| 685 |
| 686 // Read abbreviation and add as a global abbreviation. |
| 687 if (Abbrevs->AddAbbreviation(Abbrev, Index) |
| 688 && TraceGeneratedAbbreviations) { |
| 689 PrintAbbrev(errs(), BlockID, Abbrev); |
| 690 } |
| 691 } |
| 692 |
| 693 /// Finds the index to the corresponding internal block abbreviation |
| 694 /// for the given abbreviation. |
| 695 int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { |
| 696 return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev); |
| 697 } |
| 698 |
| 699 void Init() { |
| 700 GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID()); |
| 701 LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex( |
| 702 GlobalBlockAbbrevs-> |
| 703 GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex()); |
| 704 } |
| 705 }; |
| 706 |
| 707 bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) { |
| 708 NaClBlockAnalyzeParser Parser(BlockID, this); |
| 709 return Parser.ParseThisBlock(); |
| 710 } |
| 711 |
| 712 /// Models the unrolling of an abbreviation into its sequence of |
| 713 /// individual operators. That is, unrolling arrays to match the width |
| 714 /// of the abbreviation. |
| 715 /// |
| 716 /// For example, consider the abbreviation [Array(VBR(6))]. If the |
| 717 /// distribution map has data for records of size 3, and the |
| 718 /// distribution map suggests that a constant 4 appears as the second |
| 719 /// element in the record, it is nontrivial to figure out how to |
| 720 /// encorporate this into this abbrevation. Hence, we unroll the array |
| 721 /// (3 times) to get [VBR(6), VBR(6), VBR(6), Array(VBR(6))]. To |
| 722 /// update the second element to match the literal 4, we only need to |
| 723 /// replace the second element in the unrolled abbreviation resulting |
| 724 /// in [VBR(6), Lit(4), VBR(6), Array(VBR(6))]. |
| 725 /// |
| 726 /// After we have done appropriate substitutions, we can simplify the |
| 727 /// unrolled abbreviation by calling method Restore. |
| 728 /// |
| 729 /// Note: We unroll in the form that best matches the distribution |
| 730 /// map. Hence, the code is stored as a separate operator. We also |
| 731 /// keep the array abbreviation op, for untracked elements within the |
| 732 /// distribution maps. |
| 733 class UnrolledAbbreviation { |
| 734 void operator=(const UnrolledAbbreviation&) LLVM_DELETED_FUNCTION; |
| 735 public: |
| 736 /// Unroll the given abbreviation, assuming it has the given size |
| 737 /// (as specified in the distribution maps). |
| 738 /// |
| 739 /// If argument CanBeBigger is true, then we do not assume that we |
| 740 /// can remove the trailing array when expanding, because the |
| 741 /// actual size of the corresponding record using this abbreviation |
| 742 /// may be bigger. |
| 743 UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size, |
| 744 bool CanBeBigger = false) |
| 745 : CodeOp(0) { |
| 746 unsigned NextOp = 0; |
| 747 UnrollAbbrevOp(CodeOp, Abbrev, NextOp); |
| 748 --Size; |
| 749 for (unsigned i = 0; i < Size; ++i) { |
| 750 // Create slot and then fill with appropriate operator. |
| 751 AbbrevOps.push_back(CodeOp); |
| 752 UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp); |
| 753 } |
| 754 if (CanBeBigger) { |
| 755 for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) { |
| 756 MoreOps.push_back(Abbrev->getOperandInfo(NextOp)); |
| 757 } |
| 758 } else if (NextOp < Abbrev->getNumOperandInfos() && |
| 759 !Abbrev->getOperandInfo(NextOp).isArrayOp()) { |
| 760 errs() << (Size+1) << ": "; |
| 761 Abbrev->Print(errs()); |
| 762 llvm_unreachable("Malformed abbreviation/size pair"); |
| 763 } |
| 764 } |
| 765 |
| 766 explicit UnrolledAbbreviation(const UnrolledAbbreviation &Abbrev) |
| 767 : CodeOp(Abbrev.CodeOp), |
| 768 AbbrevOps(Abbrev.AbbrevOps), |
| 769 MoreOps(Abbrev.MoreOps) { |
| 770 } |
| 771 |
| 772 /// Prints out the abbreviation modeled by the unrolled |
| 773 /// abbreviation. |
| 774 void Print(raw_ostream &Stream) const { |
| 775 NaClBitCodeAbbrev *Abbrev = Restore(false); |
| 776 Abbrev->Print(Stream); |
| 777 Abbrev->dropRef(); |
| 778 } |
| 779 |
| 780 /// Converts the unrolled abbreviation back into a regular |
| 781 /// abbreviation. If Simplify is true, we simplify the |
| 782 /// unrolled abbreviation as well. |
| 783 NaClBitCodeAbbrev *Restore(bool Simplify=true) const { |
| 784 NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev(); |
| 785 Abbrev->Add(CodeOp); |
| 786 for (std::vector<NaClBitCodeAbbrevOp>::const_iterator |
| 787 Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end(); |
| 788 Iter != IterEnd; ++Iter) { |
| 789 Abbrev->Add(*Iter); |
| 790 } |
| 791 for (std::vector<NaClBitCodeAbbrevOp>::const_iterator |
| 792 Iter = MoreOps.begin(), IterEnd = MoreOps.end(); |
| 793 Iter != IterEnd; ++Iter) { |
| 794 Abbrev->Add(*Iter); |
| 795 } |
| 796 if (Simplify) { |
| 797 NaClBitCodeAbbrev *SimpAbbrev = Abbrev->Simplify(); |
| 798 Abbrev->dropRef(); |
| 799 return SimpAbbrev; |
| 800 } else { |
| 801 return Abbrev; |
| 802 } |
| 803 } |
| 804 |
| 805 // The abbreviation used for the record code. |
| 806 NaClBitCodeAbbrevOp CodeOp; |
| 807 |
| 808 // The abbreviations used for each tracked value index. |
| 809 std::vector<NaClBitCodeAbbrevOp> AbbrevOps; |
| 810 |
| 811 private: |
| 812 // Any remaining abbreviation operators not part of the unrolling. |
| 813 std::vector<NaClBitCodeAbbrevOp> MoreOps; |
| 814 |
| 815 // Extracts out the next abbreviation operator from the abbreviation |
| 816 // Abbrev, given the index NextOp, and assigns it to AbbrevOp |
| 817 void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp, |
| 818 NaClBitCodeAbbrev *Abbrev, |
| 819 unsigned &NextOp) { |
| 820 assert(NextOp < Abbrev->getNumOperandInfos()); |
| 821 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp); |
| 822 if (Op.isArrayOp()) { |
| 823 // Do not advance. The array operator assumes that all remaining |
| 824 // elements should match its argument. |
| 825 AbbrevOp = Abbrev->getOperandInfo(NextOp+1); |
| 826 } else { |
| 827 AbbrevOp = Op; |
| 828 NextOp++; |
| 829 } |
| 830 } |
| 831 }; |
| 832 |
| 833 /// Models a candidate block abbreviation, which is a blockID, and the |
| 834 /// corresponding abbreviation to be considered for addition. Note: |
| 835 /// Constructors and assignment take ownership of the abbreviation. |
| 836 class CandBlockAbbrev { |
| 837 public: |
| 838 CandBlockAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbrev) |
| 839 : BlockID(BlockID), Abbrev(Abbrev) { |
| 840 } |
| 841 |
| 842 CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev) |
| 843 : BlockID(BlockAbbrev.BlockID), |
| 844 Abbrev(BlockAbbrev.Abbrev) { |
| 845 Abbrev->addRef(); |
| 846 } |
| 847 |
| 848 void operator=(const CandBlockAbbrev &BlockAbbrev) { |
| 849 Abbrev->dropRef(); |
| 850 BlockID = BlockAbbrev.BlockID; |
| 851 Abbrev = BlockAbbrev.Abbrev; |
| 852 Abbrev->addRef(); |
| 853 } |
| 854 |
| 855 ~CandBlockAbbrev() { |
| 856 Abbrev->dropRef(); |
| 857 } |
| 858 |
| 859 /// The block ID of the candidate abbreviation. |
| 860 unsigned GetBlockID() const { |
| 861 return BlockID; |
| 862 } |
| 863 |
| 864 /// The abbreviation of the candidate abbreviation. |
| 865 const NaClBitCodeAbbrev *GetAbbrev() const { |
| 866 return Abbrev; |
| 867 } |
| 868 |
| 869 /// orders this against the candidate abbreviation. |
| 870 int Compare(const CandBlockAbbrev &CandAbbrev) const { |
| 871 unsigned diff = BlockID - CandAbbrev.BlockID; |
| 872 if (diff) return diff; |
| 873 return Abbrev->Compare(*CandAbbrev.Abbrev); |
| 874 } |
| 875 |
| 876 /// Prints the candidate abbreviation to the given stream. |
| 877 void Print(raw_ostream &Stream) const { |
| 878 PrintAbbrev(Stream, BlockID, Abbrev); |
| 879 } |
| 880 |
| 881 private: |
| 882 // The block the abbreviation applies to. |
| 883 unsigned BlockID; |
| 884 // The candidate abbreviation. |
| 885 NaClBitCodeAbbrev *Abbrev; |
| 886 }; |
| 887 |
| 888 static inline bool operator<(const CandBlockAbbrev &A1, |
| 889 const CandBlockAbbrev &A2) { |
| 890 return A1.Compare(A2) < 0; |
| 891 } |
| 892 |
| 893 /// Models the set of candidate abbreviations being considered, and |
| 894 /// the number of abbreviations associated with each candidate |
| 895 /// Abbreviation. |
| 896 /// |
| 897 /// Note: Because we may have abbreviation refinements of A->B->C and |
| 898 /// A->D->C, we need to accumulate instance counts in such cases. |
| 899 class CandidateAbbrevs { |
| 900 public: |
| 901 // Map from candidate abbreviations to the corresponding number of |
| 902 // instances. |
| 903 typedef std::map<CandBlockAbbrev, unsigned> AbbrevCountMap; |
| 904 typedef AbbrevCountMap::const_iterator const_iterator; |
| 905 |
| 906 /// Creates an empty set of candidate abbreviations, to be |
| 907 /// (potentially) added to the given set of block abbreviations. |
| 908 /// Argument is the (global) block abbreviations map, which is |
| 909 /// used to determine if it already exists. |
| 910 CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap) |
| 911 : BlockAbbrevsMap(BlockAbbrevsMap) |
| 912 {} |
| 913 |
| 914 /// Adds the given (unrolled) abbreviation as a candidate |
| 915 /// abbreviation to the given block. NumInstances is the number of |
| 916 /// instances expected to use this candidate abbreviation. Returns |
| 917 /// true if the corresponding candidate abbreviation is added to this |
| 918 /// set of candidate abbreviations. |
| 919 bool Add(unsigned BlockID, |
| 920 UnrolledAbbreviation &UnrolledAbbrev, |
| 921 unsigned NumInstances); |
| 922 |
| 923 /// Returns the list of candidate abbreviations in this set. |
| 924 const AbbrevCountMap &GetAbbrevsMap() const { |
| 925 return AbbrevsMap; |
| 926 } |
| 927 |
| 928 /// Prints out the current contents of this set. |
| 929 void Print(raw_ostream &Stream, const char *Title = "Candidates") const { |
| 930 Stream << "-- " << Title << ": \n"; |
| 931 for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end(); |
| 932 Iter != IterEnd; ++Iter) { |
| 933 Stream << format("%12u", Iter->second) << ": "; |
| 934 Iter->first.Print(Stream); |
| 935 } |
| 936 Stream << "--\n"; |
| 937 } |
| 938 |
| 939 private: |
| 940 // The set of abbreviations and corresponding number instances. |
| 941 AbbrevCountMap AbbrevsMap; |
| 942 |
| 943 // The map of (global) abbreviations already associated with each block. |
| 944 BlockAbbrevsMapType &BlockAbbrevsMap; |
| 945 }; |
| 946 |
| 947 bool CandidateAbbrevs::Add(unsigned BlockID, |
| 948 UnrolledAbbreviation &UnrolledAbbrev, |
| 949 unsigned NumInstances) { |
| 950 // Drop if it corresponds to an existing global abbreviation. |
| 951 NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.Restore(); |
| 952 if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) { |
| 953 if (Abbrevs->FindAbbreviation(Abbrev) != |
| 954 BlockAbbrevs::NO_SUCH_ABBREVIATION) { |
| 955 Abbrev->dropRef(); |
| 956 return false; |
| 957 } |
| 958 } |
| 959 |
| 960 CandBlockAbbrev CandAbbrev(BlockID, Abbrev); |
| 961 AbbrevCountMap::iterator Pos = AbbrevsMap.find(CandAbbrev); |
| 962 if (Pos == AbbrevsMap.end()) { |
| 963 AbbrevsMap[CandAbbrev] = NumInstances; |
| 964 } else { |
| 965 Pos->second += NumInstances; |
| 966 } |
| 967 return true; |
| 968 } |
| 969 |
| 970 // Look for new abbreviations in block BlockID, considering it was |
| 971 // read with the given abbreviation Abbrev, and considering changing |
| 972 // the abbreviation opererator for value Index. ValueDist is how |
| 973 // values at Index are distributed. Any found abbreviations are added |
| 974 // to the candidate abbreviations CandAbbrevs. Returns true only if we |
| 975 // have added new candidate abbreviations to CandAbbrevs. |
| 976 static bool AddNewAbbreviations(unsigned BlockID, |
| 977 const UnrolledAbbreviation &Abbrev, |
| 978 unsigned Index, |
| 979 NaClBitcodeValueDist &ValueDist, |
| 980 CandidateAbbrevs &CandAbbrevs) { |
| 981 // TODO(kschimpf): Add code to try and find a better encoding for |
| 982 // the values, based on the distribution. |
| 983 |
| 984 // If this index is already a literal abbreviation, no improvements can |
| 985 // be made. |
| 986 const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index]; |
| 987 if (Op.isLiteral()) return false; |
| 988 |
| 989 // Search based on sorted distribution, which sorts by number of |
| 990 // instances. Start by trying to find possible constants to use. |
| 991 const NaClBitcodeDist::Distribution * |
| 992 Distribution = ValueDist.GetDistribution(); |
| 993 for (NaClBitcodeDist::Distribution::const_iterator |
| 994 Iter = Distribution->begin(), IterEnd = Distribution->end(); |
| 995 Iter != IterEnd; ++Iter) { |
| 996 NaClValueRangeType Range = GetNaClValueRange(Iter->second); |
| 997 if (Range.first != Range.second) continue; // not a constant. |
| 998 |
| 999 // Defines a constant. Try as new candidate range. In addition, |
| 1000 // don't try any more constant values, since this is the one with |
| 1001 // the greatest number of instances. |
| 1002 NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first); |
| 1003 UnrolledAbbreviation CandAbbrev(Abbrev); |
| 1004 CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first); |
| 1005 return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances()); |
| 1006 } |
| 1007 return false; |
| 1008 } |
| 1009 |
| 1010 // Look for new abbreviations in block BlockID, considering it was |
| 1011 // read with the given abbreviation Abbrev. IndexDist is the |
| 1012 // corresponding distribution of value indices associated with the |
| 1013 // abbreviation. Any found abbreviations are added to the candidate |
| 1014 // abbreviations CandAbbrevs. |
| 1015 static void AddNewAbbreviations(unsigned BlockID, |
| 1016 const UnrolledAbbreviation &Abbrev, |
| 1017 NaClBitcodeDist &IndexDist, |
| 1018 CandidateAbbrevs &CandAbbrevs) { |
| 1019 // Search based on sorted distribution, which sorts based on heuristic |
| 1020 // of best index to fix first. |
| 1021 const NaClBitcodeDist::Distribution * |
| 1022 IndexDistribution = IndexDist.GetDistribution(); |
| 1023 for (NaClBitcodeDist::Distribution::const_iterator |
| 1024 IndexIter = IndexDistribution->begin(), |
| 1025 IndexIterEnd = IndexDistribution->end(); |
| 1026 IndexIter != IndexIterEnd; ++IndexIter) { |
| 1027 unsigned Index = static_cast<unsigned>(IndexIter->second); |
| 1028 if (AddNewAbbreviations( |
| 1029 BlockID, Abbrev, Index, |
| 1030 cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index)) |
| 1031 ->GetValueDist(), |
| 1032 CandAbbrevs)) { |
| 1033 return; |
| 1034 } |
| 1035 } |
| 1036 } |
| 1037 |
| 1038 // Look for new abbreviations in block BlockID, considering it was |
| 1039 // read with the given abbreviation Abbrev, and the given record Code. |
| 1040 // SizeDist is the corresponding distribution of sizes associated with |
| 1041 // the abbreviation. Any found abbreviations are added to the |
| 1042 // candidate abbreviations CandAbbrevs. |
| 1043 static void AddNewAbbreviations(unsigned BlockID, |
| 1044 NaClBitCodeAbbrev *Abbrev, |
| 1045 unsigned Code, |
| 1046 NaClBitcodeDist &SizeDist, |
| 1047 CandidateAbbrevs &CandAbbrevs) { |
| 1048 const NaClBitcodeDist::Distribution * |
| 1049 SizeDistribution = SizeDist.GetDistribution(); |
| 1050 for (NaClBitcodeDist::Distribution::const_iterator |
| 1051 SizeIter = SizeDistribution->begin(), |
| 1052 SizeIterEnd = SizeDistribution->end(); |
| 1053 SizeIter != SizeIterEnd; ++SizeIter) { |
| 1054 unsigned Size = static_cast<unsigned>(SizeIter->second); |
| 1055 UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1 /* Add code! */, |
| 1056 Size >= NaClValueIndexCutoff); |
| 1057 if (!UnrolledAbbrev.CodeOp.isLiteral()) { |
| 1058 // Try making the code a literal. |
| 1059 UnrolledAbbreviation CandAbbrev(UnrolledAbbrev); |
| 1060 CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code); |
| 1061 CandAbbrevs.Add(BlockID, CandAbbrev, |
| 1062 SizeDist.at(Size)->GetNumInstances()); |
| 1063 } |
| 1064 // Now process value indices to find candidate abbreviations. |
| 1065 AddNewAbbreviations( |
| 1066 BlockID, UnrolledAbbrev, |
| 1067 cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size)) |
| 1068 ->GetValueIndexDist(), |
| 1069 CandAbbrevs); |
| 1070 } |
| 1071 } |
| 1072 |
| 1073 // Look for new abbreviations in block BlockID. Abbrevs is the map of |
| 1074 // read (globally defined) abbreviations associated with the |
| 1075 // BlockID. AbbrevDist is the distribution map of abbreviations |
| 1076 // associated with BlockID. Any found abbreviations are added to the |
| 1077 // candidate abbreviations CandAbbrevs. |
| 1078 static void AddNewAbbreviations(unsigned BlockID, |
| 1079 BlockAbbrevs *Abbrevs, |
| 1080 NaClBitcodeDist &AbbrevDist, |
| 1081 CandidateAbbrevs &CandAbbrevs) { |
| 1082 const NaClBitcodeDist::Distribution * |
| 1083 AbbrevDistribution = AbbrevDist.GetDistribution(); |
| 1084 for (NaClBitcodeDist::Distribution::const_iterator |
| 1085 AbbrevIter = AbbrevDistribution->begin(), |
| 1086 AbbrevIterEnd = AbbrevDistribution->end(); |
| 1087 AbbrevIter != AbbrevIterEnd; ++AbbrevIter) { |
| 1088 NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second; |
| 1089 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex); |
| 1090 NaClBitcodeAbbrevDistElement *AbbrevElmt = |
| 1091 cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex)); |
| 1092 NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist(); |
| 1093 |
| 1094 const NaClBitcodeDist::Distribution * |
| 1095 CodeDistribution = CodeDist.GetDistribution(); |
| 1096 for (NaClBitcodeDist::Distribution::const_iterator |
| 1097 CodeIter = CodeDistribution->begin(), |
| 1098 CodeIterEnd = CodeDistribution->end(); |
| 1099 CodeIter != CodeIterEnd; ++CodeIter) { |
| 1100 unsigned Code = static_cast<unsigned>(CodeIter->second); |
| 1101 AddNewAbbreviations( |
| 1102 BlockID, |
| 1103 Abbrev, |
| 1104 Code, |
| 1105 cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second)) |
| 1106 ->GetSizeDist(), |
| 1107 CandAbbrevs); |
| 1108 } |
| 1109 } |
| 1110 } |
| 1111 |
| 1112 typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType; |
| 1113 |
| 1114 // Look for new abbreviations in the given block distribution map |
| 1115 // BlockDist. BlockAbbrevsMap contains the set of read global |
| 1116 // abbreviations. Adds found candidate abbreviations to the set of |
| 1117 // known global abbreviations. |
| 1118 static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist, |
| 1119 BlockAbbrevsMapType &BlockAbbrevsMap) { |
| 1120 CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap); |
| 1121 // Start by collecting candidate abbreviations. |
| 1122 const NaClBitcodeDist::Distribution * |
| 1123 Distribution = BlockDist.GetDistribution(); |
| 1124 for (NaClBitcodeDist::Distribution::const_iterator |
| 1125 Iter = Distribution->begin(), IterEnd = Distribution->end(); |
| 1126 Iter != IterEnd; ++Iter) { |
| 1127 unsigned BlockID = static_cast<unsigned>(Iter->second); |
| 1128 AddNewAbbreviations( |
| 1129 BlockID, |
| 1130 BlockAbbrevsMap[BlockID], |
| 1131 cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID)) |
| 1132 ->GetAbbrevDist(), |
| 1133 CandAbbrevs); |
| 1134 } |
| 1135 // Install candidate abbreviations. |
| 1136 // |
| 1137 // Sort the candidate abbreviations by number of instances, so that |
| 1138 // if multiple abbreviations apply, the one with the largest number |
| 1139 // of instances will be chosen when compressing a file. |
| 1140 // |
| 1141 // For example, we may have just generated two abbreviations. The |
| 1142 // first has replaced the 3rd element with the constant 4 while the |
| 1143 // second replaced the 4th element with the constant 5. The first |
| 1144 // abbreviation can apply to 300 records while the second can apply |
| 1145 // to 1000 records. Assuming that both abbreviations shrink the |
| 1146 // record by the same number of bits, we clearly want the tool to |
| 1147 // choose the second abbreviation when selecting the abbreviation |
| 1148 // index to choose (via method GetRecordAbbrevIndex). |
| 1149 // |
| 1150 // Selecting the second is important in that abbreviation are |
| 1151 // refined by successive calls to this tool. We do not want to |
| 1152 // restrict downstream refinements prematurely. Hence, we want the |
| 1153 // tool to choose the abbreviation with the most candidates first. |
| 1154 // |
| 1155 // Since method GetRecordAbbrevIndex chooses the first abbreviation |
| 1156 // that generates the least number of bits, we clearly want to make |
| 1157 // sure that the second abbreviation occurs before the first. |
| 1158 std::vector<CountedAbbrevType> Candidates; |
| 1159 for (CandidateAbbrevs::const_iterator |
| 1160 Iter = CandAbbrevs.GetAbbrevsMap().begin(), |
| 1161 IterEnd = CandAbbrevs.GetAbbrevsMap().end(); |
| 1162 Iter != IterEnd; ++Iter) { |
| 1163 Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first)); |
| 1164 } |
| 1165 std::sort(Candidates.begin(), Candidates.end()); |
| 1166 std::vector<CountedAbbrevType>::const_reverse_iterator |
| 1167 Iter = Candidates.rbegin(), IterEnd = Candidates.rend(); |
| 1168 if (Iter == IterEnd) return; |
| 1169 |
| 1170 if (TraceGeneratedAbbreviations) { |
| 1171 errs() << "-- New abbrevations:\n"; |
| 1172 } |
| 1173 unsigned Min = (Iter->first >> 2); |
| 1174 for (; Iter != IterEnd; ++Iter) { |
| 1175 if (Iter->first < Min) break; |
| 1176 unsigned BlockID = Iter->second.GetBlockID(); |
| 1177 BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID]; |
| 1178 NaClBitCodeAbbrev *Abbrev = Iter->second.GetAbbrev()->Copy(); |
| 1179 if (TraceGeneratedAbbreviations) { |
| 1180 errs() <<format("%12u", Iter->first) << ": "; |
| 1181 PrintAbbrev(errs(), BlockID, Abbrev); |
| 1182 } |
| 1183 Abbrevs->AddAbbreviation(Abbrev); |
| 1184 } |
| 1185 if (TraceGeneratedAbbreviations) { |
| 1186 errs() << "--\n"; |
| 1187 } |
| 1188 } |
| 1189 |
| 1190 // Walks the block distribution (BlockDist), sorting entries based |
| 1191 // on the distribution of blocks and abbreviations, and then |
| 1192 // prints out the frequency of each abbreviation used. |
| 1193 static void DisplayAbbreviationFrequencies( |
| 1194 raw_ostream &Output, |
| 1195 const NaClBitcodeBlockDist &BlockDist, |
| 1196 const BlockAbbrevsMapType &BlockAbbrevsMap) { |
| 1197 const NaClBitcodeDist::Distribution *BlockDistribution = |
| 1198 BlockDist.GetDistribution(); |
| 1199 for (NaClBitcodeDist::Distribution::const_iterator |
| 1200 BlockIter = BlockDistribution->begin(), |
| 1201 BlockIterEnd = BlockDistribution->end(); |
| 1202 BlockIter != BlockIterEnd; ++BlockIter) { |
| 1203 unsigned BlockID = static_cast<unsigned>(BlockIter->second); |
| 1204 BlockAbbrevsMapType::const_iterator BlockPos = BlockAbbrevsMap.find(BlockID)
; |
| 1205 if (BlockPos == BlockAbbrevsMap.end()) continue; |
| 1206 Output << "Block " << BlockID << "\n"; |
| 1207 if (NaClCompressBlockDistElement *BlockElement = |
| 1208 dyn_cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))) { |
| 1209 NaClBitcodeDist &AbbrevDist = BlockElement->GetAbbrevDist(); |
| 1210 const NaClBitcodeDist::Distribution *AbbrevDistribution = |
| 1211 AbbrevDist.GetDistribution(); |
| 1212 unsigned Total = AbbrevDist.GetTotal(); |
| 1213 for (NaClBitcodeDist::Distribution::const_iterator |
| 1214 AbbrevIter = AbbrevDistribution->begin(), |
| 1215 AbbrevIterEnd = AbbrevDistribution->end(); |
| 1216 AbbrevIter != AbbrevIterEnd; ++AbbrevIter) { |
| 1217 unsigned Index = static_cast<unsigned>(AbbrevIter->second); |
| 1218 unsigned Count = AbbrevDist.at(Index)->GetNumInstances(); |
| 1219 Output << format("%8u (%6.2f%%): ", Count, |
| 1220 (double) Count/Total*100.0); |
| 1221 BlockPos->second->GetIndexedAbbrev(Index)->Print(Output); |
| 1222 } |
| 1223 } |
| 1224 Output << '\n'; |
| 1225 } |
| 1226 } |
| 1227 |
| 1228 // Read in bitcode, analyze data, and figure out set of abbreviations |
| 1229 // to use, from memory buffer MemBuf containing the input bitcode file. |
| 1230 static bool AnalyzeBitcode(std::unique_ptr<MemoryBuffer> &MemBuf, |
| 1231 BlockAbbrevsMapType &BlockAbbrevsMap) { |
| 1232 // TODO(kschimpf): The current code only extracts abbreviations |
| 1233 // defined in the bitcode file. This code needs to be updated to |
| 1234 // collect data distributions and figure out better (global) |
| 1235 // abbreviations to use. |
| 1236 |
| 1237 const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); |
| 1238 const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize(); |
| 1239 |
| 1240 // First read header and verify it is good. |
| 1241 NaClBitcodeHeader Header; |
| 1242 if (Header.Read(BufPtr, EndBufPtr) || !Header.IsSupported()) |
| 1243 return Error("Invalid PNaCl bitcode header"); |
| 1244 |
| 1245 // Create a bitstream reader to read the bitcode file. |
| 1246 NaClBitstreamReader StreamFile(BufPtr, EndBufPtr); |
| 1247 NaClBitstreamCursor Stream(StreamFile); |
| 1248 |
| 1249 // Parse the the bitcode file. |
| 1250 NaClAnalyzeParser Parser(Stream, BlockAbbrevsMap); |
| 1251 while (!Stream.AtEndOfStream()) { |
| 1252 if (Parser.Parse()) return true; |
| 1253 } |
| 1254 |
| 1255 if (ShowAbbreviationFrequencies || ShowValueDistributions) { |
| 1256 std::error_code EC; |
| 1257 raw_fd_ostream Output(OutputFilename, EC, sys::fs::F_None); |
| 1258 if (EC) { |
| 1259 errs() << EC.message() << "\n"; |
| 1260 exit(1); |
| 1261 } |
| 1262 if (ShowAbbreviationFrequencies) |
| 1263 DisplayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap); |
| 1264 if (ShowValueDistributions) |
| 1265 Parser.BlockDist.Print(Output); |
| 1266 } |
| 1267 |
| 1268 AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap); |
| 1269 return false; |
| 1270 } |
| 1271 |
| 1272 /// Parses the input bitcode file and generates the corresponding |
| 1273 /// compressed bitcode file, by replacing abbreviations in the input |
| 1274 /// file with the corresponding abbreviations defined in |
| 1275 /// BlockAbbrevsMap. |
| 1276 class NaClBitcodeCopyParser : public NaClBitcodeParser { |
| 1277 public: |
| 1278 // Top-level constructor to build the appropriate block parser |
| 1279 // using the given BlockAbbrevsMap to define abbreviations. |
| 1280 NaClBitcodeCopyParser(NaClBitstreamCursor &Cursor, |
| 1281 BlockAbbrevsMapType &BlockAbbrevsMap, |
| 1282 NaClBitstreamWriter &Writer) |
| 1283 : NaClBitcodeParser(Cursor), |
| 1284 BlockAbbrevsMap(BlockAbbrevsMap), |
| 1285 Writer(Writer), |
| 1286 FoundFirstBlockInfo(false) |
| 1287 {} |
| 1288 |
| 1289 virtual ~NaClBitcodeCopyParser() {} |
| 1290 |
| 1291 virtual bool Error(const std::string &Message) { |
| 1292 // Use local error routine so that all errors are treated uniformly. |
| 1293 return ::Error(Message); |
| 1294 } |
| 1295 |
| 1296 virtual bool ParseBlock(unsigned BlockID); |
| 1297 |
| 1298 // The abbreviations to use for the copied bitcode. |
| 1299 BlockAbbrevsMapType &BlockAbbrevsMap; |
| 1300 |
| 1301 // The bitstream to copy the compressed bitcode into. |
| 1302 NaClBitstreamWriter &Writer; |
| 1303 |
| 1304 // True if we have already found the first block info block. |
| 1305 // Used to make sure we don't use abbreviations until we |
| 1306 // have put them into the bitcode file. |
| 1307 bool FoundFirstBlockInfo; |
| 1308 }; |
| 1309 |
| 1310 class NaClBlockCopyParser : public NaClBitcodeParser { |
| 1311 public: |
| 1312 // Top-level constructor to build the appropriate block parser. |
| 1313 NaClBlockCopyParser(unsigned BlockID, |
| 1314 NaClBitcodeCopyParser *Context) |
| 1315 : NaClBitcodeParser(BlockID, Context), |
| 1316 Context(Context), |
| 1317 BlockAbbreviations(0) |
| 1318 {} |
| 1319 |
| 1320 virtual ~NaClBlockCopyParser() {} |
| 1321 |
| 1322 protected: |
| 1323 // The context defining state associated with the block parser. |
| 1324 NaClBitcodeCopyParser *Context; |
| 1325 |
| 1326 // The block abbreviations defined for this block (initialized by |
| 1327 // EnterBlock). |
| 1328 BlockAbbrevs *BlockAbbreviations; |
| 1329 |
| 1330 /// Constructor to parse nested blocks. Creates a block parser to |
| 1331 /// parse in a block with the given BlockID, and write the block |
| 1332 /// back out using the abbreviations in BlockAbbrevsMap. |
| 1333 NaClBlockCopyParser(unsigned BlockID, |
| 1334 NaClBlockCopyParser *EnclosingParser) |
| 1335 : NaClBitcodeParser(BlockID, EnclosingParser), |
| 1336 Context(EnclosingParser->Context) |
| 1337 {} |
| 1338 |
| 1339 virtual bool Error(const std::string &Message) { |
| 1340 // Use local error routine so that all errors are treated uniformly. |
| 1341 return ::Error(Message); |
| 1342 } |
| 1343 |
| 1344 /// Returns the set of (global) block abbreviations defined for the |
| 1345 /// given block ID. |
| 1346 BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) { |
| 1347 BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID]; |
| 1348 if (Abbrevs == 0) { |
| 1349 Abbrevs = new BlockAbbrevs(BlockID); |
| 1350 Context->BlockAbbrevsMap[BlockID] = Abbrevs; |
| 1351 } |
| 1352 return Abbrevs; |
| 1353 } |
| 1354 |
| 1355 virtual bool ParseBlock(unsigned BlockID) { |
| 1356 NaClBlockCopyParser Parser(BlockID, this); |
| 1357 return Parser.ParseThisBlock(); |
| 1358 } |
| 1359 |
| 1360 virtual void EnterBlock(unsigned NumWords) { |
| 1361 unsigned BlockID = GetBlockID(); |
| 1362 BlockAbbreviations = GetGlobalAbbrevs(BlockID); |
| 1363 |
| 1364 // Enter the subblock. |
| 1365 NaClBitcodeSelectorAbbrev |
| 1366 Selector(BlockAbbreviations->GetNumberAbbreviations()-1); |
| 1367 if (RemoveAbbreviations) Selector = NaClBitcodeSelectorAbbrev(); |
| 1368 Context->Writer.EnterSubblock(BlockID, Selector); |
| 1369 |
| 1370 // Note: We must dump module abbreviations as local |
| 1371 // abbreviations, because they are in a yet to be |
| 1372 // dumped BlockInfoBlock. |
| 1373 if (!RemoveAbbreviations && BlockID == naclbitc::MODULE_BLOCK_ID) { |
| 1374 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID); |
| 1375 for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) { |
| 1376 Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy()); |
| 1377 } |
| 1378 } |
| 1379 } |
| 1380 |
| 1381 virtual void ExitBlock() { |
| 1382 Context->Writer.ExitBlock(); |
| 1383 } |
| 1384 |
| 1385 virtual void ProcessBlockInfo() { |
| 1386 assert(!Context->FoundFirstBlockInfo && |
| 1387 "Input bitcode has more that one BlockInfoBlock"); |
| 1388 Context->FoundFirstBlockInfo = true; |
| 1389 |
| 1390 // Generate global abbreviations within a blockinfo block. |
| 1391 Context->Writer.EnterBlockInfoBlock(); |
| 1392 if (!RemoveAbbreviations) { |
| 1393 for (BlockAbbrevsMapType::const_iterator |
| 1394 Iter = Context->BlockAbbrevsMap.begin(), |
| 1395 IterEnd = Context->BlockAbbrevsMap.end(); |
| 1396 Iter != IterEnd; ++Iter) { |
| 1397 unsigned BlockID = Iter->first; |
| 1398 // Don't emit module abbreviations, since they have been |
| 1399 // emitted as local abbreviations. |
| 1400 if (BlockID == naclbitc::MODULE_BLOCK_ID) continue; |
| 1401 |
| 1402 BlockAbbrevs *Abbrevs = Iter->second; |
| 1403 if (Abbrevs == 0) continue; |
| 1404 for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); |
| 1405 i < Abbrevs->GetNumberAbbreviations(); ++i) { |
| 1406 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i); |
| 1407 Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev); |
| 1408 } |
| 1409 } |
| 1410 } |
| 1411 Context->Writer.ExitBlock(); |
| 1412 } |
| 1413 |
| 1414 virtual void ProcessRecord() { |
| 1415 const NaClBitcodeRecord::RecordVector &Values = Record.GetValues(); |
| 1416 if (RemoveAbbreviations) { |
| 1417 Context->Writer.EmitRecord(Record.GetCode(), Values, 0); |
| 1418 return; |
| 1419 } |
| 1420 // Find best fitting abbreviation to use, and print out the record |
| 1421 // using that abbreviations. |
| 1422 unsigned AbbrevIndex = |
| 1423 BlockAbbreviations->GetRecordAbbrevIndex(Record.GetRecordData()); |
| 1424 if (AbbrevIndex == naclbitc::UNABBREV_RECORD) { |
| 1425 Context->Writer.EmitRecord(Record.GetCode(), Values, 0); |
| 1426 } else { |
| 1427 Context->Writer.EmitRecord(Record.GetCode(), Values, AbbrevIndex); |
| 1428 } |
| 1429 } |
| 1430 }; |
| 1431 |
| 1432 bool NaClBitcodeCopyParser::ParseBlock(unsigned BlockID) { |
| 1433 NaClBlockCopyParser Parser(BlockID, this); |
| 1434 return Parser.ParseThisBlock(); |
| 1435 } |
| 1436 |
| 1437 // Read in bitcode, and write it back out using the abbreviations in |
| 1438 // BlockAbbrevsMap, from memory buffer MemBuf containing the input |
| 1439 // bitcode file. |
| 1440 static bool CopyBitcode(std::unique_ptr<MemoryBuffer> &MemBuf, |
| 1441 BlockAbbrevsMapType &BlockAbbrevsMap) { |
| 1442 |
| 1443 const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); |
| 1444 const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize(); |
| 1445 |
| 1446 // Read header. No verification is needed since AnalyzeBitcode has |
| 1447 // already checked it. |
| 1448 NaClBitcodeHeader Header; |
| 1449 if (Header.Read(BufPtr, EndBufPtr)) |
| 1450 return Error("Invalid PNaCl bitcode header"); |
| 1451 |
| 1452 // Create the bitcode reader. |
| 1453 NaClBitstreamReader StreamFile(BufPtr, EndBufPtr); |
| 1454 NaClBitstreamCursor Stream(StreamFile); |
| 1455 |
| 1456 // Create the bitcode writer. |
| 1457 SmallVector<char, 0> OutputBuffer; |
| 1458 OutputBuffer.reserve(256*1024); |
| 1459 NaClBitstreamWriter StreamWriter(OutputBuffer); |
| 1460 |
| 1461 // Emit the file header. |
| 1462 NaClWriteHeader(Header, StreamWriter); |
| 1463 |
| 1464 // Set up the parser. |
| 1465 NaClBitcodeCopyParser Parser(Stream, BlockAbbrevsMap, StreamWriter); |
| 1466 |
| 1467 // Parse the bitcode and copy. |
| 1468 while (!Stream.AtEndOfStream()) { |
| 1469 if (Parser.Parse()) return true; |
| 1470 } |
| 1471 |
| 1472 // Write out the copied results. |
| 1473 std::error_code EC; |
| 1474 std::unique_ptr<tool_output_file> OutFile( |
| 1475 new tool_output_file(OutputFilename.c_str(), EC, sys::fs::F_None)); |
| 1476 if (EC) |
| 1477 return Error(EC.message()); |
| 1478 |
| 1479 // Write the generated bitstream to "Out". |
| 1480 OutFile->os().write((char*)&OutputBuffer.front(), |
| 1481 OutputBuffer.size()); |
| 1482 OutFile->keep(); |
| 1483 |
| 1484 return false; |
| 1485 } |
| 1486 |
| 1487 // Build fast lookup abbreviation maps for each of the abbreviation blocks |
| 1488 // defined in AbbrevsMap. |
| 1489 static void BuildAbbrevLookupMaps(BlockAbbrevsMapType &AbbrevsMap) { |
| 1490 for (BlockAbbrevsMapType::const_iterator |
| 1491 Iter = AbbrevsMap.begin(), |
| 1492 IterEnd = AbbrevsMap.end(); |
| 1493 Iter != IterEnd; ++Iter) { |
| 1494 Iter->second->BuildAbbrevLookupSizeMap(); |
| 1495 } |
| 1496 } |
| 1497 |
| 1498 } // namespace |
| 1499 |
| 1500 int main(int argc, char **argv) { |
| 1501 // Print a stack trace if we signal out. |
| 1502 sys::PrintStackTraceOnErrorSignal(); |
| 1503 PrettyStackTraceProgram X(argc, argv); |
| 1504 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. |
| 1505 cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n"); |
| 1506 |
| 1507 std::unique_ptr<MemoryBuffer> MemBuf; |
| 1508 if (ReadAndBuffer(MemBuf)) return 1; |
| 1509 BlockAbbrevsMapType BlockAbbrevsMap; |
| 1510 if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1; |
| 1511 if (ShowAbbreviationFrequencies || ShowValueDistributions) { |
| 1512 return 0; |
| 1513 } |
| 1514 BuildAbbrevLookupMaps(BlockAbbrevsMap); |
| 1515 if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1; |
| 1516 return 0; |
| 1517 } |
OLD | NEW |