Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(804)

Side by Side Diff: tools/pnacl-bccompress/pnacl-bccompress.cpp

Issue 939073008: Rebased PNaCl localmods in LLVM to 223109 (Closed)
Patch Set: undo localmod Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/pnacl-bccompress/Makefile ('k') | tools/pnacl-bcdis/CMakeLists.txt » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 //===-- pnacl-bccompress.cpp - Bitcode (abbrev) compression ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tool may be invoked in the following manner:
11 // pnacl-bccompress [options] bcin.pexe -o bcout.pexe
12 // - Read frozen PNaCl bitcode from the bcin.pexe and introduce
13 // abbreviations to compress it into bcout.pexe.
14 //
15 // Options:
16 // --help - Output information about command line switches
17 //
18 // This tool analyzes the data in bcin.pexe, and determines what
19 // abbreviations can be added to compress the bitcode file. The result
20 // is written to bcout.pexe.
21 //
22 // A bitcode file has two types of abbreviations. The first are Global
23 // abbreviations that apply to all instances of a particular type of
24 // block. These abbreviations appear in the BlockInfo block of the
25 // bitcode file.
26 //
27 // The second type of abbreviations are local to a particular instance
28 // of a block.
29 //
30 // In pnacl-bccompress, for simplicity, we will only add global
31 // abbreviations. Local abbreviations are converted to corresponding
32 // global abbreviations, so that they can be added as global
33 // abbreviations.
34 //
35 // The process of compressing is done by reading the input file
36 // twice. In the first round, the records are read and analyzed,
37 // generating a set of (global) abbreviations to use to generate the
38 // compressed output file. Then, the input file is read again, and for
39 // each record, the best fitting global abbreviation is found (or it
40 // figures out that leaving the record unabbreviated is best) and
41 // writes the record out accordingly.
42 //
43 // TODO(kschimpf): The current implementation does a trivial solution
44 // for the first round. It just converts all abbreviations (local and
45 // global) into global abbreviations. Future refinements of this file
46 // will generate better (and more intelligent) global abbreviations,
47 // based on the records found on the first read of the bitcode file.
48 //
49 //===----------------------------------------------------------------------===//
50
51 #include "llvm/ADT/DenseMap.h"
52 #include "llvm/ADT/DenseSet.h"
53 #include "llvm/ADT/SmallVector.h"
54 #include "llvm/Bitcode/NaCl/AbbrevTrieNode.h"
55 #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"
56 #include "llvm/Bitcode/NaCl/NaClBitcodeParser.h"
57 #include "llvm/Bitcode/NaCl/NaClBitstreamReader.h"
58 #include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h"
59 #include "llvm/Bitcode/NaCl/NaClCompressBlockDist.h"
60 #include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
61 #include "llvm/Support/CommandLine.h"
62 #include "llvm/Support/FileSystem.h"
63 #include "llvm/Support/Format.h"
64 #include "llvm/Support/ManagedStatic.h"
65 #include "llvm/Support/MemoryBuffer.h"
66 #include "llvm/Support/PrettyStackTrace.h"
67 #include "llvm/Support/Signals.h"
68 #include "llvm/Support/ToolOutputFile.h"
69 #include "llvm/Support/raw_ostream.h"
70 #include <set>
71 #include <map>
72 #include <system_error>
73
74 namespace {
75
76 using namespace llvm;
77
78 static cl::opt<bool>
79 TraceGeneratedAbbreviations(
80 "abbreviations",
81 cl::desc("Trace abbreviations added to compressed file"),
82 cl::init(false));
83
84 static cl::opt<std::string>
85 InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
86
87
88 static cl::opt<std::string>
89 OutputFilename("o", cl::desc("Specify output filename"),
90 cl::value_desc("filename"), cl::init("-"));
91
92 static cl::opt<bool>
93 ShowValueDistributions(
94 "show-distributions",
95 cl::desc("Show collected value distributions in bitcode records. "
96 "Turns off compression."),
97 cl::init(false));
98
99 static cl::opt<bool>
100 ShowAbbrevLookupTries(
101 "show-lookup-tries",
102 cl::desc("Show lookup tries used to minimize search for \n"
103 "matching abbreviations."),
104 cl::init(false));
105
106 static cl::opt<bool>
107 ShowAbbreviationFrequencies(
108 "show-abbreviation-frequencies",
109 cl::desc("Show how often each abbreviation is used. "
110 "Turns off compression."),
111 cl::init(false));
112
113 // Note: When this flag is true, we still generate new abbreviations,
114 // because we don't want to add the complexity of turning it off.
115 // Rather, we simply make sure abbreviations are ignored when writing
116 // out the final copy.
117 static cl::opt<bool>
118 RemoveAbbreviations(
119 "remove-abbreviations",
120 cl::desc("Remove abbreviations from input bitcode file."),
121 cl::init(false));
122
123 /// Error - All bitcode analysis errors go through this function,
124 /// making this a good place to breakpoint if debugging.
125 static bool Error(const std::string &Err) {
126 errs() << Err << "\n";
127 return true;
128 }
129
130 // Prints out the abbreviation in readable form to the given Stream.
131 static void PrintAbbrev(raw_ostream &Stream,
132 unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
133 Stream << "Abbrev(block " << BlockID << "): ";
134 Abbrev->Print(Stream);
135 }
136
137 // Reads the input file into the given buffer.
138 static bool ReadAndBuffer(std::unique_ptr<MemoryBuffer> &MemBuf) {
139 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrOrFile =
140 MemoryBuffer::getFileOrSTDIN(InputFilename);
141 if (std::error_code EC = ErrOrFile.getError())
142 return Error("Error reading '" + InputFilename + "': " + EC.message());
143
144 MemBuf.reset(ErrOrFile.get().release());
145 if (MemBuf->getBufferSize() % 4 != 0)
146 return Error("Bitcode stream should be a multiple of 4 bytes in length");
147 return false;
148 }
149
150 /// type map from bitstream abbreviation index to corresponding
151 /// internal abbreviation index.
152 typedef std::map<unsigned, unsigned> BitstreamToInternalAbbrevMapType;
153
154 /// Defines a mapping from bitstream abbreviation indices, to
155 /// corresponding internal abbreviation indices.
156 class AbbrevBitstreamToInternalMap {
157 public:
158
159 AbbrevBitstreamToInternalMap()
160 : NextBitstreamAbbrevIndex(0) {}
161
162 /// Returns the bitstream abbreviaion index that will be associated
163 /// with the next internal abbreviation index.
164 unsigned GetNextBitstreamAbbrevIndex() {
165 return NextBitstreamAbbrevIndex;
166 }
167
168 /// Changes the next bitstream abbreviation index to the given value.
169 void SetNextBitstreamAbbrevIndex(unsigned NextIndex) {
170 NextBitstreamAbbrevIndex = NextIndex;
171 }
172
173 /// Returns true if there is an internal abbreviation index for the
174 /// given bitstream abbreviation.
175 bool DefinesBitstreamAbbrevIndex(unsigned Index) {
176 return BitstreamToInternalAbbrevMap.find(Index) !=
177 BitstreamToInternalAbbrevMap.end();
178 }
179
180 /// Returns the internal abbreviation index for the given bitstream
181 /// abbreviation index.
182 unsigned GetInternalAbbrevIndex(unsigned Index) {
183 return BitstreamToInternalAbbrevMap.at(Index);
184 }
185
186 /// Installs the given internal abbreviation index using the next
187 /// available bitstream abbreviation index.
188 void InstallNewBitstreamAbbrevIndex(unsigned InternalIndex) {
189 BitstreamToInternalAbbrevMap[NextBitstreamAbbrevIndex++] = InternalIndex;
190 }
191
192 private:
193 // The index of the next bitstream abbreviation to be defined.
194 unsigned NextBitstreamAbbrevIndex;
195 // Map from bitstream abbreviation index to corresponding internal
196 // abbreviation index.
197 BitstreamToInternalAbbrevMapType BitstreamToInternalAbbrevMap;
198 };
199
200 /// Defines the list of abbreviations associated with a block.
201 class BlockAbbrevs {
202 public:
203 // Vector to hold the (ordered) list of abbreviations.
204 typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector;
205
206 BlockAbbrevs(unsigned BlockID)
207 : BlockID(BlockID) {
208 // backfill internal indices that don't correspond to bitstream
209 // application abbreviations, so that added abbreviations have
210 // valid abbreviation indices.
211 for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) {
212 // Make all non-application abbreviations look like the default
213 // abbreviation.
214 NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
215 Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
216 Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6));
217 Abbrevs.push_back(Abbrev);
218 }
219 GlobalAbbrevBitstreamToInternalMap.
220 SetNextBitstreamAbbrevIndex(Abbrevs.size());
221 }
222
223 ~BlockAbbrevs() {
224 for (AbbrevVector::const_iterator
225 Iter = Abbrevs.begin(), IterEnd = Abbrevs.end();
226 Iter != IterEnd; ++Iter) {
227 (*Iter)->dropRef();
228 }
229 for (AbbrevLookupSizeMap::const_iterator
230 Iter = LookupMap.begin(), IterEnd = LookupMap.end();
231 Iter != IterEnd; ++Iter) {
232 delete Iter->second;
233 }
234 }
235
236 // Constant used to denote that a given abbreviation is not in the
237 // set of abbreviations defined by the block.
238 static const int NO_SUCH_ABBREVIATION = -1;
239
240 // Returns the index to the corresponding application abbreviation,
241 // if it exists. Otherwise return NO_SUCH_ABBREVIATION;
242 int FindAbbreviation(const NaClBitCodeAbbrev *Abbrev) const {
243 for (unsigned i = GetFirstApplicationAbbreviation();
244 i < GetNumberAbbreviations(); ++i) {
245 if (*Abbrevs[i] == *Abbrev) return i;
246 }
247 return NO_SUCH_ABBREVIATION;
248 }
249
250 /// Adds the given abbreviation to the set of global abbreviations
251 /// defined for the block. Guarantees that duplicate abbreviations
252 /// are not added to the block. Note: Code takes ownership of
253 /// the given abbreviation. Returns true if new abbreviation.
254 /// Updates Index to the index where the abbreviation is located
255 /// in the set of abbreviations.
256 bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) {
257 Index = FindAbbreviation(Abbrev);
258 if (Index != NO_SUCH_ABBREVIATION) {
259 // Already defined, don't install.
260 Abbrev->dropRef();
261 return false;
262 }
263
264 // New abbreviation. Add.
265 Index = Abbrevs.size();
266 Abbrevs.push_back(Abbrev);
267 return true;
268 }
269
270 /// Adds the given abbreviation to the set of global abbreviations
271 /// defined for the block. Guarantees that duplicate abbreviations
272 /// are not added to the block. Note: Code takes ownership of
273 /// the given abbreviation. Returns true if new abbreviation.
274 bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) {
275 int Index;
276 return AddAbbreviation(Abbrev, Index);
277 }
278
279 /// The block ID associated with the block.
280 unsigned GetBlockID() const {
281 return BlockID;
282 }
283
284 /// Returns the index of the frist application abbreviation for the
285 /// block.
286 unsigned GetFirstApplicationAbbreviation() const {
287 return naclbitc::FIRST_APPLICATION_ABBREV;
288 }
289
290 // Returns the number of abbreviations associated with the block.
291 unsigned GetNumberAbbreviations() const {
292 return Abbrevs.size();
293 }
294
295 /// Returns the abbreviation associated with the given abbreviation
296 /// index.
297 NaClBitCodeAbbrev *GetIndexedAbbrev(unsigned index) {
298 if (index >= Abbrevs.size()) return 0;
299 return Abbrevs[index];
300 }
301
302 // Builds the corresponding fast lookup map for finding abbreviations
303 // that applies to abbreviations in the block
304 void BuildAbbrevLookupSizeMap() {
305 NaClBuildAbbrevLookupMap(GetLookupMap(),
306 GetAbbrevs(),
307 GetFirstApplicationAbbreviation());
308 if (ShowAbbrevLookupTries) PrintLookupMap(errs());
309 }
310
311 AbbrevBitstreamToInternalMap &GetGlobalAbbrevBitstreamToInternalMap() {
312 return GlobalAbbrevBitstreamToInternalMap;
313 }
314
315 AbbrevLookupSizeMap &GetLookupMap() {
316 return LookupMap;
317 }
318
319 // Returns lower level vector of abbreviations.
320 const AbbrevVector &GetAbbrevs() const {
321 return Abbrevs;
322 }
323
324 // Returns the abbreviation (index) to use for the corresponding
325 // record, based on the abbreviations of this block. Note: Assumes
326 // that BuildAbbrevLookupSizeMap has already been called.
327 unsigned GetRecordAbbrevIndex(const NaClBitcodeRecordData &Record) {
328 unsigned BestIndex = 0; // Ignored unless found candidate.
329 unsigned BestScore = 0; // Number of bits associated with BestIndex.
330 bool FoundCandidate = false;
331 NaClBitcodeValues Values(Record);
332 size_t Size = Values.size();
333
334 if (Size > NaClValueIndexCutoff) Size = NaClValueIndexCutoff+1;
335 AbbrevLookupSizeMap::const_iterator Pos = LookupMap.find(Size);
336 if (Pos != LookupMap.end()) {
337 if (const AbbrevTrieNode *Node = Pos->second) {
338 if (const AbbrevTrieNode *MatchNode =
339 Node->MatchRecord(Record)) {
340 const std::set<AbbrevIndexPair> &Abbreviations =
341 MatchNode->GetAbbreviations();
342 for (std::set<AbbrevIndexPair>::const_iterator
343 Iter = Abbreviations.begin(),
344 IterEnd = Abbreviations.end();
345 Iter != IterEnd; ++Iter) {
346 uint64_t NumBits = 0;
347 if (CanUseAbbreviation(Values, Iter->second, NumBits)) {
348 if (!FoundCandidate || NumBits < BestScore) {
349 // Use this as candidate.
350 BestIndex = Iter->first;
351 BestScore = NumBits;
352 FoundCandidate = true;
353 }
354 }
355 }
356 }
357 }
358 }
359 if (FoundCandidate && BestScore <= UnabbreviatedSize(Record)) {
360 return BestIndex;
361 }
362 return naclbitc::UNABBREV_RECORD;
363 }
364
365 // Computes the number of bits that will be generated by the
366 // corresponding read record, if no abbreviation is used.
367 static uint64_t UnabbreviatedSize(const NaClBitcodeRecordData &Record) {
368 uint64_t NumBits = MatchVBRBits(Record.Code, DefaultVBRBits);
369 size_t NumValues = Record.Values.size();
370 NumBits += MatchVBRBits(NumValues, DefaultVBRBits);
371 for (size_t Index = 0; Index < NumValues; ++Index) {
372 NumBits += MatchVBRBits(Record.Values[Index], DefaultVBRBits);
373 }
374 return NumBits;
375 }
376
377 // Returns true if the given abbreviation can be used to represent the
378 // record. Sets NumBits to the number of bits the abbreviation will
379 // generate. Note: Value of NumBits is undefined if this function
380 // return false.
381 static bool CanUseAbbreviation(NaClBitcodeValues &Values,
382 NaClBitCodeAbbrev *Abbrev, uint64_t &NumBits) {
383 NumBits = 0;
384 unsigned OpIndex = 0;
385 unsigned OpIndexEnd = Abbrev->getNumOperandInfos();
386 size_t ValueIndex = 0;
387 size_t ValueIndexEnd = Values.size();
388 while (ValueIndex < ValueIndexEnd && OpIndex < OpIndexEnd) {
389 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(OpIndex);
390 switch (Op.getEncoding()) {
391 case NaClBitCodeAbbrevOp::Literal:
392 if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {
393 ++ValueIndex;
394 ++OpIndex;
395 continue;
396 } else {
397 return false;
398 }
399 case NaClBitCodeAbbrevOp::Array: {
400 assert(OpIndex+2 == OpIndexEnd);
401 const NaClBitCodeAbbrevOp &ElmtOp =
402 Abbrev->getOperandInfo(OpIndex+1);
403
404 // Add size of array.
405 NumBits += MatchVBRBits(Values.size()-ValueIndex, DefaultVBRBits);
406
407 // Add size of each field.
408 for (; ValueIndex != ValueIndexEnd; ++ValueIndex) {
409 uint64_t FieldBits=0;
410 if (!CanUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) {
411 return false;
412 }
413 NumBits += FieldBits;
414 }
415 return true;
416 }
417 default: {
418 if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {
419 ++ValueIndex;
420 ++OpIndex;
421 break;
422 }
423 return false;
424 }
425 }
426 }
427 return ValueIndex == ValueIndexEnd && OpIndex == OpIndexEnd;
428 }
429
430 // Returns true if the given abbreviation Op defines a single value,
431 // and can be applied to the given Val. Adds the number of bits the
432 // abbreviation Op will generate to NumBits if Op applies.
433 static bool CanUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op,
434 uint64_t Val,
435 uint64_t &NumBits) {
436 switch (Op.getEncoding()) {
437 case NaClBitCodeAbbrevOp::Literal:
438 return Val == Op.getValue();
439 case NaClBitCodeAbbrevOp::Array:
440 return false;
441 case NaClBitCodeAbbrevOp::Fixed: {
442 uint64_t Width = Op.getValue();
443 if (!MatchFixedBits(Val, Width))
444 return false;
445 NumBits += Width;
446 return true;
447 }
448 case NaClBitCodeAbbrevOp::VBR:
449 if (unsigned Width = MatchVBRBits(Val, Op.getValue())) {
450 NumBits += Width;
451 return true;
452 } else {
453 return false;
454 }
455 case NaClBitCodeAbbrevOp::Char6:
456 if (!NaClBitCodeAbbrevOp::isChar6(Val)) return false;
457 NumBits += 6;
458 return true;
459 }
460 llvm_unreachable("unhandled NaClBitCodeAbbrevOp encoding");
461 }
462
463 // Returns true if the given Val can be represented by abbreviation
464 // operand Fixed(Width).
465 static bool MatchFixedBits(uint64_t Val, unsigned Width) {
466 // Note: The reader only allows up to 32 bits for fixed values.
467 if (Val & Mask32) return false;
468 if (Val & ~(~0U >> (32-Width))) return false;
469 return true;
470 }
471
472 // Returns the number of bits needed to represent Val by abbreviation
473 // operand VBR(Width). Note: Returns 0 if Val can't be represented
474 // by VBR(Width).
475 static unsigned MatchVBRBits(uint64_t Val, unsigned Width) {
476 if (Width == 0) return 0;
477 unsigned NumBits = 0;
478 while (1) {
479 // values emitted Width-1 bits at a time (plus a continue bit).
480 NumBits += Width;
481 if ((Val & (1U << (Width-1))) == 0)
482 return NumBits;
483 Val >>= Width-1;
484 }
485 }
486
487 private:
488 // Defines the number of bits used to print VBR array field values.
489 static const unsigned DefaultVBRBits = 6;
490 // Masks out the top-32 bits of a uint64_t value.
491 static const uint64_t Mask32 = 0xFFFFFFFF00000000;
492 // The block ID for which abbreviations are being associated.
493 unsigned BlockID;
494 // The list of abbreviations defined for the block.
495 AbbrevVector Abbrevs;
496 // The mapping from global bitstream abbreviations to the corresponding
497 // block abbreviation index (in Abbrevs).
498 AbbrevBitstreamToInternalMap GlobalAbbrevBitstreamToInternalMap;
499 // A fast lookup map for finding the abbreviation that applies
500 // to a record.
501 AbbrevLookupSizeMap LookupMap;
502
503 void PrintLookupMap(raw_ostream &Stream) {
504 Stream << "------------------------------\n";
505 Stream << "Block " << GetBlockID() << " abbreviation tries:\n";
506 bool IsFirstIteration = true;
507 for (AbbrevLookupSizeMap::const_iterator
508 Iter = LookupMap.begin(), IterEnd = LookupMap.end();
509 Iter != IterEnd; ++Iter) {
510 if (IsFirstIteration)
511 IsFirstIteration = false;
512 else
513 Stream << "-----\n";
514 if (Iter->second) {
515 Stream << "Index " << Iter->first << ":\n";
516 Iter->second->Print(Stream, " ");
517 }
518 }
519 Stream << "------------------------------\n";
520 }
521 };
522
523 /// Defines a map from block ID's to the corresponding abbreviation
524 /// map to use.
525 typedef DenseMap<unsigned, BlockAbbrevs*> BlockAbbrevsMapType;
526
527 /// Parses the bitcode file, analyzes it, and generates the
528 /// corresponding lists of global abbreviations to use in the
529 /// generated (compressed) bitcode file.
530 class NaClAnalyzeParser : public NaClBitcodeParser {
531 NaClAnalyzeParser(const NaClAnalyzeParser&)
532 LLVM_DELETED_FUNCTION;
533 void operator=(const NaClAnalyzeParser&)
534 LLVM_DELETED_FUNCTION;
535
536 public:
537 // Creates the analysis parser, which will fill the given
538 // BlockAbbrevsMap with appropriate abbreviations, after
539 // analyzing the bitcode file defined by Cursor.
540 NaClAnalyzeParser(NaClBitstreamCursor &Cursor,
541 BlockAbbrevsMapType &BlockAbbrevsMap)
542 : NaClBitcodeParser(Cursor),
543 BlockAbbrevsMap(BlockAbbrevsMap),
544 BlockDist(&NaClCompressBlockDistElement::Sentinel),
545 AbbrevListener(this)
546 {
547 SetListener(&AbbrevListener);
548 }
549
550 virtual ~NaClAnalyzeParser() {}
551
552 virtual bool Error(const std::string &Message) {
553 // Use local error routine so that all errors are treated uniformly.
554 return ::Error(Message);
555 }
556
557 virtual bool ParseBlock(unsigned BlockID);
558
559 // Mapping from block ID's to the corresponding list of abbreviations
560 // associated with that block.
561 BlockAbbrevsMapType &BlockAbbrevsMap;
562
563 // Nested distribution capturing distribution of records in bitcode file.
564 NaClBitcodeBlockDist BlockDist;
565
566 // Listener used to get abbreviations as they are read.
567 NaClBitcodeParserListener AbbrevListener;
568 };
569
570 class NaClBlockAnalyzeParser : public NaClBitcodeParser {
571 NaClBlockAnalyzeParser(const NaClBlockAnalyzeParser&)
572 LLVM_DELETED_FUNCTION;
573 void operator=(NaClBlockAnalyzeParser&)
574 LLVM_DELETED_FUNCTION;
575
576 public:
577 /// Top-level constructor to build the top-level block with the
578 /// given BlockID, and collect data (for compression) in that block.
579 NaClBlockAnalyzeParser(unsigned BlockID,
580 NaClAnalyzeParser *Context)
581 : NaClBitcodeParser(BlockID, Context), Context(Context) {
582 Init();
583 }
584
585 virtual ~NaClBlockAnalyzeParser() {
586 Context->BlockDist.AddBlock(GetBlock());
587 }
588
589 protected:
590 /// Nested constructor to parse a block within a block. Creates a
591 /// block parser to parse a block with the given BlockID, and
592 /// collect data (for compression) in that block.
593 NaClBlockAnalyzeParser(unsigned BlockID,
594 NaClBlockAnalyzeParser *EnclosingParser)
595 : NaClBitcodeParser(BlockID, EnclosingParser),
596 Context(EnclosingParser->Context) {
597 Init();
598 }
599
600 public:
601 virtual bool Error(const std::string &Message) {
602 // Use local error routine so that all errors are treated uniformly.
603 return ::Error(Message);
604 }
605
606 virtual bool ParseBlock(unsigned BlockID) {
607 NaClBlockAnalyzeParser Parser(BlockID, this);
608 return Parser.ParseThisBlock();
609 }
610
611 virtual void ProcessRecord() {
612 // Before processing the record, we need to rename the abbreviation
613 // index, so that we can look it up in the set of block abbreviations
614 // being defined.
615 if (Record.UsedAnAbbreviation()) {
616 unsigned AbbrevIndex = Record.GetAbbreviationIndex();
617 if (LocalAbbrevBitstreamToInternalMap.
618 DefinesBitstreamAbbrevIndex(AbbrevIndex)) {
619 Record.SetAbbreviationIndex(
620 LocalAbbrevBitstreamToInternalMap.
621 GetInternalAbbrevIndex(AbbrevIndex));
622 } else {
623 AbbrevBitstreamToInternalMap &GlobalAbbrevBitstreamToInternalMap =
624 GlobalBlockAbbrevs->GetGlobalAbbrevBitstreamToInternalMap();
625 if (GlobalAbbrevBitstreamToInternalMap.
626 DefinesBitstreamAbbrevIndex(AbbrevIndex)) {
627 Record.SetAbbreviationIndex(
628 GlobalAbbrevBitstreamToInternalMap.
629 GetInternalAbbrevIndex(AbbrevIndex));
630 } else {
631 report_fatal_error("Bad abbreviation index in file");
632 }
633 }
634 }
635
636 cast<NaClCompressBlockDistElement>(
637 Context->BlockDist.GetElement(Record.GetBlockID()))
638 ->GetAbbrevDist().AddRecord(Record);
639 }
640
641 virtual void ProcessAbbreviation(unsigned BlockID,
642 NaClBitCodeAbbrev *Abbrev,
643 bool IsLocal) {
644 int Index;
645 AddAbbreviation(BlockID, Abbrev->Simplify(), Index);
646 if (IsLocal) {
647 LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index);
648 } else {
649 GetGlobalAbbrevs(BlockID)->GetGlobalAbbrevBitstreamToInternalMap().
650 InstallNewBitstreamAbbrevIndex(Index);
651 }
652 }
653
654 protected:
655 // The context (i.e. top-level) parser.
656 NaClAnalyzeParser *Context;
657
658 // The global block abbreviations associated with this block.
659 BlockAbbrevs *GlobalBlockAbbrevs;
660
661 // The local abbreviations associated with this block.
662 AbbrevBitstreamToInternalMap LocalAbbrevBitstreamToInternalMap;
663
664 /// Returns the set of (global) block abbreviations defined for the
665 /// given block ID.
666 BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {
667 BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];
668 if (Abbrevs == 0) {
669 Abbrevs = new BlockAbbrevs(BlockID);
670 Context->BlockAbbrevsMap[BlockID] = Abbrevs;
671 }
672 return Abbrevs;
673 }
674
675 void SetGlobalAbbrevs(unsigned BlockID, BlockAbbrevs *Abbrevs) {
676 Context->BlockAbbrevsMap[BlockID] = Abbrevs;
677 }
678
679 // Adds the abbreviation to the list of abbreviations for the given
680 // block.
681 void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev,
682 int &Index) {
683 // Get block abbreviations.
684 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID);
685
686 // Read abbreviation and add as a global abbreviation.
687 if (Abbrevs->AddAbbreviation(Abbrev, Index)
688 && TraceGeneratedAbbreviations) {
689 PrintAbbrev(errs(), BlockID, Abbrev);
690 }
691 }
692
693 /// Finds the index to the corresponding internal block abbreviation
694 /// for the given abbreviation.
695 int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
696 return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev);
697 }
698
699 void Init() {
700 GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID());
701 LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex(
702 GlobalBlockAbbrevs->
703 GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex());
704 }
705 };
706
707 bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) {
708 NaClBlockAnalyzeParser Parser(BlockID, this);
709 return Parser.ParseThisBlock();
710 }
711
712 /// Models the unrolling of an abbreviation into its sequence of
713 /// individual operators. That is, unrolling arrays to match the width
714 /// of the abbreviation.
715 ///
716 /// For example, consider the abbreviation [Array(VBR(6))]. If the
717 /// distribution map has data for records of size 3, and the
718 /// distribution map suggests that a constant 4 appears as the second
719 /// element in the record, it is nontrivial to figure out how to
720 /// encorporate this into this abbrevation. Hence, we unroll the array
721 /// (3 times) to get [VBR(6), VBR(6), VBR(6), Array(VBR(6))]. To
722 /// update the second element to match the literal 4, we only need to
723 /// replace the second element in the unrolled abbreviation resulting
724 /// in [VBR(6), Lit(4), VBR(6), Array(VBR(6))].
725 ///
726 /// After we have done appropriate substitutions, we can simplify the
727 /// unrolled abbreviation by calling method Restore.
728 ///
729 /// Note: We unroll in the form that best matches the distribution
730 /// map. Hence, the code is stored as a separate operator. We also
731 /// keep the array abbreviation op, for untracked elements within the
732 /// distribution maps.
733 class UnrolledAbbreviation {
734 void operator=(const UnrolledAbbreviation&) LLVM_DELETED_FUNCTION;
735 public:
736 /// Unroll the given abbreviation, assuming it has the given size
737 /// (as specified in the distribution maps).
738 ///
739 /// If argument CanBeBigger is true, then we do not assume that we
740 /// can remove the trailing array when expanding, because the
741 /// actual size of the corresponding record using this abbreviation
742 /// may be bigger.
743 UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size,
744 bool CanBeBigger = false)
745 : CodeOp(0) {
746 unsigned NextOp = 0;
747 UnrollAbbrevOp(CodeOp, Abbrev, NextOp);
748 --Size;
749 for (unsigned i = 0; i < Size; ++i) {
750 // Create slot and then fill with appropriate operator.
751 AbbrevOps.push_back(CodeOp);
752 UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp);
753 }
754 if (CanBeBigger) {
755 for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) {
756 MoreOps.push_back(Abbrev->getOperandInfo(NextOp));
757 }
758 } else if (NextOp < Abbrev->getNumOperandInfos() &&
759 !Abbrev->getOperandInfo(NextOp).isArrayOp()) {
760 errs() << (Size+1) << ": ";
761 Abbrev->Print(errs());
762 llvm_unreachable("Malformed abbreviation/size pair");
763 }
764 }
765
766 explicit UnrolledAbbreviation(const UnrolledAbbreviation &Abbrev)
767 : CodeOp(Abbrev.CodeOp),
768 AbbrevOps(Abbrev.AbbrevOps),
769 MoreOps(Abbrev.MoreOps) {
770 }
771
772 /// Prints out the abbreviation modeled by the unrolled
773 /// abbreviation.
774 void Print(raw_ostream &Stream) const {
775 NaClBitCodeAbbrev *Abbrev = Restore(false);
776 Abbrev->Print(Stream);
777 Abbrev->dropRef();
778 }
779
780 /// Converts the unrolled abbreviation back into a regular
781 /// abbreviation. If Simplify is true, we simplify the
782 /// unrolled abbreviation as well.
783 NaClBitCodeAbbrev *Restore(bool Simplify=true) const {
784 NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
785 Abbrev->Add(CodeOp);
786 for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
787 Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end();
788 Iter != IterEnd; ++Iter) {
789 Abbrev->Add(*Iter);
790 }
791 for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
792 Iter = MoreOps.begin(), IterEnd = MoreOps.end();
793 Iter != IterEnd; ++Iter) {
794 Abbrev->Add(*Iter);
795 }
796 if (Simplify) {
797 NaClBitCodeAbbrev *SimpAbbrev = Abbrev->Simplify();
798 Abbrev->dropRef();
799 return SimpAbbrev;
800 } else {
801 return Abbrev;
802 }
803 }
804
805 // The abbreviation used for the record code.
806 NaClBitCodeAbbrevOp CodeOp;
807
808 // The abbreviations used for each tracked value index.
809 std::vector<NaClBitCodeAbbrevOp> AbbrevOps;
810
811 private:
812 // Any remaining abbreviation operators not part of the unrolling.
813 std::vector<NaClBitCodeAbbrevOp> MoreOps;
814
815 // Extracts out the next abbreviation operator from the abbreviation
816 // Abbrev, given the index NextOp, and assigns it to AbbrevOp
817 void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp,
818 NaClBitCodeAbbrev *Abbrev,
819 unsigned &NextOp) {
820 assert(NextOp < Abbrev->getNumOperandInfos());
821 const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp);
822 if (Op.isArrayOp()) {
823 // Do not advance. The array operator assumes that all remaining
824 // elements should match its argument.
825 AbbrevOp = Abbrev->getOperandInfo(NextOp+1);
826 } else {
827 AbbrevOp = Op;
828 NextOp++;
829 }
830 }
831 };
832
833 /// Models a candidate block abbreviation, which is a blockID, and the
834 /// corresponding abbreviation to be considered for addition. Note:
835 /// Constructors and assignment take ownership of the abbreviation.
836 class CandBlockAbbrev {
837 public:
838 CandBlockAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbrev)
839 : BlockID(BlockID), Abbrev(Abbrev) {
840 }
841
842 CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev)
843 : BlockID(BlockAbbrev.BlockID),
844 Abbrev(BlockAbbrev.Abbrev) {
845 Abbrev->addRef();
846 }
847
848 void operator=(const CandBlockAbbrev &BlockAbbrev) {
849 Abbrev->dropRef();
850 BlockID = BlockAbbrev.BlockID;
851 Abbrev = BlockAbbrev.Abbrev;
852 Abbrev->addRef();
853 }
854
855 ~CandBlockAbbrev() {
856 Abbrev->dropRef();
857 }
858
859 /// The block ID of the candidate abbreviation.
860 unsigned GetBlockID() const {
861 return BlockID;
862 }
863
864 /// The abbreviation of the candidate abbreviation.
865 const NaClBitCodeAbbrev *GetAbbrev() const {
866 return Abbrev;
867 }
868
869 /// orders this against the candidate abbreviation.
870 int Compare(const CandBlockAbbrev &CandAbbrev) const {
871 unsigned diff = BlockID - CandAbbrev.BlockID;
872 if (diff) return diff;
873 return Abbrev->Compare(*CandAbbrev.Abbrev);
874 }
875
876 /// Prints the candidate abbreviation to the given stream.
877 void Print(raw_ostream &Stream) const {
878 PrintAbbrev(Stream, BlockID, Abbrev);
879 }
880
881 private:
882 // The block the abbreviation applies to.
883 unsigned BlockID;
884 // The candidate abbreviation.
885 NaClBitCodeAbbrev *Abbrev;
886 };
887
888 static inline bool operator<(const CandBlockAbbrev &A1,
889 const CandBlockAbbrev &A2) {
890 return A1.Compare(A2) < 0;
891 }
892
893 /// Models the set of candidate abbreviations being considered, and
894 /// the number of abbreviations associated with each candidate
895 /// Abbreviation.
896 ///
897 /// Note: Because we may have abbreviation refinements of A->B->C and
898 /// A->D->C, we need to accumulate instance counts in such cases.
899 class CandidateAbbrevs {
900 public:
901 // Map from candidate abbreviations to the corresponding number of
902 // instances.
903 typedef std::map<CandBlockAbbrev, unsigned> AbbrevCountMap;
904 typedef AbbrevCountMap::const_iterator const_iterator;
905
906 /// Creates an empty set of candidate abbreviations, to be
907 /// (potentially) added to the given set of block abbreviations.
908 /// Argument is the (global) block abbreviations map, which is
909 /// used to determine if it already exists.
910 CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap)
911 : BlockAbbrevsMap(BlockAbbrevsMap)
912 {}
913
914 /// Adds the given (unrolled) abbreviation as a candidate
915 /// abbreviation to the given block. NumInstances is the number of
916 /// instances expected to use this candidate abbreviation. Returns
917 /// true if the corresponding candidate abbreviation is added to this
918 /// set of candidate abbreviations.
919 bool Add(unsigned BlockID,
920 UnrolledAbbreviation &UnrolledAbbrev,
921 unsigned NumInstances);
922
923 /// Returns the list of candidate abbreviations in this set.
924 const AbbrevCountMap &GetAbbrevsMap() const {
925 return AbbrevsMap;
926 }
927
928 /// Prints out the current contents of this set.
929 void Print(raw_ostream &Stream, const char *Title = "Candidates") const {
930 Stream << "-- " << Title << ": \n";
931 for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end();
932 Iter != IterEnd; ++Iter) {
933 Stream << format("%12u", Iter->second) << ": ";
934 Iter->first.Print(Stream);
935 }
936 Stream << "--\n";
937 }
938
939 private:
940 // The set of abbreviations and corresponding number instances.
941 AbbrevCountMap AbbrevsMap;
942
943 // The map of (global) abbreviations already associated with each block.
944 BlockAbbrevsMapType &BlockAbbrevsMap;
945 };
946
947 bool CandidateAbbrevs::Add(unsigned BlockID,
948 UnrolledAbbreviation &UnrolledAbbrev,
949 unsigned NumInstances) {
950 // Drop if it corresponds to an existing global abbreviation.
951 NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.Restore();
952 if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) {
953 if (Abbrevs->FindAbbreviation(Abbrev) !=
954 BlockAbbrevs::NO_SUCH_ABBREVIATION) {
955 Abbrev->dropRef();
956 return false;
957 }
958 }
959
960 CandBlockAbbrev CandAbbrev(BlockID, Abbrev);
961 AbbrevCountMap::iterator Pos = AbbrevsMap.find(CandAbbrev);
962 if (Pos == AbbrevsMap.end()) {
963 AbbrevsMap[CandAbbrev] = NumInstances;
964 } else {
965 Pos->second += NumInstances;
966 }
967 return true;
968 }
969
970 // Look for new abbreviations in block BlockID, considering it was
971 // read with the given abbreviation Abbrev, and considering changing
972 // the abbreviation opererator for value Index. ValueDist is how
973 // values at Index are distributed. Any found abbreviations are added
974 // to the candidate abbreviations CandAbbrevs. Returns true only if we
975 // have added new candidate abbreviations to CandAbbrevs.
976 static bool AddNewAbbreviations(unsigned BlockID,
977 const UnrolledAbbreviation &Abbrev,
978 unsigned Index,
979 NaClBitcodeValueDist &ValueDist,
980 CandidateAbbrevs &CandAbbrevs) {
981 // TODO(kschimpf): Add code to try and find a better encoding for
982 // the values, based on the distribution.
983
984 // If this index is already a literal abbreviation, no improvements can
985 // be made.
986 const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index];
987 if (Op.isLiteral()) return false;
988
989 // Search based on sorted distribution, which sorts by number of
990 // instances. Start by trying to find possible constants to use.
991 const NaClBitcodeDist::Distribution *
992 Distribution = ValueDist.GetDistribution();
993 for (NaClBitcodeDist::Distribution::const_iterator
994 Iter = Distribution->begin(), IterEnd = Distribution->end();
995 Iter != IterEnd; ++Iter) {
996 NaClValueRangeType Range = GetNaClValueRange(Iter->second);
997 if (Range.first != Range.second) continue; // not a constant.
998
999 // Defines a constant. Try as new candidate range. In addition,
1000 // don't try any more constant values, since this is the one with
1001 // the greatest number of instances.
1002 NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first);
1003 UnrolledAbbreviation CandAbbrev(Abbrev);
1004 CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first);
1005 return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances());
1006 }
1007 return false;
1008 }
1009
1010 // Look for new abbreviations in block BlockID, considering it was
1011 // read with the given abbreviation Abbrev. IndexDist is the
1012 // corresponding distribution of value indices associated with the
1013 // abbreviation. Any found abbreviations are added to the candidate
1014 // abbreviations CandAbbrevs.
1015 static void AddNewAbbreviations(unsigned BlockID,
1016 const UnrolledAbbreviation &Abbrev,
1017 NaClBitcodeDist &IndexDist,
1018 CandidateAbbrevs &CandAbbrevs) {
1019 // Search based on sorted distribution, which sorts based on heuristic
1020 // of best index to fix first.
1021 const NaClBitcodeDist::Distribution *
1022 IndexDistribution = IndexDist.GetDistribution();
1023 for (NaClBitcodeDist::Distribution::const_iterator
1024 IndexIter = IndexDistribution->begin(),
1025 IndexIterEnd = IndexDistribution->end();
1026 IndexIter != IndexIterEnd; ++IndexIter) {
1027 unsigned Index = static_cast<unsigned>(IndexIter->second);
1028 if (AddNewAbbreviations(
1029 BlockID, Abbrev, Index,
1030 cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index))
1031 ->GetValueDist(),
1032 CandAbbrevs)) {
1033 return;
1034 }
1035 }
1036 }
1037
1038 // Look for new abbreviations in block BlockID, considering it was
1039 // read with the given abbreviation Abbrev, and the given record Code.
1040 // SizeDist is the corresponding distribution of sizes associated with
1041 // the abbreviation. Any found abbreviations are added to the
1042 // candidate abbreviations CandAbbrevs.
1043 static void AddNewAbbreviations(unsigned BlockID,
1044 NaClBitCodeAbbrev *Abbrev,
1045 unsigned Code,
1046 NaClBitcodeDist &SizeDist,
1047 CandidateAbbrevs &CandAbbrevs) {
1048 const NaClBitcodeDist::Distribution *
1049 SizeDistribution = SizeDist.GetDistribution();
1050 for (NaClBitcodeDist::Distribution::const_iterator
1051 SizeIter = SizeDistribution->begin(),
1052 SizeIterEnd = SizeDistribution->end();
1053 SizeIter != SizeIterEnd; ++SizeIter) {
1054 unsigned Size = static_cast<unsigned>(SizeIter->second);
1055 UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1 /* Add code! */,
1056 Size >= NaClValueIndexCutoff);
1057 if (!UnrolledAbbrev.CodeOp.isLiteral()) {
1058 // Try making the code a literal.
1059 UnrolledAbbreviation CandAbbrev(UnrolledAbbrev);
1060 CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code);
1061 CandAbbrevs.Add(BlockID, CandAbbrev,
1062 SizeDist.at(Size)->GetNumInstances());
1063 }
1064 // Now process value indices to find candidate abbreviations.
1065 AddNewAbbreviations(
1066 BlockID, UnrolledAbbrev,
1067 cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size))
1068 ->GetValueIndexDist(),
1069 CandAbbrevs);
1070 }
1071 }
1072
1073 // Look for new abbreviations in block BlockID. Abbrevs is the map of
1074 // read (globally defined) abbreviations associated with the
1075 // BlockID. AbbrevDist is the distribution map of abbreviations
1076 // associated with BlockID. Any found abbreviations are added to the
1077 // candidate abbreviations CandAbbrevs.
1078 static void AddNewAbbreviations(unsigned BlockID,
1079 BlockAbbrevs *Abbrevs,
1080 NaClBitcodeDist &AbbrevDist,
1081 CandidateAbbrevs &CandAbbrevs) {
1082 const NaClBitcodeDist::Distribution *
1083 AbbrevDistribution = AbbrevDist.GetDistribution();
1084 for (NaClBitcodeDist::Distribution::const_iterator
1085 AbbrevIter = AbbrevDistribution->begin(),
1086 AbbrevIterEnd = AbbrevDistribution->end();
1087 AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
1088 NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second;
1089 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex);
1090 NaClBitcodeAbbrevDistElement *AbbrevElmt =
1091 cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex));
1092 NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist();
1093
1094 const NaClBitcodeDist::Distribution *
1095 CodeDistribution = CodeDist.GetDistribution();
1096 for (NaClBitcodeDist::Distribution::const_iterator
1097 CodeIter = CodeDistribution->begin(),
1098 CodeIterEnd = CodeDistribution->end();
1099 CodeIter != CodeIterEnd; ++CodeIter) {
1100 unsigned Code = static_cast<unsigned>(CodeIter->second);
1101 AddNewAbbreviations(
1102 BlockID,
1103 Abbrev,
1104 Code,
1105 cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second))
1106 ->GetSizeDist(),
1107 CandAbbrevs);
1108 }
1109 }
1110 }
1111
1112 typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType;
1113
1114 // Look for new abbreviations in the given block distribution map
1115 // BlockDist. BlockAbbrevsMap contains the set of read global
1116 // abbreviations. Adds found candidate abbreviations to the set of
1117 // known global abbreviations.
1118 static void AddNewAbbreviations(NaClBitcodeBlockDist &BlockDist,
1119 BlockAbbrevsMapType &BlockAbbrevsMap) {
1120 CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap);
1121 // Start by collecting candidate abbreviations.
1122 const NaClBitcodeDist::Distribution *
1123 Distribution = BlockDist.GetDistribution();
1124 for (NaClBitcodeDist::Distribution::const_iterator
1125 Iter = Distribution->begin(), IterEnd = Distribution->end();
1126 Iter != IterEnd; ++Iter) {
1127 unsigned BlockID = static_cast<unsigned>(Iter->second);
1128 AddNewAbbreviations(
1129 BlockID,
1130 BlockAbbrevsMap[BlockID],
1131 cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))
1132 ->GetAbbrevDist(),
1133 CandAbbrevs);
1134 }
1135 // Install candidate abbreviations.
1136 //
1137 // Sort the candidate abbreviations by number of instances, so that
1138 // if multiple abbreviations apply, the one with the largest number
1139 // of instances will be chosen when compressing a file.
1140 //
1141 // For example, we may have just generated two abbreviations. The
1142 // first has replaced the 3rd element with the constant 4 while the
1143 // second replaced the 4th element with the constant 5. The first
1144 // abbreviation can apply to 300 records while the second can apply
1145 // to 1000 records. Assuming that both abbreviations shrink the
1146 // record by the same number of bits, we clearly want the tool to
1147 // choose the second abbreviation when selecting the abbreviation
1148 // index to choose (via method GetRecordAbbrevIndex).
1149 //
1150 // Selecting the second is important in that abbreviation are
1151 // refined by successive calls to this tool. We do not want to
1152 // restrict downstream refinements prematurely. Hence, we want the
1153 // tool to choose the abbreviation with the most candidates first.
1154 //
1155 // Since method GetRecordAbbrevIndex chooses the first abbreviation
1156 // that generates the least number of bits, we clearly want to make
1157 // sure that the second abbreviation occurs before the first.
1158 std::vector<CountedAbbrevType> Candidates;
1159 for (CandidateAbbrevs::const_iterator
1160 Iter = CandAbbrevs.GetAbbrevsMap().begin(),
1161 IterEnd = CandAbbrevs.GetAbbrevsMap().end();
1162 Iter != IterEnd; ++Iter) {
1163 Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first));
1164 }
1165 std::sort(Candidates.begin(), Candidates.end());
1166 std::vector<CountedAbbrevType>::const_reverse_iterator
1167 Iter = Candidates.rbegin(), IterEnd = Candidates.rend();
1168 if (Iter == IterEnd) return;
1169
1170 if (TraceGeneratedAbbreviations) {
1171 errs() << "-- New abbrevations:\n";
1172 }
1173 unsigned Min = (Iter->first >> 2);
1174 for (; Iter != IterEnd; ++Iter) {
1175 if (Iter->first < Min) break;
1176 unsigned BlockID = Iter->second.GetBlockID();
1177 BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID];
1178 NaClBitCodeAbbrev *Abbrev = Iter->second.GetAbbrev()->Copy();
1179 if (TraceGeneratedAbbreviations) {
1180 errs() <<format("%12u", Iter->first) << ": ";
1181 PrintAbbrev(errs(), BlockID, Abbrev);
1182 }
1183 Abbrevs->AddAbbreviation(Abbrev);
1184 }
1185 if (TraceGeneratedAbbreviations) {
1186 errs() << "--\n";
1187 }
1188 }
1189
1190 // Walks the block distribution (BlockDist), sorting entries based
1191 // on the distribution of blocks and abbreviations, and then
1192 // prints out the frequency of each abbreviation used.
1193 static void DisplayAbbreviationFrequencies(
1194 raw_ostream &Output,
1195 const NaClBitcodeBlockDist &BlockDist,
1196 const BlockAbbrevsMapType &BlockAbbrevsMap) {
1197 const NaClBitcodeDist::Distribution *BlockDistribution =
1198 BlockDist.GetDistribution();
1199 for (NaClBitcodeDist::Distribution::const_iterator
1200 BlockIter = BlockDistribution->begin(),
1201 BlockIterEnd = BlockDistribution->end();
1202 BlockIter != BlockIterEnd; ++BlockIter) {
1203 unsigned BlockID = static_cast<unsigned>(BlockIter->second);
1204 BlockAbbrevsMapType::const_iterator BlockPos = BlockAbbrevsMap.find(BlockID) ;
1205 if (BlockPos == BlockAbbrevsMap.end()) continue;
1206 Output << "Block " << BlockID << "\n";
1207 if (NaClCompressBlockDistElement *BlockElement =
1208 dyn_cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))) {
1209 NaClBitcodeDist &AbbrevDist = BlockElement->GetAbbrevDist();
1210 const NaClBitcodeDist::Distribution *AbbrevDistribution =
1211 AbbrevDist.GetDistribution();
1212 unsigned Total = AbbrevDist.GetTotal();
1213 for (NaClBitcodeDist::Distribution::const_iterator
1214 AbbrevIter = AbbrevDistribution->begin(),
1215 AbbrevIterEnd = AbbrevDistribution->end();
1216 AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
1217 unsigned Index = static_cast<unsigned>(AbbrevIter->second);
1218 unsigned Count = AbbrevDist.at(Index)->GetNumInstances();
1219 Output << format("%8u (%6.2f%%): ", Count,
1220 (double) Count/Total*100.0);
1221 BlockPos->second->GetIndexedAbbrev(Index)->Print(Output);
1222 }
1223 }
1224 Output << '\n';
1225 }
1226 }
1227
1228 // Read in bitcode, analyze data, and figure out set of abbreviations
1229 // to use, from memory buffer MemBuf containing the input bitcode file.
1230 static bool AnalyzeBitcode(std::unique_ptr<MemoryBuffer> &MemBuf,
1231 BlockAbbrevsMapType &BlockAbbrevsMap) {
1232 // TODO(kschimpf): The current code only extracts abbreviations
1233 // defined in the bitcode file. This code needs to be updated to
1234 // collect data distributions and figure out better (global)
1235 // abbreviations to use.
1236
1237 const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
1238 const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
1239
1240 // First read header and verify it is good.
1241 NaClBitcodeHeader Header;
1242 if (Header.Read(BufPtr, EndBufPtr) || !Header.IsSupported())
1243 return Error("Invalid PNaCl bitcode header");
1244
1245 // Create a bitstream reader to read the bitcode file.
1246 NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
1247 NaClBitstreamCursor Stream(StreamFile);
1248
1249 // Parse the the bitcode file.
1250 NaClAnalyzeParser Parser(Stream, BlockAbbrevsMap);
1251 while (!Stream.AtEndOfStream()) {
1252 if (Parser.Parse()) return true;
1253 }
1254
1255 if (ShowAbbreviationFrequencies || ShowValueDistributions) {
1256 std::error_code EC;
1257 raw_fd_ostream Output(OutputFilename, EC, sys::fs::F_None);
1258 if (EC) {
1259 errs() << EC.message() << "\n";
1260 exit(1);
1261 }
1262 if (ShowAbbreviationFrequencies)
1263 DisplayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap);
1264 if (ShowValueDistributions)
1265 Parser.BlockDist.Print(Output);
1266 }
1267
1268 AddNewAbbreviations(Parser.BlockDist, BlockAbbrevsMap);
1269 return false;
1270 }
1271
1272 /// Parses the input bitcode file and generates the corresponding
1273 /// compressed bitcode file, by replacing abbreviations in the input
1274 /// file with the corresponding abbreviations defined in
1275 /// BlockAbbrevsMap.
1276 class NaClBitcodeCopyParser : public NaClBitcodeParser {
1277 public:
1278 // Top-level constructor to build the appropriate block parser
1279 // using the given BlockAbbrevsMap to define abbreviations.
1280 NaClBitcodeCopyParser(NaClBitstreamCursor &Cursor,
1281 BlockAbbrevsMapType &BlockAbbrevsMap,
1282 NaClBitstreamWriter &Writer)
1283 : NaClBitcodeParser(Cursor),
1284 BlockAbbrevsMap(BlockAbbrevsMap),
1285 Writer(Writer),
1286 FoundFirstBlockInfo(false)
1287 {}
1288
1289 virtual ~NaClBitcodeCopyParser() {}
1290
1291 virtual bool Error(const std::string &Message) {
1292 // Use local error routine so that all errors are treated uniformly.
1293 return ::Error(Message);
1294 }
1295
1296 virtual bool ParseBlock(unsigned BlockID);
1297
1298 // The abbreviations to use for the copied bitcode.
1299 BlockAbbrevsMapType &BlockAbbrevsMap;
1300
1301 // The bitstream to copy the compressed bitcode into.
1302 NaClBitstreamWriter &Writer;
1303
1304 // True if we have already found the first block info block.
1305 // Used to make sure we don't use abbreviations until we
1306 // have put them into the bitcode file.
1307 bool FoundFirstBlockInfo;
1308 };
1309
1310 class NaClBlockCopyParser : public NaClBitcodeParser {
1311 public:
1312 // Top-level constructor to build the appropriate block parser.
1313 NaClBlockCopyParser(unsigned BlockID,
1314 NaClBitcodeCopyParser *Context)
1315 : NaClBitcodeParser(BlockID, Context),
1316 Context(Context),
1317 BlockAbbreviations(0)
1318 {}
1319
1320 virtual ~NaClBlockCopyParser() {}
1321
1322 protected:
1323 // The context defining state associated with the block parser.
1324 NaClBitcodeCopyParser *Context;
1325
1326 // The block abbreviations defined for this block (initialized by
1327 // EnterBlock).
1328 BlockAbbrevs *BlockAbbreviations;
1329
1330 /// Constructor to parse nested blocks. Creates a block parser to
1331 /// parse in a block with the given BlockID, and write the block
1332 /// back out using the abbreviations in BlockAbbrevsMap.
1333 NaClBlockCopyParser(unsigned BlockID,
1334 NaClBlockCopyParser *EnclosingParser)
1335 : NaClBitcodeParser(BlockID, EnclosingParser),
1336 Context(EnclosingParser->Context)
1337 {}
1338
1339 virtual bool Error(const std::string &Message) {
1340 // Use local error routine so that all errors are treated uniformly.
1341 return ::Error(Message);
1342 }
1343
1344 /// Returns the set of (global) block abbreviations defined for the
1345 /// given block ID.
1346 BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) {
1347 BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID];
1348 if (Abbrevs == 0) {
1349 Abbrevs = new BlockAbbrevs(BlockID);
1350 Context->BlockAbbrevsMap[BlockID] = Abbrevs;
1351 }
1352 return Abbrevs;
1353 }
1354
1355 virtual bool ParseBlock(unsigned BlockID) {
1356 NaClBlockCopyParser Parser(BlockID, this);
1357 return Parser.ParseThisBlock();
1358 }
1359
1360 virtual void EnterBlock(unsigned NumWords) {
1361 unsigned BlockID = GetBlockID();
1362 BlockAbbreviations = GetGlobalAbbrevs(BlockID);
1363
1364 // Enter the subblock.
1365 NaClBitcodeSelectorAbbrev
1366 Selector(BlockAbbreviations->GetNumberAbbreviations()-1);
1367 if (RemoveAbbreviations) Selector = NaClBitcodeSelectorAbbrev();
1368 Context->Writer.EnterSubblock(BlockID, Selector);
1369
1370 // Note: We must dump module abbreviations as local
1371 // abbreviations, because they are in a yet to be
1372 // dumped BlockInfoBlock.
1373 if (!RemoveAbbreviations && BlockID == naclbitc::MODULE_BLOCK_ID) {
1374 BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID);
1375 for (unsigned i = 0; i < Abbrevs->GetNumberAbbreviations(); ++i) {
1376 Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy());
1377 }
1378 }
1379 }
1380
1381 virtual void ExitBlock() {
1382 Context->Writer.ExitBlock();
1383 }
1384
1385 virtual void ProcessBlockInfo() {
1386 assert(!Context->FoundFirstBlockInfo &&
1387 "Input bitcode has more that one BlockInfoBlock");
1388 Context->FoundFirstBlockInfo = true;
1389
1390 // Generate global abbreviations within a blockinfo block.
1391 Context->Writer.EnterBlockInfoBlock();
1392 if (!RemoveAbbreviations) {
1393 for (BlockAbbrevsMapType::const_iterator
1394 Iter = Context->BlockAbbrevsMap.begin(),
1395 IterEnd = Context->BlockAbbrevsMap.end();
1396 Iter != IterEnd; ++Iter) {
1397 unsigned BlockID = Iter->first;
1398 // Don't emit module abbreviations, since they have been
1399 // emitted as local abbreviations.
1400 if (BlockID == naclbitc::MODULE_BLOCK_ID) continue;
1401
1402 BlockAbbrevs *Abbrevs = Iter->second;
1403 if (Abbrevs == 0) continue;
1404 for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation();
1405 i < Abbrevs->GetNumberAbbreviations(); ++i) {
1406 NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(i);
1407 Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev);
1408 }
1409 }
1410 }
1411 Context->Writer.ExitBlock();
1412 }
1413
1414 virtual void ProcessRecord() {
1415 const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
1416 if (RemoveAbbreviations) {
1417 Context->Writer.EmitRecord(Record.GetCode(), Values, 0);
1418 return;
1419 }
1420 // Find best fitting abbreviation to use, and print out the record
1421 // using that abbreviations.
1422 unsigned AbbrevIndex =
1423 BlockAbbreviations->GetRecordAbbrevIndex(Record.GetRecordData());
1424 if (AbbrevIndex == naclbitc::UNABBREV_RECORD) {
1425 Context->Writer.EmitRecord(Record.GetCode(), Values, 0);
1426 } else {
1427 Context->Writer.EmitRecord(Record.GetCode(), Values, AbbrevIndex);
1428 }
1429 }
1430 };
1431
1432 bool NaClBitcodeCopyParser::ParseBlock(unsigned BlockID) {
1433 NaClBlockCopyParser Parser(BlockID, this);
1434 return Parser.ParseThisBlock();
1435 }
1436
1437 // Read in bitcode, and write it back out using the abbreviations in
1438 // BlockAbbrevsMap, from memory buffer MemBuf containing the input
1439 // bitcode file.
1440 static bool CopyBitcode(std::unique_ptr<MemoryBuffer> &MemBuf,
1441 BlockAbbrevsMapType &BlockAbbrevsMap) {
1442
1443 const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
1444 const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
1445
1446 // Read header. No verification is needed since AnalyzeBitcode has
1447 // already checked it.
1448 NaClBitcodeHeader Header;
1449 if (Header.Read(BufPtr, EndBufPtr))
1450 return Error("Invalid PNaCl bitcode header");
1451
1452 // Create the bitcode reader.
1453 NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
1454 NaClBitstreamCursor Stream(StreamFile);
1455
1456 // Create the bitcode writer.
1457 SmallVector<char, 0> OutputBuffer;
1458 OutputBuffer.reserve(256*1024);
1459 NaClBitstreamWriter StreamWriter(OutputBuffer);
1460
1461 // Emit the file header.
1462 NaClWriteHeader(Header, StreamWriter);
1463
1464 // Set up the parser.
1465 NaClBitcodeCopyParser Parser(Stream, BlockAbbrevsMap, StreamWriter);
1466
1467 // Parse the bitcode and copy.
1468 while (!Stream.AtEndOfStream()) {
1469 if (Parser.Parse()) return true;
1470 }
1471
1472 // Write out the copied results.
1473 std::error_code EC;
1474 std::unique_ptr<tool_output_file> OutFile(
1475 new tool_output_file(OutputFilename.c_str(), EC, sys::fs::F_None));
1476 if (EC)
1477 return Error(EC.message());
1478
1479 // Write the generated bitstream to "Out".
1480 OutFile->os().write((char*)&OutputBuffer.front(),
1481 OutputBuffer.size());
1482 OutFile->keep();
1483
1484 return false;
1485 }
1486
1487 // Build fast lookup abbreviation maps for each of the abbreviation blocks
1488 // defined in AbbrevsMap.
1489 static void BuildAbbrevLookupMaps(BlockAbbrevsMapType &AbbrevsMap) {
1490 for (BlockAbbrevsMapType::const_iterator
1491 Iter = AbbrevsMap.begin(),
1492 IterEnd = AbbrevsMap.end();
1493 Iter != IterEnd; ++Iter) {
1494 Iter->second->BuildAbbrevLookupSizeMap();
1495 }
1496 }
1497
1498 } // namespace
1499
1500 int main(int argc, char **argv) {
1501 // Print a stack trace if we signal out.
1502 sys::PrintStackTraceOnErrorSignal();
1503 PrettyStackTraceProgram X(argc, argv);
1504 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
1505 cl::ParseCommandLineOptions(argc, argv, "pnacl-bccompress file analyzer\n");
1506
1507 std::unique_ptr<MemoryBuffer> MemBuf;
1508 if (ReadAndBuffer(MemBuf)) return 1;
1509 BlockAbbrevsMapType BlockAbbrevsMap;
1510 if (AnalyzeBitcode(MemBuf, BlockAbbrevsMap)) return 1;
1511 if (ShowAbbreviationFrequencies || ShowValueDistributions) {
1512 return 0;
1513 }
1514 BuildAbbrevLookupMaps(BlockAbbrevsMap);
1515 if (CopyBitcode(MemBuf, BlockAbbrevsMap)) return 1;
1516 return 0;
1517 }
OLDNEW
« no previous file with comments | « tools/pnacl-bccompress/Makefile ('k') | tools/pnacl-bcdis/CMakeLists.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698