Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(241)

Side by Side Diff: include/llvm/Bitcode/NaCl/NaClBitstreamReader.h

Issue 939073008: Rebased PNaCl localmods in LLVM to 223109 (Closed)
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 //===- NaClBitstreamReader.h -----------------------------------*- C++ -*-===//
2 // Low-level bitstream reader interface
3 //
4 // The LLVM Compiler Infrastructure
5 //
6 // This file is distributed under the University of Illinois Open Source
7 // License. See LICENSE.TXT for details.
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 // This header defines the NaClBitstreamReader and NaClBitstreamCursor
12 // classes, which can be used to read an arbitrary NaCl bitstream, regardless of its contents.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H
17 #define LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H
18
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/StreamingMemoryObject.h"
23 #include <climits>
24 #include <vector>
25
26 namespace llvm {
27
28 class Deserializer;
29
30 /// NaClBitstreamReader - This class is used to read from a NaCl
31 /// bitcode wire format stream, maintaining information that is global
32 /// to decoding the entire file. While a file is being read, multiple
33 /// cursors can be independently advanced or skipped around within the
34 /// file. These are represented by the NaClBitstreamCursor class.
35 class NaClBitstreamReader {
36 public:
37 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
38 /// These describe abbreviations that all blocks of the specified ID inherit.
39 struct BlockInfo {
40 unsigned BlockID;
41 std::vector<NaClBitCodeAbbrev*> Abbrevs;
42 };
43 private:
44 std::unique_ptr<MemoryObject> BitcodeBytes;
45
46 std::vector<BlockInfo> BlockInfoRecords;
47
48 /// \brief Holds the offset of the first byte after the header.
49 size_t InitialAddress;
50
51 NaClBitstreamReader(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION;
52 void operator=(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION;
53 public:
54 NaClBitstreamReader() : InitialAddress(0) {}
55
56 NaClBitstreamReader(const unsigned char *Start, const unsigned char *End,
57 size_t MyInitialAddress=0) {
58 InitialAddress = MyInitialAddress;
59 init(Start, End);
60 }
61
62 NaClBitstreamReader(MemoryObject *Bytes, size_t MyInitialAddress=0)
63 : InitialAddress(MyInitialAddress) {
64 BitcodeBytes.reset(Bytes);
65 }
66
67 void init(const unsigned char *Start, const unsigned char *End) {
68 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
69 BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
70 }
71
72 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
73
74 ~NaClBitstreamReader() {
75 // Free the BlockInfoRecords.
76 while (!BlockInfoRecords.empty()) {
77 BlockInfo &Info = BlockInfoRecords.back();
78 // Free blockinfo abbrev info.
79 for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
80 i != e; ++i)
81 Info.Abbrevs[i]->dropRef();
82 BlockInfoRecords.pop_back();
83 }
84 }
85
86 /// \brief Returns the initial address (after the header) of the input stream.
87 size_t getInitialAddress() const {
88 return InitialAddress;
89 }
90
91 //===--------------------------------------------------------------------===//
92 // Block Manipulation
93 //===--------------------------------------------------------------------===//
94
95 /// hasBlockInfoRecords - Return true if we've already read and processed the
96 /// block info block for this Bitstream. We only process it for the first
97 /// cursor that walks over it.
98 bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
99
100 /// getBlockInfo - If there is block info for the specified ID, return it,
101 /// otherwise return null.
102 const BlockInfo *getBlockInfo(unsigned BlockID) const {
103 // Common case, the most recent entry matches BlockID.
104 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
105 return &BlockInfoRecords.back();
106
107 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
108 i != e; ++i)
109 if (BlockInfoRecords[i].BlockID == BlockID)
110 return &BlockInfoRecords[i];
111 return 0;
112 }
113
114 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
115 if (const BlockInfo *BI = getBlockInfo(BlockID))
116 return *const_cast<BlockInfo*>(BI);
117
118 // Otherwise, add a new record.
119 BlockInfoRecords.push_back(BlockInfo());
120 BlockInfoRecords.back().BlockID = BlockID;
121 return BlockInfoRecords.back();
122 }
123 };
124
125
/// NaClBitstreamEntry - When advancing through a bitstream cursor,
/// each advance can discover a few different kinds of entries:
///   Error    - Malformed bitcode was found.
///   EndBlock - We've reached the end of the current block (or the end of the
///              file, which is treated like a series of EndBlock records).
///   SubBlock - This is the start of a new subblock of a specific ID.
///   Record   - This is a record with a specific AbbrevID.
///
struct NaClBitstreamEntry {
  enum {
    Error,
    EndBlock,
    SubBlock,
    Record
  } Kind;

  /// Block ID for SubBlock entries, abbreviation ID for Record entries.
  /// The factory functions set it to 0 for Error and EndBlock entries so
  /// that a returned entry never carries an indeterminate value.
  unsigned ID;

  /// Returns an entry of kind Error (ID is 0).
  static NaClBitstreamEntry getError() {
    NaClBitstreamEntry E;
    E.Kind = Error;
    E.ID = 0;
    return E;
  }

  /// Returns an entry of kind EndBlock (ID is 0).
  static NaClBitstreamEntry getEndBlock() {
    NaClBitstreamEntry E;
    E.Kind = EndBlock;
    E.ID = 0;
    return E;
  }

  /// Returns an entry of kind SubBlock whose ID is the given block ID.
  static NaClBitstreamEntry getSubBlock(unsigned ID) {
    NaClBitstreamEntry E;
    E.Kind = SubBlock;
    E.ID = ID;
    return E;
  }

  /// Returns an entry of kind Record whose ID is the given abbreviation ID.
  static NaClBitstreamEntry getRecord(unsigned AbbrevID) {
    NaClBitstreamEntry E;
    E.Kind = Record;
    E.ID = AbbrevID;
    return E;
  }
};
157
158 /// Models default view of a bitcode record.
159 typedef SmallVector<uint64_t, 8> NaClBitcodeRecordVector;
160
161 /// Class NaClAbbrevListener is used to allow instances of class
162 /// NaClBitcodeParser to listen to record details when processing
163 /// abbreviations. The major reason for using a listener is that the
164 /// NaCl bitcode reader would require a major rewrite (including the
165 /// introduction of more overhead) if we were to lift abbreviations up
166 /// to the bitcode reader. That is, not only would we have to lift the
167 /// block processing up into the readers (i.e. many blocks in
168 /// NaClBitcodeReader and NaClBitcodeParser), but add many new API's
169 /// to allow the readers to update internals of the bit stream reader
170 /// appropriately.
171 class NaClAbbrevListener {
172 NaClAbbrevListener(const NaClAbbrevListener&) LLVM_DELETED_FUNCTION;
173 void operator=(const NaClAbbrevListener&) LLVM_DELETED_FUNCTION;
174 public:
175 NaClAbbrevListener() {}
176 virtual ~NaClAbbrevListener() {}
177
178 /// Called to process the read abbreviation.
179 virtual void ProcessAbbreviation(NaClBitCodeAbbrev *Abbrev,
180 bool IsLocal) = 0;
181
182 /// Called after entering block. NumWords is the number of words
183 /// in the block.
184 virtual void BeginBlockInfoBlock(unsigned NumWords) = 0;
185
186 /// Called if a naclbitc::BLOCKINFO_CODE_SETBID record is found in
187 /// NaClBitstreamCursor::ReadBlockInfoBlock.
188 virtual void SetBID() = 0;
189
190 /// Called just before an EndBlock record is processed by
191 /// NaClBitstreamCursor::ReadBlockInfoBlock
192 virtual void EndBlockInfoBlock() = 0;
193
194 /// The values of the bitcode record associated with the called
195 /// virtual function.
196 NaClBitcodeRecordVector Values;
197
198 /// Start bit for current record being processed in
199 /// NaClBitstreamCursor::ReadBlockInfoBlock.
200 uint64_t StartBit;
201 };
202
203 /// NaClBitstreamCursor - This represents a position within a bitcode
204 /// file. There may be multiple independent cursors reading within
205 /// one bitstream, each maintaining their own local state.
206 ///
207 /// Unlike iterators, NaClBitstreamCursors are heavy-weight objects
208 /// that should not be passed by value.
209 class NaClBitstreamCursor {
210 friend class Deserializer;
211 NaClBitstreamReader *BitStream;
212 size_t NextChar;
213
214 // The size of the bitcode. 0 if we don't know it yet.
215 size_t Size;
216
217 /// CurWord/word_t - This is the current data we have pulled from the stream
218 /// but have not returned to the client. This is specifically and
219 /// intentionally defined to follow the word size of the host machine for
220 /// efficiency. We use word_t in places that are aware of this to make it
221 /// perfectly explicit what is going on.
222 typedef uint32_t word_t;
223 word_t CurWord;
224
225 /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
226 /// is always from [0...31/63] inclusive (depending on word size).
227 unsigned BitsInCurWord;
228
229 // CurCodeSize - This is the declared size of code values used for the current
230 // block, in bits.
231 NaClBitcodeSelectorAbbrev CurCodeSize;
232
233 /// CurAbbrevs - Abbrevs installed at in this block.
234 std::vector<NaClBitCodeAbbrev*> CurAbbrevs;
235
236 struct Block {
237 NaClBitcodeSelectorAbbrev PrevCodeSize;
238 std::vector<NaClBitCodeAbbrev*> PrevAbbrevs;
239 Block() : PrevCodeSize() {}
240 explicit Block(const NaClBitcodeSelectorAbbrev& PCS)
241 : PrevCodeSize(PCS) {}
242 };
243
244 /// BlockScope - This tracks the codesize of parent blocks.
245 SmallVector<Block, 8> BlockScope;
246
247 NaClBitstreamCursor(const NaClBitstreamCursor &) LLVM_DELETED_FUNCTION;
248 NaClBitstreamCursor &operator=(const NaClBitstreamCursor &) LLVM_DELETED_FUNCT ION;
249
250 public:
251 NaClBitstreamCursor() {
252 init(nullptr);
253 }
254
255 explicit NaClBitstreamCursor(NaClBitstreamReader &R) { init(&R); }
256
257 void init(NaClBitstreamReader *R) {
258 freeState();
259 BitStream = R;
260 NextChar = (BitStream == nullptr) ? 0 : BitStream->getInitialAddress();
261 Size = 0;
262 BitsInCurWord = 0;
263 }
264
265 ~NaClBitstreamCursor() {
266 freeState();
267 }
268
269 void freeState();
270
271 bool canSkipToPos(size_t pos) const {
272 // pos can be skipped to if it is a valid address or one byte past the end.
273 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
274 static_cast<uint64_t>(pos - 1));
275 }
276
277 bool AtEndOfStream() {
278 if (BitsInCurWord != 0)
279 return false;
280 if (Size != 0 && Size == NextChar)
281 return true;
282 fillCurWord();
283 return BitsInCurWord == 0;
284 }
285
286 /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
287 unsigned getAbbrevIDWidth() const { return CurCodeSize.NumBits; }
288
289 /// GetCurrentBitNo - Return the bit # of the bit we are reading.
290 uint64_t GetCurrentBitNo() const {
291 return NextChar*CHAR_BIT - BitsInCurWord;
292 }
293
294 NaClBitstreamReader *getBitStreamReader() {
295 return BitStream;
296 }
297 const NaClBitstreamReader *getBitStreamReader() const {
298 return BitStream;
299 }
300
301 /// Flags that modify the behavior of advance().
302 enum {
303 /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
304 /// not automatically pop the block scope when the end of a block is
305 /// reached.
306 AF_DontPopBlockAtEnd = 1,
307
308 /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
309 /// returned just like normal records.
310 AF_DontAutoprocessAbbrevs = 2
311 };
312
313 /// advance - Advance the current bitstream, returning the next entry in the
314 /// stream. Use the given abbreviation listener (if provided).
315 NaClBitstreamEntry advance(unsigned Flags, NaClAbbrevListener *Listener) {
316 while (1) {
317 unsigned Code = ReadCode();
318 if (Code == naclbitc::END_BLOCK) {
319 // Pop the end of the block unless Flags tells us not to.
320 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
321 return NaClBitstreamEntry::getError();
322 return NaClBitstreamEntry::getEndBlock();
323 }
324
325 if (Code == naclbitc::ENTER_SUBBLOCK)
326 return NaClBitstreamEntry::getSubBlock(ReadSubBlockID());
327
328 if (Code == naclbitc::DEFINE_ABBREV &&
329 !(Flags & AF_DontAutoprocessAbbrevs)) {
330 // We read and accumulate abbrev's, the client can't do anything with
331 // them anyway.
332 ReadAbbrevRecord(true, Listener);
333 continue;
334 }
335
336 return NaClBitstreamEntry::getRecord(Code);
337 }
338 }
339
340 /// advanceSkippingSubblocks - This is a convenience function for clients that
341 /// don't expect any subblocks. This just skips over them automatically.
342 NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
343 while (1) {
344 // If we found a normal entry, return it.
345 NaClBitstreamEntry Entry = advance(Flags, 0);
346 if (Entry.Kind != NaClBitstreamEntry::SubBlock)
347 return Entry;
348
349 // If we found a sub-block, just skip over it and check the next entry.
350 if (SkipBlock())
351 return NaClBitstreamEntry::getError();
352 }
353 }
354
355 /// JumpToBit - Reset the stream to the specified bit number.
356 void JumpToBit(uint64_t BitNo) {
357 uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
358 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
359 assert(canSkipToPos(ByteNo) && "Invalid location");
360
361 // Move the cursor to the right word.
362 NextChar = ByteNo;
363 BitsInCurWord = 0;
364
365 // Skip over any bits that are already consumed.
366 if (WordBitNo) {
367 if (sizeof(word_t) > 4)
368 Read64(WordBitNo);
369 else
370 Read(WordBitNo);
371 }
372 }
373
374 void fillCurWord() {
375 assert(Size == 0 || NextChar < (unsigned)Size);
376
377 // Read the next word from the stream.
378 uint8_t Array[sizeof(word_t)] = {0};
379
380 uint64_t BytesRead =
381 BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
382
383 // If we run out of data, stop at the end of the stream.
384 if (BytesRead == 0) {
385 Size = NextChar;
386 return;
387 }
388 assert(BytesRead == sizeof(Array));
389
390 // Handle big-endian byte-swapping if necessary.
391 support::detail::packed_endian_specific_integral<
392 word_t, support::little, support::unaligned> EndianValue;
393 memcpy(&EndianValue, Array, sizeof(Array));
394
395 CurWord = EndianValue;
396 NextChar += sizeof(word_t);
397 BitsInCurWord = sizeof(word_t) * 8;
398 }
399
400 uint32_t Read(unsigned NumBits) {
401 assert(NumBits && NumBits <= 32 &&
402 "Cannot return zero or more than 32 bits!");
403
404 // If the field is fully contained by CurWord, return it quickly.
405 if (BitsInCurWord >= NumBits) {
406 uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
407
408 // Use a mask to avoid undefined behavior.
409 CurWord >>= (NumBits & 0x1f);
410
411 BitsInCurWord -= NumBits;
412 return R;
413 }
414
415 uint32_t R = BitsInCurWord ? uint32_t(CurWord) : 0;
416 unsigned BitsLeft = NumBits - BitsInCurWord;
417
418 fillCurWord();
419
420 // If we run out of data, stop at the end of the stream.
421 if (BitsLeft > BitsInCurWord)
422 return 0;
423
424 uint32_t R2 = uint32_t(CurWord) & (~0U >> (sizeof(word_t) * 8 - BitsLeft));
425
426 // Use a mask to avoid undefined behavior.
427 CurWord >>= (BitsLeft & 0x1f);
428
429 BitsInCurWord -= BitsLeft;
430
431 R |= uint32_t(R2 << (NumBits - BitsLeft));
432
433 return R;
434 }
435
436 uint64_t Read64(unsigned NumBits) {
437 if (NumBits <= 32) return Read(NumBits);
438
439 uint64_t V = Read(32);
440 return V | (uint64_t)Read(NumBits-32) << 32;
441 }
442
443 uint32_t ReadVBR(unsigned NumBits) {
444 uint32_t Piece = Read(NumBits);
445 if ((Piece & (1U << (NumBits-1))) == 0)
446 return Piece;
447
448 uint32_t Result = 0;
449 unsigned NextBit = 0;
450 while (1) {
451 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
452
453 if ((Piece & (1U << (NumBits-1))) == 0)
454 return Result;
455
456 NextBit += NumBits-1;
457 Piece = Read(NumBits);
458 }
459 }
460
461 // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The
462 // chunk size of the VBR must still be <= 32 bits though.
463 uint64_t ReadVBR64(unsigned NumBits) {
464 uint32_t Piece = Read(NumBits);
465 if ((Piece & (1U << (NumBits-1))) == 0)
466 return uint64_t(Piece);
467
468 uint64_t Result = 0;
469 unsigned NextBit = 0;
470 while (1) {
471 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
472
473 if ((Piece & (1U << (NumBits-1))) == 0)
474 return Result;
475
476 NextBit += NumBits-1;
477 Piece = Read(NumBits);
478 }
479 }
480
481 private:
482 void SkipToFourByteBoundary() {
483 // If word_t is 64-bits and if we've read less than 32 bits, just dump
484 // the bits we have up to the next 32-bit boundary.
485 if (sizeof(word_t) > 4 &&
486 BitsInCurWord >= 32) {
487 CurWord >>= BitsInCurWord-32;
488 BitsInCurWord = 32;
489 return;
490 }
491
492 BitsInCurWord = 0;
493 }
494 public:
495
496 unsigned ReadCode() {
497 return CurCodeSize.IsFixed
498 ? Read(CurCodeSize.NumBits)
499 : ReadVBR(CurCodeSize.NumBits);
500 }
501
502 // Block header:
503 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
504
505 /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
506 /// the block.
507 unsigned ReadSubBlockID() {
508 return ReadVBR(naclbitc::BlockIDWidth);
509 }
510
511 /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
512 /// over the body of this block. If the block record is malformed, return
513 /// true.
514 bool SkipBlock() {
515 // Read and ignore the codelen value. Since we are skipping this block, we
516 // don't care what code widths are used inside of it.
517 ReadVBR(naclbitc::CodeLenWidth);
518 SkipToFourByteBoundary();
519 unsigned NumFourBytes = Read(naclbitc::BlockSizeWidth);
520
521 // Check that the block wasn't partially defined, and that the offset isn't
522 // bogus.
523 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
524 if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
525 return true;
526
527 JumpToBit(SkipTo);
528 return false;
529 }
530
531 /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
532 /// the block, and return true if the block has an error.
533 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0);
534
535 bool ReadBlockEnd() {
536 if (BlockScope.empty()) return true;
537
538 // Block tail:
539 // [END_BLOCK, <align4bytes>]
540 SkipToFourByteBoundary();
541
542 popBlockScope();
543 return false;
544 }
545
546 private:
547
548 void popBlockScope() {
549 CurCodeSize = BlockScope.back().PrevCodeSize;
550
551 // Delete abbrevs from popped scope.
552 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
553 i != e; ++i)
554 CurAbbrevs[i]->dropRef();
555
556 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
557 BlockScope.pop_back();
558 }
559
560 //===--------------------------------------------------------------------===//
561 // Record Processing
562 //===--------------------------------------------------------------------===//
563
564 private:
565 void skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op);
566
567 // Reads the next Value using the abbreviation Op. Returns true only
568 // if Op is an array (and sets Value to the number of elements in the
569 // array).
570 inline bool readRecordAbbrevField(const NaClBitCodeAbbrevOp &Op,
571 uint64_t &Value);
572
573 // Reads and returns the next value using the abbreviation Op,
574 // assuming Op appears after an array abbreviation.
575 inline uint64_t readArrayAbbreviatedField(const NaClBitCodeAbbrevOp &Op);
576
577 // Reads the array abbreviation Op, NumArrayElements times, putting
578 // the read values in Vals.
579 inline void readArrayAbbrev(const NaClBitCodeAbbrevOp &Op,
580 unsigned NumArrayElements,
581 SmallVectorImpl<uint64_t> &Vals);
582
583 public:
584
585 /// getAbbrev - Return the abbreviation for the specified AbbrevId.
586 const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) const {
587 unsigned AbbrevNo = AbbrevID-naclbitc::FIRST_APPLICATION_ABBREV;
588 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
589 return CurAbbrevs[AbbrevNo];
590 }
591
592 /// skipRecord - Read the current record and discard it.
593 void skipRecord(unsigned AbbrevID);
594
595 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals);
596
597 //===--------------------------------------------------------------------===//
598 // Abbrev Processing
599 //===--------------------------------------------------------------------===//
600 // IsLocal indicates where the abbreviation occurs. If it is in the
601 // BlockInfo block, IsLocal is false. In all other cases, IsLocal is
602 // true.
603 void ReadAbbrevRecord(bool IsLocal,
604 NaClAbbrevListener *Listener);
605
606 // Skips over an abbreviation record. Duplicates code of ReadAbbrevRecord,
607 // except that no abbreviation is built.
608 void SkipAbbrevRecord();
609
610 bool ReadBlockInfoBlock(NaClAbbrevListener *Listener);
611 };
612
613 } // End llvm namespace
614
615 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698