Chromium Code Reviews| Index: include/llvm/Bitcode/NaCl/NaClBitstreamReader.h |
| diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h |
| index 97e216441939906f33f1c2d1f48585005bf51e8f..da48563844a3308be1340122dc79bbca115b7cd1 100644 |
| --- a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h |
| +++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h |
| @@ -19,29 +19,29 @@ |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" |
| #include "llvm/Support/Endian.h" |
| -#include "llvm/Support/StreamableMemoryObject.h" |
| +#include "llvm/Support/StreamingMemoryObject.h" |
| #include <climits> |
| #include <vector> |
| namespace llvm { |
| - class Deserializer; |
| +class Deserializer; |
| -/// NaClBitstreamReader - This class is used to read from a NaCl |
| -/// bitcode wire format stream, maintaining information that is global |
| -/// to decoding the entire file. While a file is being read, multiple |
| -/// cursors can be independently advanced or skipped around within the |
| -/// file. These are represented by the NaClBitstreamCursor class. |
| +/// This class is used to read from a NaCl bitcode wire format stream, |
| +/// maintaining information that is global to decoding the entire file. |
| +/// While a file is being read, multiple cursors can be independently |
| +/// advanced or skipped around within the file. These are represented by |
| +/// the NaClBitstreamCursor class. |
| class NaClBitstreamReader { |
| public: |
| - /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. |
| - /// These describe abbreviations that all blocks of the specified ID inherit. |
| + /// This contains information emitted to BLOCKINFO_BLOCK blocks. These |
| + /// describe abbreviations that all blocks of the specified ID inherit. |
| struct BlockInfo { |
| unsigned BlockID; |
| std::vector<NaClBitCodeAbbrev*> Abbrevs; |
| }; |
| private: |
| - std::unique_ptr<StreamableMemoryObject> BitcodeBytes; |
| + std::unique_ptr<MemoryObject> BitcodeBytes; |
| std::vector<BlockInfo> BlockInfoRecords; |
| @@ -59,10 +59,8 @@ public: |
| init(Start, End); |
| } |
| - NaClBitstreamReader(StreamableMemoryObject *Bytes, |
| - size_t MyInitialAddress=0) |
| - : InitialAddress(MyInitialAddress) |
| - { |
| + NaClBitstreamReader(MemoryObject *Bytes, size_t MyInitialAddress=0) |
| + : InitialAddress(MyInitialAddress) { |
| BitcodeBytes.reset(Bytes); |
| } |
| @@ -71,7 +69,7 @@ public: |
| BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); |
| } |
| - StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; } |
| + MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } |
| ~NaClBitstreamReader() { |
| // Free the BlockInfoRecords. |
| @@ -94,13 +92,13 @@ public: |
| // Block Manipulation |
| //===--------------------------------------------------------------------===// |
| - /// hasBlockInfoRecords - Return true if we've already read and processed the |
| - /// block info block for this Bitstream. We only process it for the first |
| - /// cursor that walks over it. |
| + /// Return true if we've already read and processed the block info block for |
| + /// this Bitstream. We only process it for the first cursor that walks over |
| + /// it. |
| bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } |
| - /// getBlockInfo - If there is block info for the specified ID, return it, |
| - /// otherwise return null. |
| + /// If there is block info for the specified ID, return it, otherwise return |
| + /// null. |
| const BlockInfo *getBlockInfo(unsigned BlockID) const { |
| // Common case, the most recent entry matches BlockID. |
| if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) |
| @@ -110,7 +108,7 @@ public: |
| i != e; ++i) |
| if (BlockInfoRecords[i].BlockID == BlockID) |
| return &BlockInfoRecords[i]; |
| - return 0; |
| + return nullptr; |
| } |
| BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { |
| @@ -124,23 +122,17 @@ public: |
| } |
| }; |
| - |
| -/// NaClBitstreamEntry - When advancing through a bitstream cursor, |
| -/// each advance can discover a few different kinds of entries: |
| -/// Error - Malformed bitcode was found. |
| -/// EndBlock - We've reached the end of the current block, (or the end of the |
| -/// file, which is treated like a series of EndBlock records. |
| -/// SubBlock - This is the start of a new subblock of a specific ID. |
| -/// Record - This is a record with a specific AbbrevID. |
| -/// |
| +/// When advancing through a bitstream cursor, each advance can discover a few |
| +/// different kinds of entries: |
| struct NaClBitstreamEntry { |
| enum { |
| - Error, |
| - EndBlock, |
| - SubBlock, |
| - Record |
| + Error, // Malformed bitcode was found. |
| + EndBlock, // We've reached the end of the current block (or the end of the |
| + // file, which is treated like a series of EndBlock records). |
| + SubBlock, // This is the start of a new subblock of a specific ID. |
| + Record // This is a record with a specific AbbrevID. |
| } Kind; |
| - |
| + |
| unsigned ID; |
| static NaClBitstreamEntry getError() { |
| @@ -202,9 +194,9 @@ public: |
| uint64_t StartBit; |
| }; |
| -/// NaClBitstreamCursor - This represents a position within a bitcode |
| -/// file. There may be multiple independent cursors reading within |
| -/// one bitstream, each maintaining their own local state. |
| +/// This represents a position within a bitcode file. There may be multiple |
| +/// independent cursors reading within one bitstream, each maintaining their |
| +/// own local state. |
| /// |
| /// Unlike iterators, NaClBitstreamCursors are heavy-weight objects |
| /// that should not be passed by value. |
| @@ -213,44 +205,44 @@ class NaClBitstreamCursor { |
| NaClBitstreamReader *BitStream; |
| size_t NextChar; |
| - /// CurWord/word_t - This is the current data we have pulled from the stream |
| - /// but have not returned to the client. This is specifically and |
| - /// intentionally defined to follow the word size of the host machine for |
| - /// efficiency. We use word_t in places that are aware of this to make it |
| - /// perfectly explicit what is going on. |
| - typedef uint32_t word_t; |
| + // The size of the bitcode. 0 if we don't know it yet. |
| + size_t Size; |
| + |
| + /// This is the current data we have pulled from the stream but have not |
| + /// returned to the client. This is specifically and intentionally defined to |
| + /// follow the word size of the host machine for efficiency. We use word_t in |
| + /// places that are aware of this to make it perfectly explicit what is going |
| + /// on. |
| + typedef size_t word_t; |
| word_t CurWord; |
| - /// BitsInCurWord - This is the number of bits in CurWord that are valid. This |
| - /// is always from [0...31/63] inclusive (depending on word size). |
| + /// This is the number of bits in CurWord that are valid. This |
| + /// is always from [0...bits_of(size_t)-1] inclusive. |
|
Karl
2015/02/23 17:46:55
word_t rather than size_t?
jvoung (off chromium)
2015/02/23 21:27:19
Done.
|
| unsigned BitsInCurWord; |
| - // CurCodeSize - This is the declared size of code values used for the current |
| - // block, in bits. |
| + /// This is the declared size of code values used for the current |
| + /// block, in bits. |
| NaClBitcodeSelectorAbbrev CurCodeSize; |
| - /// CurAbbrevs - Abbrevs installed at in this block. |
| + /// Abbrevs installed in this block. |
| std::vector<NaClBitCodeAbbrev*> CurAbbrevs; |
| struct Block { |
| NaClBitcodeSelectorAbbrev PrevCodeSize; |
| std::vector<NaClBitCodeAbbrev*> PrevAbbrevs; |
| - explicit Block() : PrevCodeSize() {} |
| + Block() : PrevCodeSize() {} |
| explicit Block(const NaClBitcodeSelectorAbbrev& PCS) |
| : PrevCodeSize(PCS) {} |
| }; |
| - /// BlockScope - This tracks the codesize of parent blocks. |
| + /// This tracks the codesize of parent blocks. |
| SmallVector<Block, 8> BlockScope; |
| NaClBitstreamCursor(const NaClBitstreamCursor &) LLVM_DELETED_FUNCTION; |
| NaClBitstreamCursor &operator=(const NaClBitstreamCursor &) LLVM_DELETED_FUNCTION; |
| public: |
| - |
| - NaClBitstreamCursor() { |
| - init(nullptr); |
| - } |
| + NaClBitstreamCursor() { init(nullptr); } |
| explicit NaClBitstreamCursor(NaClBitstreamReader &R) { init(&R); } |
| @@ -258,7 +250,7 @@ public: |
| freeState(); |
| BitStream = R; |
| NextChar = (BitStream == nullptr) ? 0 : BitStream->getInitialAddress(); |
| - CurWord = 0; |
| + Size = 0; |
| BitsInCurWord = 0; |
| } |
| @@ -268,10 +260,6 @@ public: |
| void freeState(); |
| - bool isEndPos(size_t pos) { |
| - return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos)); |
| - } |
| - |
| bool canSkipToPos(size_t pos) const { |
| // pos can be skipped to if it is a valid address or one byte past the end. |
| return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( |
| @@ -279,13 +267,18 @@ public: |
| } |
| bool AtEndOfStream() { |
| - return BitsInCurWord == 0 && isEndPos(NextChar); |
| + if (BitsInCurWord != 0) |
| + return false; |
| + if (Size != 0) |
| + return Size == NextChar; |
|
Karl
2015/02/23 17:46:55
What if NextChar is at the beginning of the file, ...
jvoung (off chromium)
2015/02/23 21:27:19
There is a check that Size != 0 first, so this branch...
Karl
2015/02/23 21:40:27
I meant if NextChar is at the beginning of the file...
|
| + fillCurWord(); |
| + return BitsInCurWord == 0; |
| } |
| - /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #. |
| + /// Return the number of bits used to encode an abbrev #. |
| unsigned getAbbrevIDWidth() const { return CurCodeSize.NumBits; } |
| - /// GetCurrentBitNo - Return the bit # of the bit we are reading. |
| + /// Return the bit # of the bit we are reading. |
| uint64_t GetCurrentBitNo() const { |
| return NextChar*CHAR_BIT - BitsInCurWord; |
| } |
| @@ -299,18 +292,17 @@ public: |
| /// Flags that modify the behavior of advance(). |
| enum { |
| - /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does |
| - /// not automatically pop the block scope when the end of a block is |
| - /// reached. |
| + /// If this flag is used, the advance() method does not automatically pop |
| + /// the block scope when the end of a block is reached. |
| AF_DontPopBlockAtEnd = 1, |
| - /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are |
| - /// returned just like normal records. |
| + /// If this flag is used, abbrev entries are returned just like normal |
| + /// records. |
| AF_DontAutoprocessAbbrevs = 2 |
| }; |
| - |
| - /// advance - Advance the current bitstream, returning the next entry in the |
| - /// stream. Use the given abbreviation listener (if provided). |
| + |
| + /// Advance the current bitstream, returning the next entry in the stream. |
| + /// Use the given abbreviation listener (if provided). |
| NaClBitstreamEntry advance(unsigned Flags, NaClAbbrevListener *Listener) { |
| while (1) { |
| unsigned Code = ReadCode(); |
| @@ -320,10 +312,10 @@ public: |
| return NaClBitstreamEntry::getError(); |
| return NaClBitstreamEntry::getEndBlock(); |
| } |
| - |
| + |
| if (Code == naclbitc::ENTER_SUBBLOCK) |
| return NaClBitstreamEntry::getSubBlock(ReadSubBlockID()); |
| - |
| + |
| if (Code == naclbitc::DEFINE_ABBREV && |
| !(Flags & AF_DontAutoprocessAbbrevs)) { |
| // We read and accumulate abbrev's, the client can't do anything with |
| @@ -336,22 +328,22 @@ public: |
| } |
| } |
| - /// advanceSkippingSubblocks - This is a convenience function for clients that |
| - /// don't expect any subblocks. This just skips over them automatically. |
| + /// This is a convenience function for clients that don't expect any |
| + /// subblocks. This just skips over them automatically. |
| NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { |
| while (1) { |
| // If we found a normal entry, return it. |
| NaClBitstreamEntry Entry = advance(Flags, 0); |
| if (Entry.Kind != NaClBitstreamEntry::SubBlock) |
| return Entry; |
| - |
| + |
| // If we found a sub-block, just skip over it and check the next entry. |
| if (SkipBlock()) |
| return NaClBitstreamEntry::getError(); |
| } |
| } |
| - /// JumpToBit - Reset the stream to the specified bit number. |
| + /// Reset the stream to the specified bit number. |
| void JumpToBit(uint64_t BitNo) { |
| uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1); |
| unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); |
| @@ -360,74 +352,72 @@ public: |
| // Move the cursor to the right word. |
| NextChar = ByteNo; |
| BitsInCurWord = 0; |
| - CurWord = 0; |
| // Skip over any bits that are already consumed. |
| - if (WordBitNo) { |
| - if (sizeof(word_t) > 4) |
| - Read64(WordBitNo); |
| - else |
| - Read(WordBitNo); |
| + if (WordBitNo) |
| + Read(WordBitNo); |
| + } |
| + |
| + void fillCurWord() { |
| + assert(Size == 0 || NextChar < (unsigned)Size); |
| + |
| + // Read the next word from the stream. |
| + uint8_t Array[sizeof(word_t)] = {0}; |
| + |
| + uint64_t BytesRead = |
| + BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); |
| + |
| + // If we run out of data, stop at the end of the stream. |
| + if (BytesRead == 0) { |
| + Size = NextChar; |
| + return; |
| } |
| + |
| + CurWord = |
| + support::endian::read<word_t, support::little, support::unaligned>( |
| + Array); |
| + NextChar += BytesRead; |
| + BitsInCurWord = BytesRead * 8; |
| } |
| - uint32_t Read(unsigned NumBits) { |
| - assert(NumBits && NumBits <= 32 && |
| - "Cannot return zero or more than 32 bits!"); |
| - |
| + word_t Read(unsigned NumBits) { |
| + static const unsigned BitsInWord = sizeof(word_t) * 8; |
| + |
| + assert(NumBits && NumBits <= BitsInWord && |
| + "Cannot return zero or more than BitsInWord bits!"); |
| + |
| + static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; |
| + |
| // If the field is fully contained by CurWord, return it quickly. |
| if (BitsInCurWord >= NumBits) { |
| - uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); |
| - CurWord >>= NumBits; |
| + word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); |
| + |
| + // Use a mask to avoid undefined behavior. |
| + CurWord >>= (NumBits & Mask); |
| + |
| BitsInCurWord -= NumBits; |
| return R; |
| } |
| + word_t R = BitsInCurWord ? CurWord : 0; |
| + unsigned BitsLeft = NumBits - BitsInCurWord; |
| + |
| + fillCurWord(); |
| + |
| // If we run out of data, stop at the end of the stream. |
| - if (isEndPos(NextChar)) { |
| - CurWord = 0; |
| - BitsInCurWord = 0; |
| + if (BitsLeft > BitsInCurWord) |
| return 0; |
| - } |
| - uint32_t R = uint32_t(CurWord); |
| + word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); |
| - // Read the next word from the stream. |
| - uint8_t Array[sizeof(word_t)] = {0}; |
| - |
| - BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array); |
| - |
| - // Handle big-endian byte-swapping if necessary. |
| - support::detail::packed_endian_specific_integral |
| - <word_t, support::little, support::unaligned> EndianValue; |
| - memcpy(&EndianValue, Array, sizeof(Array)); |
| - |
| - CurWord = EndianValue; |
| - |
| - NextChar += sizeof(word_t); |
| - |
| - // Extract NumBits-BitsInCurWord from what we just read. |
| - unsigned BitsLeft = NumBits-BitsInCurWord; |
| - |
| - // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive. |
| - R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft))) |
| - << BitsInCurWord); |
| - |
| - // BitsLeft bits have just been used up from CurWord. BitsLeft is in the |
| - // range [1..32]/[1..64] so be careful how we shift. |
| - if (BitsLeft != sizeof(word_t)*8) |
| - CurWord >>= BitsLeft; |
| - else |
| - CurWord = 0; |
| - BitsInCurWord = sizeof(word_t)*8-BitsLeft; |
| - return R; |
| - } |
| + // Use a mask to avoid undefined behavior. |
| + CurWord >>= (BitsLeft & Mask); |
| - uint64_t Read64(unsigned NumBits) { |
| - if (NumBits <= 32) return Read(NumBits); |
| + BitsInCurWord -= BitsLeft; |
| - uint64_t V = Read(32); |
| - return V | (uint64_t)Read(NumBits-32) << 32; |
| + R |= R2 << (NumBits - BitsLeft); |
| + |
| + return R; |
| } |
| uint32_t ReadVBR(unsigned NumBits) { |
| @@ -448,8 +438,8 @@ public: |
| } |
| } |
| - // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The |
| - // chunk size of the VBR must still be <= 32 bits though. |
| + // Read a VBR that may have a value up to 64-bits in size. The chunk size of |
| + // the VBR must still be <= 32 bits though. |
| uint64_t ReadVBR64(unsigned NumBits) { |
| uint32_t Piece = Read(NumBits); |
| if ((Piece & (1U << (NumBits-1))) == 0) |
| @@ -478,9 +468,8 @@ private: |
| BitsInCurWord = 32; |
| return; |
| } |
| - |
| + |
| BitsInCurWord = 0; |
| - CurWord = 0; |
| } |
| public: |
| @@ -493,15 +482,13 @@ public: |
| // Block header: |
| // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] |
| - /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for |
| - /// the block. |
| + /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. |
| unsigned ReadSubBlockID() { |
| return ReadVBR(naclbitc::BlockIDWidth); |
| } |
| - /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip |
| - /// over the body of this block. If the block record is malformed, return |
| - /// true. |
| + /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body |
| + /// of this block. If the block record is malformed, return true. |
| bool SkipBlock() { |
| // Read and ignore the codelen value. Since we are skipping this block, we |
| // don't care what code widths are used inside of it. |
| @@ -519,10 +506,10 @@ public: |
| return false; |
| } |
| - /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter |
| - /// the block, and return true if the block has an error. |
| - bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0); |
| - |
| + /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true |
| + /// if the block has an error. |
| + bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); |
| + |
| bool ReadBlockEnd() { |
| if (BlockScope.empty()) return true; |
| @@ -573,14 +560,14 @@ private: |
| public: |
| - /// getAbbrev - Return the abbreviation for the specified AbbrevId. |
| + /// Return the abbreviation for the specified AbbrevId. |
| const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) const { |
| unsigned AbbrevNo = AbbrevID-naclbitc::FIRST_APPLICATION_ABBREV; |
| assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); |
| return CurAbbrevs[AbbrevNo]; |
| } |
| - /// skipRecord - Read the current record and discard it. |
| + /// Read the current record and discard it. |
| void skipRecord(unsigned AbbrevID); |
| unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals); |