Index: include/llvm/Bitcode/NaCl/NaClBitstreamReader.h |
diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h |
index 97e216441939906f33f1c2d1f48585005bf51e8f..dd53fd041db0a648068125fc3bd875f87a333835 100644 |
--- a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h |
+++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h |
@@ -19,29 +19,29 @@ |
#include "llvm/ADT/SmallVector.h" |
#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" |
#include "llvm/Support/Endian.h" |
-#include "llvm/Support/StreamableMemoryObject.h" |
+#include "llvm/Support/StreamingMemoryObject.h" |
#include <climits> |
#include <vector> |
namespace llvm { |
- class Deserializer; |
+class Deserializer; |
-/// NaClBitstreamReader - This class is used to read from a NaCl |
-/// bitcode wire format stream, maintaining information that is global |
-/// to decoding the entire file. While a file is being read, multiple |
-/// cursors can be independently advanced or skipped around within the |
-/// file. These are represented by the NaClBitstreamCursor class. |
+/// This class is used to read from a NaCl bitcode wire format stream, |
+/// maintaining information that is global to decoding the entire file. |
+/// While a file is being read, multiple cursors can be independently |
+/// advanced or skipped around within the file. These are represented by |
+/// the NaClBitstreamCursor class. |
class NaClBitstreamReader { |
public: |
- /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. |
- /// These describe abbreviations that all blocks of the specified ID inherit. |
+ /// This contains information emitted to BLOCKINFO_BLOCK blocks. These |
+ /// describe abbreviations that all blocks of the specified ID inherit. |
struct BlockInfo { |
unsigned BlockID; |
std::vector<NaClBitCodeAbbrev*> Abbrevs; |
}; |
private: |
- std::unique_ptr<StreamableMemoryObject> BitcodeBytes; |
+ std::unique_ptr<MemoryObject> BitcodeBytes; |
std::vector<BlockInfo> BlockInfoRecords; |
@@ -59,10 +59,8 @@ public: |
init(Start, End); |
} |
- NaClBitstreamReader(StreamableMemoryObject *Bytes, |
- size_t MyInitialAddress=0) |
- : InitialAddress(MyInitialAddress) |
- { |
+ NaClBitstreamReader(MemoryObject *Bytes, size_t MyInitialAddress=0) |
+ : InitialAddress(MyInitialAddress) { |
BitcodeBytes.reset(Bytes); |
} |
@@ -71,7 +69,7 @@ public: |
BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); |
} |
- StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; } |
+ MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } |
~NaClBitstreamReader() { |
// Free the BlockInfoRecords. |
@@ -94,13 +92,13 @@ public: |
// Block Manipulation |
//===--------------------------------------------------------------------===// |
- /// hasBlockInfoRecords - Return true if we've already read and processed the |
- /// block info block for this Bitstream. We only process it for the first |
- /// cursor that walks over it. |
+ /// Return true if we've already read and processed the block info block for |
+ /// this Bitstream. We only process it for the first cursor that walks over |
+ /// it. |
bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } |
- /// getBlockInfo - If there is block info for the specified ID, return it, |
- /// otherwise return null. |
+ /// If there is block info for the specified ID, return it, otherwise return |
+ /// null. |
const BlockInfo *getBlockInfo(unsigned BlockID) const { |
// Common case, the most recent entry matches BlockID. |
if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) |
@@ -110,7 +108,7 @@ public: |
i != e; ++i) |
if (BlockInfoRecords[i].BlockID == BlockID) |
return &BlockInfoRecords[i]; |
- return 0; |
+ return nullptr; |
} |
BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { |
@@ -124,23 +122,17 @@ public: |
} |
}; |
- |
-/// NaClBitstreamEntry - When advancing through a bitstream cursor, |
-/// each advance can discover a few different kinds of entries: |
-/// Error - Malformed bitcode was found. |
-/// EndBlock - We've reached the end of the current block, (or the end of the |
-/// file, which is treated like a series of EndBlock records. |
-/// SubBlock - This is the start of a new subblock of a specific ID. |
-/// Record - This is a record with a specific AbbrevID. |
-/// |
+/// When advancing through a bitstream cursor, each advance can discover a few |
+/// different kinds of entries: |
struct NaClBitstreamEntry { |
enum { |
- Error, |
- EndBlock, |
- SubBlock, |
- Record |
+ Error, // Malformed bitcode was found. |
+ EndBlock, // We've reached the end of the current block (or the end of the |
+ // file, which is treated like a series of EndBlock records). |
+ SubBlock, // This is the start of a new subblock of a specific ID. |
+ Record // This is a record with a specific AbbrevID. |
} Kind; |
- |
+ |
unsigned ID; |
static NaClBitstreamEntry getError() { |
@@ -202,9 +194,9 @@ public: |
uint64_t StartBit; |
}; |
-/// NaClBitstreamCursor - This represents a position within a bitcode |
-/// file. There may be multiple independent cursors reading within |
-/// one bitstream, each maintaining their own local state. |
+/// This represents a position within a bitcode file. There may be multiple |
+/// independent cursors reading within one bitstream, each maintaining their |
+/// own local state. |
/// |
/// Unlike iterators, NaClBitstreamCursors are heavy-weight objects |
/// that should not be passed by value. |
@@ -213,44 +205,44 @@ class NaClBitstreamCursor { |
NaClBitstreamReader *BitStream; |
size_t NextChar; |
- /// CurWord/word_t - This is the current data we have pulled from the stream |
- /// but have not returned to the client. This is specifically and |
- /// intentionally defined to follow the word size of the host machine for |
- /// efficiency. We use word_t in places that are aware of this to make it |
- /// perfectly explicit what is going on. |
- typedef uint32_t word_t; |
+ // The size of the bitcode. 0 if we don't know it yet. |
+ size_t Size; |
+ |
+ /// This is the current data we have pulled from the stream but have not |
+ /// returned to the client. This is specifically and intentionally defined to |
+ /// follow the word size of the host machine for efficiency. We use word_t in |
+ /// places that are aware of this to make it perfectly explicit what is going |
+ /// on. |
+ typedef size_t word_t; |
word_t CurWord; |
- /// BitsInCurWord - This is the number of bits in CurWord that are valid. This |
- /// is always from [0...31/63] inclusive (depending on word size). |
+ /// This is the number of bits in CurWord that are valid. This is always |
+ /// in [0...bits_of(word_t)] inclusive; fillCurWord() can fill a whole word. |
unsigned BitsInCurWord; |
- // CurCodeSize - This is the declared size of code values used for the current |
- // block, in bits. |
+ /// This is the declared size of code values used for the current |
+ /// block, in bits. |
NaClBitcodeSelectorAbbrev CurCodeSize; |
- /// CurAbbrevs - Abbrevs installed at in this block. |
+ /// Abbrevs installed in this block. |
std::vector<NaClBitCodeAbbrev*> CurAbbrevs; |
struct Block { |
NaClBitcodeSelectorAbbrev PrevCodeSize; |
std::vector<NaClBitCodeAbbrev*> PrevAbbrevs; |
- explicit Block() : PrevCodeSize() {} |
+ Block() : PrevCodeSize() {} |
explicit Block(const NaClBitcodeSelectorAbbrev& PCS) |
: PrevCodeSize(PCS) {} |
}; |
- /// BlockScope - This tracks the codesize of parent blocks. |
+ /// This tracks the codesize of parent blocks. |
SmallVector<Block, 8> BlockScope; |
NaClBitstreamCursor(const NaClBitstreamCursor &) LLVM_DELETED_FUNCTION; |
NaClBitstreamCursor &operator=(const NaClBitstreamCursor &) LLVM_DELETED_FUNCTION; |
public: |
- |
- NaClBitstreamCursor() { |
- init(nullptr); |
- } |
+ NaClBitstreamCursor() { init(nullptr); } |
explicit NaClBitstreamCursor(NaClBitstreamReader &R) { init(&R); } |
@@ -258,7 +250,7 @@ public: |
freeState(); |
BitStream = R; |
NextChar = (BitStream == nullptr) ? 0 : BitStream->getInitialAddress(); |
- CurWord = 0; |
+ Size = 0; |
BitsInCurWord = 0; |
} |
@@ -268,10 +260,6 @@ public: |
void freeState(); |
- bool isEndPos(size_t pos) { |
- return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos)); |
- } |
- |
bool canSkipToPos(size_t pos) const { |
// pos can be skipped to if it is a valid address or one byte past the end. |
return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( |
@@ -279,13 +267,18 @@ public: |
} |
bool AtEndOfStream() { |
- return BitsInCurWord == 0 && isEndPos(NextChar); |
+ if (BitsInCurWord != 0) |
+ return false; |
+ if (Size != 0) |
+ return Size == NextChar; |
+ fillCurWord(); |
+ return BitsInCurWord == 0; |
} |
- /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #. |
+ /// Return the number of bits used to encode an abbrev #. |
unsigned getAbbrevIDWidth() const { return CurCodeSize.NumBits; } |
- /// GetCurrentBitNo - Return the bit # of the bit we are reading. |
+ /// Return the bit # of the bit we are reading. |
uint64_t GetCurrentBitNo() const { |
return NextChar*CHAR_BIT - BitsInCurWord; |
} |
@@ -299,18 +292,17 @@ public: |
/// Flags that modify the behavior of advance(). |
enum { |
- /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does |
- /// not automatically pop the block scope when the end of a block is |
- /// reached. |
+ /// If this flag is used, the advance() method does not automatically pop |
+ /// the block scope when the end of a block is reached. |
AF_DontPopBlockAtEnd = 1, |
- /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are |
- /// returned just like normal records. |
+ /// If this flag is used, abbrev entries are returned just like normal |
+ /// records. |
AF_DontAutoprocessAbbrevs = 2 |
}; |
- |
- /// advance - Advance the current bitstream, returning the next entry in the |
- /// stream. Use the given abbreviation listener (if provided). |
+ |
+ /// Advance the current bitstream, returning the next entry in the stream. |
+ /// Use the given abbreviation listener (if provided). |
NaClBitstreamEntry advance(unsigned Flags, NaClAbbrevListener *Listener) { |
while (1) { |
unsigned Code = ReadCode(); |
@@ -320,10 +312,10 @@ public: |
return NaClBitstreamEntry::getError(); |
return NaClBitstreamEntry::getEndBlock(); |
} |
- |
+ |
if (Code == naclbitc::ENTER_SUBBLOCK) |
return NaClBitstreamEntry::getSubBlock(ReadSubBlockID()); |
- |
+ |
if (Code == naclbitc::DEFINE_ABBREV && |
!(Flags & AF_DontAutoprocessAbbrevs)) { |
// We read and accumulate abbrev's, the client can't do anything with |
@@ -336,22 +328,22 @@ public: |
} |
} |
- /// advanceSkippingSubblocks - This is a convenience function for clients that |
- /// don't expect any subblocks. This just skips over them automatically. |
+ /// This is a convenience function for clients that don't expect any |
+ /// subblocks. This just skips over them automatically. |
NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { |
while (1) { |
// If we found a normal entry, return it. |
NaClBitstreamEntry Entry = advance(Flags, 0); |
if (Entry.Kind != NaClBitstreamEntry::SubBlock) |
return Entry; |
- |
+ |
// If we found a sub-block, just skip over it and check the next entry. |
if (SkipBlock()) |
return NaClBitstreamEntry::getError(); |
} |
} |
- /// JumpToBit - Reset the stream to the specified bit number. |
+ /// Reset the stream to the specified bit number. |
void JumpToBit(uint64_t BitNo) { |
uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1); |
unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); |
@@ -360,74 +352,72 @@ public: |
// Move the cursor to the right word. |
NextChar = ByteNo; |
BitsInCurWord = 0; |
- CurWord = 0; |
// Skip over any bits that are already consumed. |
- if (WordBitNo) { |
- if (sizeof(word_t) > 4) |
- Read64(WordBitNo); |
- else |
- Read(WordBitNo); |
+ if (WordBitNo) |
+ Read(WordBitNo); |
+ } |
+ |
+ void fillCurWord() { |
+ assert(Size == 0 || NextChar < (unsigned)Size); |
+ |
+ // Read the next word from the stream. |
+ uint8_t Array[sizeof(word_t)] = {0}; |
+ |
+ uint64_t BytesRead = |
+ BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); |
+ |
+ // If we run out of data, stop at the end of the stream. |
+ if (BytesRead == 0) { |
+ Size = NextChar; |
+ return; |
} |
+ |
+ CurWord = |
+ support::endian::read<word_t, support::little, support::unaligned>( |
+ Array); |
+ NextChar += BytesRead; |
+ BitsInCurWord = BytesRead * 8; |
} |
- uint32_t Read(unsigned NumBits) { |
- assert(NumBits && NumBits <= 32 && |
- "Cannot return zero or more than 32 bits!"); |
- |
+ word_t Read(unsigned NumBits) { |
+ static const unsigned BitsInWord = sizeof(word_t) * 8; |
+ |
+ assert(NumBits && NumBits <= BitsInWord && |
+ "Cannot return zero or more than BitsInWord bits!"); |
+ |
+ static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; |
+ |
// If the field is fully contained by CurWord, return it quickly. |
if (BitsInCurWord >= NumBits) { |
- uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); |
- CurWord >>= NumBits; |
+ word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); |
+ |
+ // Use a mask to avoid undefined behavior. |
+ CurWord >>= (NumBits & Mask); |
+ |
BitsInCurWord -= NumBits; |
return R; |
} |
+ word_t R = BitsInCurWord ? CurWord : 0; |
+ unsigned BitsLeft = NumBits - BitsInCurWord; |
+ |
+ fillCurWord(); |
+ |
// If we run out of data, stop at the end of the stream. |
- if (isEndPos(NextChar)) { |
- CurWord = 0; |
- BitsInCurWord = 0; |
+ if (BitsLeft > BitsInCurWord) |
return 0; |
- } |
- uint32_t R = uint32_t(CurWord); |
+ word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); |
- // Read the next word from the stream. |
- uint8_t Array[sizeof(word_t)] = {0}; |
- |
- BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array); |
- |
- // Handle big-endian byte-swapping if necessary. |
- support::detail::packed_endian_specific_integral |
- <word_t, support::little, support::unaligned> EndianValue; |
- memcpy(&EndianValue, Array, sizeof(Array)); |
- |
- CurWord = EndianValue; |
- |
- NextChar += sizeof(word_t); |
- |
- // Extract NumBits-BitsInCurWord from what we just read. |
- unsigned BitsLeft = NumBits-BitsInCurWord; |
- |
- // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive. |
- R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft))) |
- << BitsInCurWord); |
- |
- // BitsLeft bits have just been used up from CurWord. BitsLeft is in the |
- // range [1..32]/[1..64] so be careful how we shift. |
- if (BitsLeft != sizeof(word_t)*8) |
- CurWord >>= BitsLeft; |
- else |
- CurWord = 0; |
- BitsInCurWord = sizeof(word_t)*8-BitsLeft; |
- return R; |
- } |
+ // Use a mask to avoid undefined behavior. |
+ CurWord >>= (BitsLeft & Mask); |
- uint64_t Read64(unsigned NumBits) { |
- if (NumBits <= 32) return Read(NumBits); |
+ BitsInCurWord -= BitsLeft; |
- uint64_t V = Read(32); |
- return V | (uint64_t)Read(NumBits-32) << 32; |
+ R |= R2 << (NumBits - BitsLeft); |
+ |
+ return R; |
} |
uint32_t ReadVBR(unsigned NumBits) { |
@@ -448,8 +438,8 @@ public: |
} |
} |
- // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The |
- // chunk size of the VBR must still be <= 32 bits though. |
+ // Read a VBR that may have a value up to 64 bits in size. The chunk size of |
+ // the VBR must still be <= 32 bits though. |
uint64_t ReadVBR64(unsigned NumBits) { |
uint32_t Piece = Read(NumBits); |
if ((Piece & (1U << (NumBits-1))) == 0) |
@@ -478,9 +468,8 @@ private: |
BitsInCurWord = 32; |
return; |
} |
- |
+ |
BitsInCurWord = 0; |
- CurWord = 0; |
} |
public: |
@@ -493,15 +482,13 @@ public: |
// Block header: |
// [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] |
- /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for |
- /// the block. |
+ /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. |
unsigned ReadSubBlockID() { |
return ReadVBR(naclbitc::BlockIDWidth); |
} |
- /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip |
- /// over the body of this block. If the block record is malformed, return |
- /// true. |
+ /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body |
+ /// of this block. If the block record is malformed, return true. |
bool SkipBlock() { |
// Read and ignore the codelen value. Since we are skipping this block, we |
// don't care what code widths are used inside of it. |
@@ -519,10 +506,10 @@ public: |
return false; |
} |
- /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter |
- /// the block, and return true if the block has an error. |
- bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0); |
- |
+ /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true |
+ /// if the block has an error. |
+ bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); |
+ |
bool ReadBlockEnd() { |
if (BlockScope.empty()) return true; |
@@ -573,14 +560,14 @@ private: |
public: |
- /// getAbbrev - Return the abbreviation for the specified AbbrevId. |
+ /// Return the abbreviation for the specified AbbrevId. |
const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) const { |
unsigned AbbrevNo = AbbrevID-naclbitc::FIRST_APPLICATION_ABBREV; |
assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); |
return CurAbbrevs[AbbrevNo]; |
} |
- /// skipRecord - Read the current record and discard it. |
+ /// Read the current record and discard it. |
void skipRecord(unsigned AbbrevID); |
unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals); |