Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(85)

Unified Diff: include/llvm/Bitcode/BitstreamReader.h

Issue 8393017: Bitcode streaming (Closed)
Patch Set: rebase against upstream LLVM Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: include/llvm/Bitcode/BitstreamReader.h
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 0437f53134dc6cbc237c512467f2a57f0b6c4048..782b193ba622d4c018ef6d93334f1617eb6fb659 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -15,7 +15,9 @@
#ifndef BITSTREAM_READER_H
#define BITSTREAM_READER_H
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Bitcode/BitcodeStream.h"
#include <climits>
#include <string>
#include <vector>
@@ -24,6 +26,174 @@ namespace llvm {
class Deserializer;
+class BitstreamVector {
nlewycky 2011/11/05 00:45:06 This interface doesn't act much like a std::vector
+public:
+ BitstreamVector() { }
+
+ virtual ~BitstreamVector() { }
+
+ // Is Pos the ending file position (one byte past the last valid byte).
nlewycky 2011/11/05 00:45:06 "Is Pos the ending file position (one byte past th
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
+ // May block until Pos bytes have been read, or EOF is reached.
+ virtual bool isEndPos(size_t Pos) = 0;
+
+ // Returns the ending file position (one byte past the last valid byte).
+ // May block until EOF is reached.
+ virtual size_t getEndPos() = 0;
nlewycky 2011/11/05 00:45:06 Extra space in "() = 0".
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
+
+ // Returns true if seeking to Pos is within the file or one past the end.
+ // May block until Pos bytes have been read, or EOF is reached.
+ virtual bool canSkipToPos(size_t Pos) = 0;
+
+ // Returns the in-memory address of the byte at position Pos in the file.
+ // May block until Pos bytes have been read, or EOF is reached.
+ // Note that the first byte past the end may be skipped to, but may not have
+ // its address taken.
+ virtual const unsigned char* addressOf(size_t Pos) = 0;
+
+ // Returns the character at Pos from the beginning of the file.
+ // May block until Pos bytes have been read, or EOF is reached.
+ virtual unsigned char operator[](size_t Pos) = 0;
+
+private:
+ BitstreamVector(const BitstreamVector&); // NOT IMPLEMENTED
+ void operator=(const BitstreamVector&); // NOT IMPLEMENTED
+};
+
+class MemoryBitstreamVector : public BitstreamVector {
+public:
+ MemoryBitstreamVector() { }
+
+ MemoryBitstreamVector(const unsigned char* Start, const unsigned char* End)
+ : FirstChar(Start), LastChar(End) {
+ }
+
+ virtual ~MemoryBitstreamVector() { }
+
+ virtual bool isEndPos(size_t Pos) {
+ return Pos == static_cast<size_t>(LastChar-FirstChar);
+ }
+
+ virtual size_t getEndPos() {
+ return static_cast<size_t>(LastChar-FirstChar);
+ }
+
+ virtual bool canSkipToPos(size_t Pos) {
+ return Pos <= static_cast<size_t>(LastChar-FirstChar);
+ }
+
+ virtual const unsigned char* addressOf(size_t Pos) {
+ assert(canSkipToPos(Pos) && Pos != static_cast<size_t>(LastChar-FirstChar)
+ && "taking address outside of buffer");
+ return FirstChar + Pos;
+ }
+
+ virtual unsigned char operator [](size_t Pos) {
nlewycky 2011/11/05 00:45:06 This is confusing. Should it instead be returning
(google.com) Derek Schuff 2011/11/08 00:53:23 This container isn't supposed to be mutable. also,
+ assert(canSkipToPos(Pos) && Pos != static_cast<size_t>(LastChar-FirstChar)
+ && "indexing outside of buffer");
nlewycky 2011/11/05 00:45:06 Bad indent. (Can the && go on the previous line? W
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
+ return *(FirstChar + Pos);
+ }
+
+private:
+ const unsigned char* FirstChar;
+ const unsigned char* LastChar;
+
+ MemoryBitstreamVector(const MemoryBitstreamVector&); // NOT IMPLEMENTED
+ void operator=(const MemoryBitstreamVector&); // NOT IMPLEMENTED
+};
+
+class LazyBitstreamVector : public BitstreamVector {
+public:
+ LazyBitstreamVector(BitcodeStreamer* streamer) : Bytes(kChunkSize),
nlewycky 2011/11/05 00:45:06 Please move the constructor list to starting on th
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
+ Streamer(streamer), BytesRead(0), BytesSkipped(0), BitcodeSize(0),
+ EOFReached(false) {
+ BytesRead = streamer->GetBytes(&Bytes[0], kChunkSize);
+ }
+
+ virtual ~LazyBitstreamVector() {}
+
+ virtual bool isEndPos(size_t Pos) {
+ if (BitcodeSize) return Pos == BitcodeSize;
+ fetchToPos(Pos);
+ return Pos == BytesRead;
+ }
+
+ virtual size_t getEndPos() {
+ if (BitcodeSize) return BitcodeSize;
+ size_t pos = BytesRead + kChunkSize;
+ // keep fetching until we run out of bytes
+ while(fetchToPos(pos)) pos += kChunkSize;
+ return BitcodeSize;
+ }
+
+ // If the bitcode has a header, then its size is known, and we don't have to
+ // block until we actually want to read it.
+ virtual bool canSkipToPos(size_t Pos) {
+ if (BitcodeSize && Pos <= BitcodeSize) return true;
+ return fetchToPos(Pos) || Pos == BitcodeSize;
+ }
+
+ virtual const unsigned char* addressOf(size_t Pos) {
+ assert(0 && "addressOf inside streaming vectors not allowed");
+ return NULL;
+ }
+
+ virtual unsigned char operator [](size_t Pos) {
+ fetchToPos(Pos);
+ assert(Pos < BytesRead && "indexing outside of buffer");
+ return Bytes[Pos + BytesSkipped];
+ }
+
+ // Drop s bytes from the front of the vector, pushing the positions of the
+ // remaining bytes down by s. This is used to skip past the bitcode header,
+ // since we don't know a priori if it's present, and we can't put bytes
+ // back into the stream once we've read them.
+ bool dropLeadingBytes(size_t s) {
+ if (BytesRead < s) return true;
+ BytesSkipped = s;
+ BytesRead -= s;
+ return false;
+ }
+
+ void setKnownBitcodeSize(size_t size) {
+ BitcodeSize = size;
+ }
+
+private:
+ const static uint32_t kChunkSize = 4096;
+ std::vector<unsigned char> Bytes;
+ OwningPtr<BitcodeStreamer> Streamer;
+ size_t BytesRead; // Bytes read from stream
+ size_t BytesSkipped;// Bytes skipped at start of stream (e.g. wrapper/header)
+ size_t BitcodeSize; // 0 if unknown, set if wrapper was seen or EOF reached
+ bool EOFReached;
+
+ // Fetches enough bytes such that Pos can be read or EOF is reached
+ // (i.e. BytesRead > Pos). Returns true if Pos can be read.
+ // Unlike most of the functions in BitcodeReader, returns true on success.
+ bool fetchToPos(size_t Pos) {
+ if (EOFReached) return Pos < BitcodeSize;
+ while (Pos >= BytesRead) {
+ Bytes.resize(BytesRead + kChunkSize);
+ size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped],
+ kChunkSize);
+ BytesRead += bytes;
+ if (bytes < kChunkSize) {
+ if (BitcodeSize && BytesRead < Pos)
+ assert(0 && "Unexpected short read fetching bitcode");
+ if (BytesRead <= Pos) { // reached EOF/ran out of bytes
+ BitcodeSize = BytesRead;
+ EOFReached = true;
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ LazyBitstreamVector(const LazyBitstreamVector&); // NOT IMPLEMENTED
+ void operator=(const LazyBitstreamVector&); // NOT IMPLEMENTED
+};
+
class BitstreamReader {
public:
/// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
@@ -36,9 +206,7 @@ public:
std::vector<std::pair<unsigned, std::string> > RecordNames;
};
private:
- /// FirstChar/LastChar - This remembers the first and last bytes of the
- /// stream.
- const unsigned char *FirstChar, *LastChar;
+ OwningPtr<BitstreamVector> BSV;
std::vector<BlockInfo> BlockInfoRecords;
@@ -50,7 +218,7 @@ private:
BitstreamReader(const BitstreamReader&); // NOT IMPLEMENTED
void operator=(const BitstreamReader&); // NOT IMPLEMENTED
public:
- BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) {
+ BitstreamReader() : IgnoreBlockInfoNames(true) {
}
BitstreamReader(const unsigned char *Start, const unsigned char *End) {
@@ -58,12 +226,17 @@ public:
init(Start, End);
}
+ BitstreamReader(BitstreamVector* bsv) {
nlewycky 2011/11/05 00:45:06 * on the right
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
+ BSV.reset(bsv);
+ }
+
void init(const unsigned char *Start, const unsigned char *End) {
- FirstChar = Start;
- LastChar = End;
assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+ BSV.reset(new MemoryBitstreamVector(Start, End));
}
+ BitstreamVector& getBSV() { return *BSV; }
nlewycky 2011/11/05 00:45:06 & on the right.
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
+
~BitstreamReader() {
// Free the BlockInfoRecords.
while (!BlockInfoRecords.empty()) {
@@ -76,9 +249,6 @@ public:
}
}
- const unsigned char *getFirstChar() const { return FirstChar; }
- const unsigned char *getLastChar() const { return LastChar; }
-
/// CollectBlockInfoNames - This is called by clients that want block/record
/// name information.
void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
@@ -122,7 +292,7 @@ public:
class BitstreamCursor {
friend class Deserializer;
BitstreamReader *BitStream;
- const unsigned char *NextChar;
+ size_t NextChar;
/// CurWord - This is the current data we have pulled from the stream but have
/// not returned to the client.
@@ -156,8 +326,7 @@ public:
}
explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
- NextChar = R.getFirstChar();
- assert(NextChar && "Bitstream not initialized yet");
+ NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
@@ -167,8 +336,7 @@ public:
freeState();
BitStream = &R;
- NextChar = R.getFirstChar();
- assert(NextChar && "Bitstream not initialized yet");
+ NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
@@ -226,12 +394,12 @@ public:
unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
bool AtEndOfStream() const {
- return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;
+ return BitStream->getBSV().isEndPos(NextChar) && BitsInCurWord == 0;
}
/// GetCurrentBitNo - Return the bit # of the bit we are reading.
uint64_t GetCurrentBitNo() const {
- return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;
+ return NextChar*CHAR_BIT - BitsInCurWord;
}
BitstreamReader *getBitStreamReader() {
@@ -246,12 +414,10 @@ public:
void JumpToBit(uint64_t BitNo) {
uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
- assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-
- BitStream->getFirstChar()) &&
- "Invalid location");
+ assert(BitStream->getBSV().canSkipToPos(ByteNo) && "Invalid location");
// Move the cursor to the right word.
- NextChar = BitStream->getFirstChar()+ByteNo;
+ NextChar = ByteNo;
BitsInCurWord = 0;
CurWord = 0;
@@ -272,7 +438,7 @@ public:
}
// If we run out of data, stop at the end of the stream.
- if (NextChar == BitStream->getLastChar()) {
+ if (BitStream->getBSV().isEndPos(NextChar)) {
CurWord = 0;
BitsInCurWord = 0;
return 0;
@@ -281,8 +447,10 @@ public:
unsigned R = CurWord;
// Read the next word from the stream.
- CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) |
- (NextChar[2] << 16) | (NextChar[3] << 24);
+ CurWord = (BitStream->getBSV()[NextChar+0] << 0) |
nlewycky 2011/11/05 00:45:06 Extra space in "<< 0"
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
+ (BitStream->getBSV()[NextChar+1] << 8) |
+ (BitStream->getBSV()[NextChar+2] << 16) |
+ (BitStream->getBSV()[NextChar+3] << 24);
NextChar += 4;
// Extract NumBits-BitsInCurWord from what we just read.
@@ -376,9 +544,8 @@ public:
// Check that the block wasn't partially defined, and that the offset isn't
// bogus.
- const unsigned char *const SkipTo = NextChar + NumWords*4;
- if (AtEndOfStream() || SkipTo > BitStream->getLastChar() ||
- SkipTo < BitStream->getFirstChar())
+ size_t SkipTo = NextChar + NumWords*4;
+ if (AtEndOfStream() || !BitStream->getBSV().canSkipToPos(SkipTo))
return true;
NextChar = SkipTo;
@@ -409,8 +576,8 @@ public:
if (NumWordsP) *NumWordsP = NumWords;
// Validate that this block is sane.
- if (CurCodeSize == 0 || AtEndOfStream() ||
- NextChar+NumWords*4 > BitStream->getLastChar())
+ if (CurCodeSize == 0 || AtEndOfStream() || 0)
nlewycky 2011/11/05 00:45:06 || 0?
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
+ // !BitStream->getBSV().canSkipToPos(NextChar+NumWords*4))
nlewycky 2011/11/05 00:45:06 Delete commented out code.
(google.com) Derek Schuff 2011/11/08 00:53:23 Done.
return true;
return false;
@@ -512,24 +679,24 @@ public:
SkipToWord(); // 32-bit alignment
// Figure out where the end of this blob will be including tail padding.
- const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
+ size_t NewEnd = NextChar+((NumElts+3)&~3);
// If this would read off the end of the bitcode file, just set the
// record to empty and return.
- if (NewEnd > BitStream->getLastChar()) {
+ if (!BitStream->getBSV().canSkipToPos(NewEnd)) {
Vals.append(NumElts, 0);
- NextChar = BitStream->getLastChar();
+ NextChar = BitStream->getBSV().getEndPos();
break;
}
// Otherwise, read the number of bytes. If we can return a reference to
// the data, do so to avoid copying it.
if (BlobStart) {
- *BlobStart = (const char*)NextChar;
+ *BlobStart = (const char*)BitStream->getBSV().addressOf(NextChar);
*BlobLen = NumElts;
} else {
for (; NumElts; ++NextChar, --NumElts)
- Vals.push_back(*NextChar);
+ Vals.push_back(BitStream->getBSV()[NextChar]);
}
// Skip over tail padding.
NextChar = NewEnd;

Powered by Google App Engine
This is Rietveld 408576698