include/llvm/Bitcode/BitstreamReader.h - Issue 8393017: Bitcode streaming

Unified Diff: include/llvm/Bitcode/BitstreamReader.h

Issue 8393017: Bitcode streaming (Closed)

Patch Set: Created 9 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: include/llvm/Bitcode/BitstreamReader.h

diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h

index 0437f53134dc6cbc237c512467f2a57f0b6c4048..f4ba2e12ca04e50cfd55e0112202f41623ce6c05 100644

--- a/include/llvm/Bitcode/BitstreamReader.h

+++ b/include/llvm/Bitcode/BitstreamReader.h

@@ -15,15 +15,170 @@

#ifndef BITSTREAM_READER_H

#define BITSTREAM_READER_H

+#include "llvm/ADT/OwningPtr.h"

#include "llvm/Bitcode/BitCodes.h"

+#include "llvm/Bitcode/BitcodeStream.h"

#include <climits>

#include <string>

#include <vector>

+#include <stdio.h>

namespace llvm {

class Deserializer;

+class BitstreamVector {  // Abstract byte source that BitstreamReader reads bitcode from.
+public:
+  BitstreamVector() { }
+  virtual ~BitstreamVector() { }
+  // Returns true if Pos is the ending file position (one byte past the last
+  // valid byte). May block until Pos bytes have been read, or EOF is reached.
+  virtual bool isEndPos(size_t Pos) = 0;
+  // Returns the ending file position (one byte past the last valid byte).
+  // May block until EOF is reached.
+  virtual size_t getEndPos() = 0;
+  // Returns true if seeking to Pos stays within the file (Pos may equal the
+  // ending position). May block until Pos bytes have been read, or EOF.
+  virtual bool canSkipToPos(size_t Pos) = 0;
+  // Returns the in-memory address of the byte at offset Pos from the start.
+  // May block until Pos bytes have been read, or EOF is reached.
+  // Returned pointer may be invalidated by subsequent calls to other methods.
+  virtual const unsigned char* addressOf(size_t Pos) = 0;
+  // Returns the byte at offset Pos from the start, by value, via addressOf().
+  // May block until Pos bytes have been read, or EOF is reached.
+  unsigned char operator[](size_t Pos) {
+    return *addressOf(Pos);
+  }
+private:
+  BitstreamVector(const BitstreamVector&);  // NOT IMPLEMENTED (non-copyable)
+  void operator=(const BitstreamVector&);  // NOT IMPLEMENTED (non-copyable)
+};

+/// BitstreamVector over a caller-provided in-memory byte range [Start, End).
+/// Only the two pointers are stored; the bytes are neither copied nor freed
+/// by this class.
+class MemoryBitstreamVector : public BitstreamVector {
+public:
+  /// Creates an empty vector whose ending position is 0. Both pointers are
+  /// zeroed so the position queries are well defined on a default-constructed
+  /// object (derived classes use this constructor implicitly).
+  MemoryBitstreamVector() : FirstChar(0), LastChar(0) { }
+  /// Wraps the bytes in [Start, End); End is one past the last valid byte.
+  MemoryBitstreamVector(const unsigned char* Start, const unsigned char* End)
+    : FirstChar(Start), LastChar(End) {
+  }
+  virtual ~MemoryBitstreamVector() { }
+  /// Returns true if Pos is exactly one past the last valid byte.
+  virtual bool isEndPos(size_t Pos) {
+    return Pos == sizeInBytes();
+  }
+  /// Returns the position one past the last valid byte.
+  virtual size_t getEndPos() {
+    return sizeInBytes();
+  }
+  /// Returns true if Pos is inside the range or equal to its end position.
+  virtual bool canSkipToPos(size_t Pos) {
+    return Pos <= sizeInBytes();
+  }
+  /// Returns the address of the byte at Pos. Pos must name a valid byte, so
+  /// the end position itself is rejected by the assertion.
+  virtual const unsigned char* addressOf(size_t Pos) {
+    assert(Pos < sizeInBytes() && "taking address outside of buffer");
+    return FirstChar + Pos;
+  }
+private:
+  const unsigned char* FirstChar;
+  const unsigned char* LastChar;
+  /// Number of bytes in [FirstChar, LastChar).
+  size_t sizeInBytes() const {
+    return static_cast<size_t>(LastChar-FirstChar);
+  }
+  MemoryBitstreamVector(const MemoryBitstreamVector&);  // NOT IMPLEMENTED
+  void operator=(const MemoryBitstreamVector&);  // NOT IMPLEMENTED
+};

+/// BitstreamVector that pulls its bytes on demand from a StreamChunkCallback,
+/// kChunkSize bytes per request, and keeps everything fetched so far in Bytes.
+/// Positions taken by the public methods are relative to the first byte that
+/// has not been dropped by dropLeadingBytes(). A callback result shorter than
+/// kChunkSize is treated as the end of the stream.
+class LazyBitstreamVector : public MemoryBitstreamVector {
+public:
+  /// Stores cb and eagerly reads the first chunk through it.
+  LazyBitstreamVector(StreamChunkCallback cb) : Bytes(kChunkSize),
+    GetMoreBytes(cb), BytesRead(0), BytesSkipped(0), BitcodeSize(0) {
+    BytesRead = GetMoreBytes(&Bytes[0], kChunkSize);
+  }
+  virtual ~LazyBitstreamVector() { }
+  /// Returns true if Pos is one past the last byte of the stream. May call
+  /// the callback to find out whether any byte exists at Pos.
+  virtual bool isEndPos(size_t Pos) {
+    if (BitcodeSize) return Pos == BitcodeSize;
+    // Having exactly Pos bytes buffered does not mean the stream ends there,
+    // so try to make the byte *at* Pos available before comparing.
+    fetchToPos(Pos + 1);
+    return Pos == BytesRead;
+  }
+  /// Returns the ending position. If the size is not already known this
+  /// drains the callback until it reports a short read.
+  virtual size_t getEndPos() {
+    if (BitcodeSize) return BitcodeSize;
+    size_t pos = BytesRead + kChunkSize;
+    while (fetchToPos(pos)) pos += kChunkSize;
+    return BytesRead;
+  }
+  // If the bitcode has a header, then its size is known, and we don't have to
+  // block until we actually want to read it.
+  // TODO(dschuff): wrap pexe files in a bitcode header.
+  /// Returns true if Pos is within the stream or equal to its end position.
+  virtual bool canSkipToPos(size_t Pos) {
+    if (BitcodeSize && Pos <= BitcodeSize) return true;
+    return fetchToPos(Pos);
+  }
+  /// Returns the address of the byte at Pos, fetching more data if needed.
+  /// The pointer refers into Bytes, so any later call that fetches more data
+  /// may invalidate it.
+  virtual const unsigned char* addressOf(size_t Pos) {
+    // The byte at Pos itself must be readable, which needs Pos + 1 bytes.
+    const bool Readable = fetchToPos(Pos + 1);
+    assert(Readable && "taking address outside of buffer");
+    (void)Readable;  // Only inspected when assertions are enabled.
+    return &Bytes[Pos + BytesSkipped];
+  }
+  // Drop s bytes from the front of the vector, pushing the positions of the
+  // remaining bytes down by s. This is used to skip past the bitcode header,
+  // since we don't know a priori if it's present, and we can't put bytes
+  // back into the stream once we've read them.
+  /// Returns true on failure (fewer than s bytes have been read so far) and
+  /// false on success.
+  bool dropLeadingBytes(size_t s) {
+    if (BytesRead < s) return true;
+    // Accumulate rather than assign so the skip count stays consistent with
+    // the cumulative subtraction from BytesRead if this is called again.
+    BytesSkipped += s;
+    BytesRead -= s;
+    return false;
+  }
+  /// Records the total stream size so that end-position queries no longer
+  /// need to fetch data. A size of 0 means "unknown".
+  void setKnownBitcodeSize(size_t size) {
+    BitcodeSize = size;
+  }
+private:
+  /// Number of bytes requested from the callback per call.
+  const static uint32_t kChunkSize = 4096;
+  /// All bytes received so far, including any dropped leading bytes.
+  std::vector<unsigned char> Bytes;
+  StreamChunkCallback GetMoreBytes;
+  /// Bytes received so far, not counting the dropped leading bytes.
+  size_t BytesRead;
+  /// Bytes at the front of Bytes hidden by dropLeadingBytes().
+  size_t BytesSkipped;
+  /// Known total size of the stream, or 0 if unknown.
+  size_t BitcodeSize;
+  // Unlike most of the functions in BitcodeReader, returns true on success
+  /// Fetches chunks until at least Pos bytes are available. Returns false if
+  /// a short read leaves fewer than Pos bytes available.
+  bool fetchToPos(size_t Pos) {
+    while (Pos > BytesRead) {
+      // New data lands after everything already stored, and that includes
+      // the skipped prefix, so the buffer must grow from that offset.
+      const size_t Offset = BytesRead + BytesSkipped;
+      Bytes.resize(Offset + kChunkSize);
+      size_t bytes = GetMoreBytes(&Bytes[Offset], kChunkSize);
+      BytesRead += bytes;
+      if (bytes < kChunkSize) {
+        if (BitcodeSize && BytesRead < Pos)
+          assert(0 && "Unexpected short read fetching bitcode");
+        if (BytesRead < Pos) return false;
+      }
+    }
+    return true;
+  }
+  LazyBitstreamVector(const LazyBitstreamVector&);  // NOT IMPLEMENTED
+  void operator=(const LazyBitstreamVector&);  // NOT IMPLEMENTED
+};

class BitstreamReader {

public:

/// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.

@@ -36,9 +191,7 @@ public:

std::vector<std::pair<unsigned, std::string> > RecordNames;

};

private:

- /// FirstChar/LastChar - This remembers the first and last bytes of the

- /// stream.

- const unsigned char *FirstChar, *LastChar;

+ OwningPtr<BitstreamVector> BSV;

std::vector<BlockInfo> BlockInfoRecords;

@@ -50,7 +203,7 @@ private:

BitstreamReader(const BitstreamReader&); // NOT IMPLEMENTED

void operator=(const BitstreamReader&); // NOT IMPLEMENTED

public:

- BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) {

+ BitstreamReader() : IgnoreBlockInfoNames(true) {

}

BitstreamReader(const unsigned char *Start, const unsigned char *End) {

@@ -58,12 +211,17 @@ public:

init(Start, End);

}

+  /// Constructs a reader over a caller-supplied byte source, which is handed
+  /// to the OwningPtr member BSV. IgnoreBlockInfoNames is initialized to true
+  /// to match the default constructor; leaving it unset would make
+  /// block-info name handling depend on an indeterminate value.
+  BitstreamReader(BitstreamVector* bsv) : IgnoreBlockInfoNames(true) {
+    BSV.reset(bsv);
+  }

void init(const unsigned char *Start, const unsigned char *End) {

- FirstChar = Start;

- LastChar = End;

assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");

+ BSV.reset(new MemoryBitstreamVector(Start, End));

}

+  BitstreamVector& getBSV() { return *BSV; }  // Byte source set by init() or the BitstreamVector* constructor; dereferences BSV unconditionally.

~BitstreamReader() {

// Free the BlockInfoRecords.

while (!BlockInfoRecords.empty()) {

@@ -76,9 +234,6 @@ public:

}

- const unsigned char *getFirstChar() const { return FirstChar; }

- const unsigned char *getLastChar() const { return LastChar; }

/// CollectBlockInfoNames - This is called by clients that want block/record

/// name information.

void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }

@@ -122,7 +277,7 @@ public:

class BitstreamCursor {

friend class Deserializer;

BitstreamReader *BitStream;

- const unsigned char *NextChar;

+ size_t NextChar;

/// CurWord - This is the current data we have pulled from the stream but have

/// not returned to the client.

@@ -156,8 +311,7 @@ public:

}

explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {

- NextChar = R.getFirstChar();

- assert(NextChar && "Bitstream not initialized yet");

+ NextChar = 0;

CurWord = 0;

BitsInCurWord = 0;

CurCodeSize = 2;

@@ -167,8 +321,7 @@ public:

freeState();

BitStream = &R;

- NextChar = R.getFirstChar();

- assert(NextChar && "Bitstream not initialized yet");

+ NextChar = 0;

CurWord = 0;

BitsInCurWord = 0;

CurCodeSize = 2;

@@ -226,12 +379,12 @@ public:

unsigned GetAbbrevIDWidth() const { return CurCodeSize; }

bool AtEndOfStream() const {

- return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;

+ return BitStream->getBSV().isEndPos(NextChar) && BitsInCurWord == 0;

}

/// GetCurrentBitNo - Return the bit # of the bit we are reading.

uint64_t GetCurrentBitNo() const {

- return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;

+ return NextChar*CHAR_BIT - BitsInCurWord;

}

BitstreamReader *getBitStreamReader() {

@@ -246,12 +399,10 @@ public:

void JumpToBit(uint64_t BitNo) {

uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;

uintptr_t WordBitNo = uintptr_t(BitNo) & 31;

- assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-

- BitStream->getFirstChar()) &&

- "Invalid location");

+ assert(BitStream->getBSV().canSkipToPos(ByteNo) && "Invalid location");

// Move the cursor to the right word.

- NextChar = BitStream->getFirstChar()+ByteNo;

+ NextChar = ByteNo;

BitsInCurWord = 0;

CurWord = 0;

@@ -272,7 +423,7 @@ public:

}

// If we run out of data, stop at the end of the stream.

- if (NextChar == BitStream->getLastChar()) {

+ if (BitStream->getBSV().isEndPos(NextChar)) {

CurWord = 0;

BitsInCurWord = 0;

return 0;

@@ -281,8 +432,10 @@ public:

unsigned R = CurWord;

// Read the next word from the stream.

- CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) |

- (NextChar[2] << 16) | (NextChar[3] << 24);

+ CurWord = (BitStream->getBSV()[NextChar+0] << 0) |

+ (BitStream->getBSV()[NextChar+1] << 8) |

+ (BitStream->getBSV()[NextChar+2] << 16) |

+ (BitStream->getBSV()[NextChar+3] << 24);

NextChar += 4;

// Extract NumBits-BitsInCurWord from what we just read.

@@ -376,9 +529,8 @@ public:

// Check that the block wasn't partially defined, and that the offset isn't

// bogus.

- const unsigned char *const SkipTo = NextChar + NumWords*4;

- if (AtEndOfStream() || SkipTo > BitStream->getLastChar() ||

- SkipTo < BitStream->getFirstChar())

+ size_t SkipTo = NextChar + NumWords*4;

+ if (AtEndOfStream() || !BitStream->getBSV().canSkipToPos(SkipTo))

return true;

NextChar = SkipTo;

@@ -410,7 +562,7 @@ public:

// Validate that this block is sane.

if (CurCodeSize == 0 || AtEndOfStream() ||

- NextChar+NumWords*4 > BitStream->getLastChar())

+ !BitStream->getBSV().canSkipToPos(NextChar+NumWords*4))

return true;

return false;

@@ -512,24 +664,24 @@ public:

SkipToWord(); // 32-bit alignment

// Figure out where the end of this blob will be including tail padding.

- const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);

+ size_t NewEnd = NextChar+((NumElts+3)&~3);

// If this would read off the end of the bitcode file, just set the

// record to empty and return.

- if (NewEnd > BitStream->getLastChar()) {

+ if (!BitStream->getBSV().canSkipToPos(NewEnd)) {

Vals.append(NumElts, 0);

- NextChar = BitStream->getLastChar();

+ NextChar = BitStream->getBSV().getEndPos();

break;

}

// Otherwise, read the number of bytes. If we can return a reference to

// the data, do so to avoid copying it.

if (BlobStart) {

- *BlobStart = (const char*)NextChar;

+ *BlobStart = (const char*)BitStream->getBSV().addressOf(NextChar);

*BlobLen = NumElts;

} else {

for (; NumElts; ++NextChar, --NumElts)

- Vals.push_back(*NextChar);

+ Vals.push_back(BitStream->getBSV()[NextChar]);

}

// Skip over tail padding.

NextChar = NewEnd;

« no previous file with comments | « include/llvm/Bitcode/BitcodeStream.h ('k') | include/llvm/Bitcode/ReaderWriter.h » ('j') | include/llvm/Support/IRReader.h » ('J')