OLD | NEW |
(Empty) | |
| 1 //===- NaClBitstreamReader.h -----------------------------------*- C++ -*-===// |
| 2 // Low-level bitstream reader interface |
| 3 // |
| 4 // The LLVM Compiler Infrastructure |
| 5 // |
| 6 // This file is distributed under the University of Illinois Open Source |
| 7 // License. See LICENSE.TXT for details. |
| 8 // |
| 9 //===----------------------------------------------------------------------===// |
| 10 // |
| 11 // This header defines the NaClBitstreamReader class. This class can be used to |
| 12 // read an arbitrary bitstream, regardless of its contents. |
| 13 // |
| 14 //===----------------------------------------------------------------------===// |
| 15 |
| 16 #ifndef LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H |
| 17 #define LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H |
| 18 |
| 19 #include "llvm/ADT/SmallVector.h" |
| 20 #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" |
| 21 #include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" |
| 22 #include "llvm/Support/Endian.h" |
| 23 #include "llvm/Support/StreamingMemoryObject.h" |
| 24 #include <atomic> |
| 25 #include <climits> |
| 26 #include <mutex> |
| 27 #include <unordered_map> |
| 28 #include <vector> |
| 29 |
| 30 namespace llvm { |
| 31 |
| 32 class Deserializer; |
| 33 class NaClBitstreamCursor; |
| 34 |
| 35 namespace naclbitc { |
| 36 |
| 37 /// Returns the Bit as a Byte:BitInByte string. |
| 38 std::string getBitAddress(uint64_t Bit); |
| 39 |
| 40 /// Severity levels for reporting errors. |
| 41 enum ErrorLevel { |
| 42 Warning, |
| 43 Error, |
| 44 Fatal |
| 45 }; |
| 46 |
| 47 // Basic printing routine to generate the beginning of an error |
| 48 // message. BitPosition is the bit position the error was found. |
| 49 // Level is the severity of the error. |
| 50 raw_ostream &ErrorAt(raw_ostream &Out, ErrorLevel Level, |
| 51 uint64_t BitPosition); |
| 52 |
| 53 } // End namespace naclbitc. |
| 54 |
| 55 /// This class is used to read from a NaCl bitcode wire format stream, |
| 56 /// maintaining information that is global to decoding the entire file. |
| 57 /// While a file is being read, multiple cursors can be independently |
| 58 /// advanced or skipped around within the file. These are represented by |
| 59 /// the NaClBitstreamCursor class. |
| 60 class NaClBitstreamReader { |
| 61 public: |
| 62 // Models a raw list of abbreviations. |
| 63 static const size_t DefaultAbbrevListSize = 12; |
| 64 using AbbrevListVector = SmallVector<NaClBitCodeAbbrev *, |
| 65 DefaultAbbrevListSize>; |
| 66 |
| 67 // Models and maintains a list of abbreviations. In particular, it maintains |
| 68 // updating reference counts of abbreviation operators within the abbreviation |
| 69 // list. |
| 70 class AbbrevList { |
| 71 public: |
| 72 AbbrevList() = default; |
| 73 explicit AbbrevList(const AbbrevList &NewAbbrevs) { |
| 74 appendList(NewAbbrevs); |
| 75 } |
| 76 AbbrevList &operator=(const AbbrevList &Rhs) { |
| 77 clear(); |
| 78 appendList(Rhs); |
| 79 return *this; |
| 80 } |
| 81 // Creates a new (empty) abbreviation, appends it to this, and then returns |
| 82 // the new abbreviation. |
| 83 NaClBitCodeAbbrev *appendCreate() { |
| 84 NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); |
| 85 Abbrevs.push_back(Abbv); |
| 86 return Abbv; |
| 87 } |
| 88 // Appends the given abbreviation to this. |
| 89 void append(NaClBitCodeAbbrev *Abbrv) { |
| 90 Abbrv->addRef(); |
| 91 Abbrevs.push_back(Abbrv); |
| 92 } |
| 93 // Appends the contents of NewAbbrevs to this. |
| 94 void appendList(const AbbrevList &NewAbbrevs) { |
| 95 for (NaClBitCodeAbbrev *Abbrv : NewAbbrevs.Abbrevs) |
| 96 append(Abbrv); |
| 97 } |
| 98 // Returns last abbreviation on list. |
| 99 NaClBitCodeAbbrev *last() { return Abbrevs.back(); } |
| 100 // Removes the last element of the list. |
| 101 void popLast() { |
| 102 Abbrevs.back()->dropRef(); |
| 103 Abbrevs.pop_back(); |
| 104 } |
| 105 // Empties abbreviation list. |
| 106 void clear() { |
| 107 while(!Abbrevs.empty()) |
| 108 popLast(); |
| 109 } |
| 110 // Allow read access to vector defining list. |
| 111 const AbbrevListVector &getVector() const { return Abbrevs; } |
| 112 ~AbbrevList() { clear(); } |
| 113 private: |
| 114 AbbrevListVector Abbrevs; |
| 115 }; |
| 116 |
| 117 /// This contains information about abbreviations in blocks defined in the |
| 118 /// BLOCKINFO_BLOCK block. These describe global abbreviations that apply to |
| 119 /// all succeeding blocks of the specified ID. |
| 120 class BlockInfo { |
| 121 BlockInfo &operator=(const BlockInfo&) = delete; |
| 122 public: |
| 123 BlockInfo() = default; |
| 124 explicit BlockInfo(unsigned BlockID) |
| 125 : BlockID(BlockID), Abbrevs() {} |
| 126 BlockInfo(const BlockInfo&) = default; |
| 127 unsigned getBlockID() const { return BlockID; } |
| 128 void setBlockID(unsigned ID) { BlockID = ID; } |
| 129 AbbrevList &getAbbrevs() { return Abbrevs; } |
| 130 ~BlockInfo() {} |
| 131 private: |
| 132 unsigned BlockID; |
| 133 AbbrevList Abbrevs; |
| 134 }; |
| 135 |
| 136 class BlockInfoRecordsMap; |
| 137 using SharedBlockInfoMap = std::shared_ptr<BlockInfoRecordsMap>; |
| 138 |
| 139 // Holds the global abbreviations in the BlockInfo block of the bitcode file. |
| 140 // Sharing is used to allow parallel parses. Share by using std::share_ptr's |
| 141 // and std::shared_from_this(). |
| 142 // |
| 143 // Note: The BlockInfo block must be parsed before sharing of the |
| 144 // BlockInfoRecordsMap. Therefore, before changing to a parallel parse, the |
| 145 // BlockInfoRecordsMap must be frozen. Failure to do so, can lead to |
| 146 // unexpected behaviour. |
| 147 // |
| 148 // In practice, this means that only function blocks can be parsed in |
| 149 // parallel. |
| 150 class BlockInfoRecordsMap : |
| 151 public std::enable_shared_from_this<BlockInfoRecordsMap> { |
| 152 friend class NaClBitstreamReader; |
| 153 BlockInfoRecordsMap(const BlockInfoRecordsMap&) = delete; |
| 154 BlockInfoRecordsMap &operator=(const BlockInfoRecordsMap&) = delete; |
| 155 public: |
| 156 using InfosMap = std::unordered_map<unsigned, std::unique_ptr<BlockInfo>>; |
| 157 |
| 158 static SharedBlockInfoMap create() { |
| 159 return SharedBlockInfoMap(new BlockInfoRecordsMap()); |
| 160 } |
| 161 ~BlockInfoRecordsMap() = default; |
| 162 |
| 163 bool isFrozen() const { |
| 164 return IsFrozen.load(); |
| 165 } |
| 166 |
| 167 // Returns true if already frozen. |
| 168 bool freeze() { |
| 169 return IsFrozen.exchange(true); |
| 170 } |
| 171 |
| 172 BlockInfo *getBlockInfo(unsigned BlockID) { |
| 173 auto Pos = KnownInfos.find(BlockID); |
| 174 if (Pos != KnownInfos.end()) |
| 175 return Pos->second.get(); |
| 176 return getOrCreateUnknownBlockInfo(BlockID); |
| 177 } |
| 178 |
| 179 // Locks the BlockInfoRecordsMap for the lifetime of the UpdateLock. Used |
| 180 // to allow the parsing of a BlockInfo block, and install global |
| 181 // abbreviations. |
| 182 // |
| 183 // Verifies that the BlockInfoRecordsMap didn't get frozen during the |
| 184 // instance's lifetime as a safety precaution. That is, it checks that no |
| 185 // bitstream reader was created to share the global abbreviations before the |
| 186 // global abbreviations are defined. |
| 187 class UpdateLock { |
| 188 UpdateLock() = delete; |
| 189 UpdateLock(const UpdateLock&) = delete; |
| 190 UpdateLock &operator=(const UpdateLock&) = delete; |
| 191 public: |
| 192 explicit UpdateLock(BlockInfoRecordsMap &BlockInfoRecords); |
| 193 ~UpdateLock(); |
| 194 private: |
| 195 // The BlockInfoRecordsMap to update. |
| 196 BlockInfoRecordsMap &BlockInfoRecords; |
| 197 // The locked mutex from BlockInfoRecordsMap; |
| 198 std::unique_lock<std::mutex> Lock; |
| 199 }; |
| 200 |
| 201 private: |
| 202 // The set of known BlockInfo's. This map is prepopulated so that fast |
| 203 // lookup can be performed thread safe (i.e. without using a lock). |
| 204 InfosMap KnownInfos; |
| 205 // The set of unknown BlockInfo's. This map is to handle unknown (and hence, |
| 206 // invalid) PNaCl bitcode files. This map is updated incrementally, and uses |
| 207 // UnknownBlockInfoLock to make it thread safe. |
| 208 InfosMap UnknownInfos; |
| 209 // True if the known BlockInfo blocks are frozen (i.e. the bitstream reader |
| 210 // will ignore the BlockInfo block). |
| 211 std::atomic_bool IsFrozen; |
| 212 // Lock to use to update this data structure. |
| 213 std::mutex UpdateRecordsLock; |
| 214 // Lock to get/create an unknonw block info. |
| 215 std::mutex UnknownBlockInfoLock; |
| 216 |
| 217 BlockInfoRecordsMap(); |
| 218 |
| 219 BlockInfo *getOrCreateUnknownBlockInfo(unsigned BlockID); |
| 220 }; |
| 221 |
| 222 private: |
| 223 friend class NaClBitstreamCursor; |
| 224 |
| 225 std::unique_ptr<MemoryObject> BitcodeBytes; |
| 226 |
| 227 SharedBlockInfoMap BlockInfoRecords; |
| 228 |
| 229 /// \brief Holds the offset of the first byte after the header. |
| 230 size_t InitialAddress; |
| 231 |
| 232 // Holds the number of bytes to add to the bitcode position, when reporting |
| 233 // errors. Useful when using parallel parses of function blocks. |
| 234 size_t ErrorOffset = 0; |
| 235 |
| 236 // True if filler should be added to byte align records. |
| 237 bool AlignBitcodeRecords = false; |
| 238 NaClBitstreamReader(const NaClBitstreamReader&) = delete; |
| 239 void operator=(const NaClBitstreamReader&) = delete; |
| 240 |
| 241 |
| 242 void initFromHeader(NaClBitcodeHeader &Header) { |
| 243 InitialAddress = Header.getHeaderSize(); |
| 244 AlignBitcodeRecords = Header.getAlignBitcodeRecords(); |
| 245 } |
| 246 |
| 247 public: |
| 248 /// Read stream from sequence of bytes [Start .. End) after parsing |
| 249 /// the given bitcode header. |
| 250 NaClBitstreamReader(const unsigned char *Start, const unsigned char *End, |
| 251 NaClBitcodeHeader &Header) |
| 252 : BitcodeBytes(getNonStreamedMemoryObject(Start, End)), |
| 253 BlockInfoRecords(BlockInfoRecordsMap::create()) { |
| 254 initFromHeader(Header); |
| 255 } |
| 256 |
| 257 /// Read stream from Bytes, after parsing the given bitcode header. |
| 258 NaClBitstreamReader(MemoryObject *Bytes, NaClBitcodeHeader &Header) |
| 259 : BitcodeBytes(Bytes), BlockInfoRecords(BlockInfoRecordsMap::create()) |
| 260 { initFromHeader(Header); } |
| 261 |
| 262 /// Read stream from bytes, starting at the given initial address. |
| 263 /// Provides simple API for unit testing. |
| 264 NaClBitstreamReader(MemoryObject *Bytes, size_t InitialAddress) |
| 265 : BitcodeBytes(Bytes), BlockInfoRecords(BlockInfoRecordsMap::create()), |
| 266 InitialAddress(InitialAddress) {} |
| 267 |
| 268 /// Read stream from sequence of bytes [Start .. End), using the global |
| 269 /// abbreviations of the given bitstream reader. Assumes that [Start .. End) |
| 270 /// is copied from Reader's memory object. |
| 271 NaClBitstreamReader(size_t StartAddress, const unsigned char *Start, |
| 272 const unsigned char *End, NaClBitstreamReader *Reader) |
| 273 : BitcodeBytes(getNonStreamedMemoryObject(Start, End)), |
| 274 BlockInfoRecords(Reader->BlockInfoRecords), InitialAddress(0), |
| 275 ErrorOffset(StartAddress) { BlockInfoRecords->freeze(); } |
| 276 |
| 277 // Returns the memory object that is being read. |
| 278 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } |
| 279 |
| 280 ~NaClBitstreamReader() {} |
| 281 |
| 282 /// \brief Returns the initial address (after the header) of the input stream. |
| 283 size_t getInitialAddress() const { |
| 284 return InitialAddress; |
| 285 } |
| 286 |
| 287 /// Returns the byte address of the first byte in the bitstream. Used |
| 288 /// for error reporting. |
| 289 size_t getErrorOffset() const { return ErrorOffset; } |
| 290 |
| 291 //===--------------------------------------------------------------------===// |
| 292 // Block Manipulation |
| 293 //===--------------------------------------------------------------------===// |
| 294 |
| 295 BlockInfo *getBlockInfo(unsigned BlockID) { |
| 296 return BlockInfoRecords->getBlockInfo(BlockID); |
| 297 } |
| 298 }; |
| 299 |
/// When advancing through a bitstream cursor, each advance can discover a few
/// different kinds of entries:
struct NaClBitstreamEntry {
  enum {
    Error,    // Malformed bitcode was found.
    EndBlock, // We've reached the end of the current block (or the end of the
              // file, which is treated like a series of EndBlock records).
    SubBlock, // This is the start of a new subblock of a specific ID.
    Record    // This is a record with a specific AbbrevID.
  } Kind;

  /// Block ID for SubBlock entries, abbreviation ID for Record entries, and
  /// zero for Error and EndBlock entries.
  unsigned ID;

  /// Builds an Error entry. ID is zeroed so that the returned value never
  /// carries an indeterminate field.
  static NaClBitstreamEntry getError() {
    NaClBitstreamEntry E;
    E.Kind = Error;
    E.ID = 0;
    return E;
  }
  /// Builds an EndBlock entry. ID is zeroed for the same reason as above.
  static NaClBitstreamEntry getEndBlock() {
    NaClBitstreamEntry E;
    E.Kind = EndBlock;
    E.ID = 0;
    return E;
  }
  /// Builds a SubBlock entry for the block with the given ID.
  static NaClBitstreamEntry getSubBlock(unsigned ID) {
    NaClBitstreamEntry E;
    E.Kind = SubBlock;
    E.ID = ID;
    return E;
  }
  /// Builds a Record entry that uses the given abbreviation ID.
  static NaClBitstreamEntry getRecord(unsigned AbbrevID) {
    NaClBitstreamEntry E;
    E.Kind = Record;
    E.ID = AbbrevID;
    return E;
  }
};
| 326 |
| 327 /// Models default view of a bitcode record. |
| 328 typedef SmallVector<uint64_t, 8> NaClBitcodeRecordVector; |
| 329 |
| 330 /// Class NaClAbbrevListener is used to allow instances of class |
| 331 /// NaClBitcodeParser to listen to record details when processing |
| 332 /// abbreviations. The major reason for using a listener is that the |
| 333 /// NaCl bitcode reader would require a major rewrite (including the |
| 334 /// introduction of more overhead) if we were to lift abbreviations up |
| 335 /// to the bitcode reader. That is, not only would we have to lift the |
| 336 /// block processing up into the readers (i.e. many blocks in |
| 337 /// NaClBitcodeReader and NaClBitcodeParser), but add many new API's |
| 338 /// to allow the readers to update internals of the bit stream reader |
| 339 /// appropriately. |
| 340 class NaClAbbrevListener { |
| 341 NaClAbbrevListener(const NaClAbbrevListener&) = delete; |
| 342 void operator=(const NaClAbbrevListener&) = delete; |
| 343 public: |
| 344 NaClAbbrevListener() {} |
| 345 virtual ~NaClAbbrevListener() {} |
| 346 |
| 347 /// Called to process the read abbreviation. |
| 348 virtual void ProcessAbbreviation(NaClBitCodeAbbrev *Abbrv, |
| 349 bool IsLocal) = 0; |
| 350 |
| 351 /// Called after entering block. NumWords is the number of words |
| 352 /// in the block. |
| 353 virtual void BeginBlockInfoBlock(unsigned NumWords) = 0; |
| 354 |
| 355 /// Called if a naclbitc::BLOCKINFO_CODE_SETBID record is found in |
| 356 /// NaClBitstreamCursor::ReadBlockInfoBlock. |
| 357 virtual void SetBID() = 0; |
| 358 |
| 359 /// Called just before an EndBlock record is processed by |
| 360 /// NaClBitstreamCursor::ReadBlockInfoBlock |
| 361 virtual void EndBlockInfoBlock() = 0; |
| 362 |
| 363 /// The values of the bitcode record associated with the called |
| 364 /// virtual function. |
| 365 NaClBitcodeRecordVector Values; |
| 366 |
| 367 /// Start bit for current record being processed in |
| 368 /// NaClBitstreamCursor::ReadBlockInfoBlock. |
| 369 uint64_t StartBit; |
| 370 }; |
| 371 |
| 372 /// This represents a position within a bitcode file. There may be multiple |
| 373 /// independent cursors reading within one bitstream, each maintaining their |
| 374 /// own local state. |
| 375 /// |
| 376 /// Unlike iterators, NaClBitstreamCursors are heavy-weight objects |
| 377 /// that should not be passed by value. |
| 378 class NaClBitstreamCursor { |
| 379 public: |
| 380 /// This class handles errors in the bitstream reader. Redirects |
| 381 /// fatal error messages to virtual method Fatal. |
| 382 class ErrorHandler { |
| 383 ErrorHandler(const ErrorHandler &) = delete; |
| 384 ErrorHandler &operator=(const ErrorHandler &) = delete; |
| 385 public: |
| 386 explicit ErrorHandler(NaClBitstreamCursor &Cursor) : Cursor(Cursor) {} |
| 387 LLVM_ATTRIBUTE_NORETURN |
| 388 virtual void Fatal(const std::string &ErrorMessage) const; |
| 389 virtual ~ErrorHandler() {} |
| 390 uint64_t getCurrentBitNo() const { |
| 391 return Cursor.GetCurrentBitNo(); |
| 392 } |
| 393 private: |
| 394 NaClBitstreamCursor &Cursor; |
| 395 }; |
| 396 |
| 397 private: |
| 398 friend class Deserializer; |
| 399 NaClBitstreamReader *BitStream; |
| 400 size_t NextChar; |
| 401 // The current error handler for the bitstream reader. |
| 402 std::unique_ptr<ErrorHandler> ErrHandler; |
| 403 |
| 404 // The size of the bitcode. 0 if we don't know it yet. |
| 405 size_t Size; |
| 406 |
| 407 /// This is the current data we have pulled from the stream but have not |
| 408 /// returned to the client. This is specifically and intentionally defined to |
| 409 /// follow the word size of the host machine for efficiency. We use word_t in |
| 410 /// places that are aware of this to make it perfectly explicit what is going |
| 411 /// on. |
| 412 typedef size_t word_t; |
| 413 word_t CurWord; |
| 414 |
| 415 /// This is the number of bits in CurWord that are valid. This |
| 416 /// is always from [0...bits_of(word_t)-1] inclusive. |
| 417 unsigned BitsInCurWord; |
| 418 |
| 419 // Data specific to a block being scanned. |
| 420 class Block { |
| 421 public: |
| 422 Block() = delete; |
| 423 Block &operator=(const Block &Rhs) { |
| 424 GlobalAbbrevs = Rhs.GlobalAbbrevs; |
| 425 NumGlobalAbbrevs = Rhs.NumGlobalAbbrevs; |
| 426 LocalAbbrevs = Rhs.LocalAbbrevs; |
| 427 CodeAbbrev = Rhs.CodeAbbrev; |
| 428 return *this; |
| 429 } |
| 430 Block(NaClBitstreamReader::BlockInfo *GlobalAbbrevs, |
| 431 NaClBitcodeSelectorAbbrev& CodeAbbrev) |
| 432 : GlobalAbbrevs(GlobalAbbrevs), |
| 433 NumGlobalAbbrevs(GlobalAbbrevs->getAbbrevs().getVector().size()), |
| 434 LocalAbbrevs(), CodeAbbrev(CodeAbbrev) {} |
| 435 Block(NaClBitstreamReader::BlockInfo *GlobalAbbrevs) |
| 436 : GlobalAbbrevs(GlobalAbbrevs), |
| 437 NumGlobalAbbrevs(GlobalAbbrevs->getAbbrevs().getVector().size()), |
| 438 LocalAbbrevs(), CodeAbbrev() {} |
| 439 ~Block() = default; |
| 440 const NaClBitstreamReader::AbbrevList &getGlobalAbbrevs() const { |
| 441 return GlobalAbbrevs->getAbbrevs(); |
| 442 } |
| 443 unsigned getNumGlobalAbbrevs() const { return NumGlobalAbbrevs; } |
| 444 const NaClBitstreamReader::AbbrevList &getLocalAbbrevs() const { |
| 445 return LocalAbbrevs; |
| 446 } |
| 447 const NaClBitcodeSelectorAbbrev &getCodeAbbrev() const { |
| 448 return CodeAbbrev; |
| 449 } |
| 450 void setCodeAbbrev(NaClBitcodeSelectorAbbrev &Abbrev) { |
| 451 CodeAbbrev = Abbrev; |
| 452 } |
| 453 NaClBitCodeAbbrev *appendLocalCreate() { |
| 454 return LocalAbbrevs.appendCreate(); |
| 455 } |
| 456 void moveLocalAbbrevToAbbrevList(NaClBitstreamReader::AbbrevList *List) { |
| 457 if (List != &LocalAbbrevs) { |
| 458 NaClBitCodeAbbrev *Abbv = LocalAbbrevs.last(); |
| 459 List->append(Abbv); |
| 460 LocalAbbrevs.popLast(); |
| 461 } |
| 462 } |
| 463 private: |
| 464 friend class NaClBitstreamCursor; |
| 465 // The global abbreviations associated with this scope. |
| 466 NaClBitstreamReader::BlockInfo *GlobalAbbrevs; |
| 467 // Number of abbreviations when block was entered. Used to limit scope of |
| 468 // CurBlockInfo, since any abbreviation added inside a BlockInfo block |
| 469 // (within this block) must not effect global abbreviations. |
| 470 unsigned NumGlobalAbbrevs; |
| 471 NaClBitstreamReader::AbbrevList LocalAbbrevs; |
| 472 // This is the declared size of code values used for the current block, in |
| 473 // bits. |
| 474 NaClBitcodeSelectorAbbrev CodeAbbrev; |
| 475 }; |
| 476 |
| 477 /// This tracks the Block-specific information for each nested block. |
| 478 SmallVector<Block, 8> BlockScope; |
| 479 |
| 480 NaClBitstreamCursor(const NaClBitstreamCursor &) = delete; |
| 481 NaClBitstreamCursor &operator=(const NaClBitstreamCursor &) = delete; |
| 482 |
| 483 public: |
| 484 NaClBitstreamCursor() : ErrHandler(new ErrorHandler(*this)) { |
| 485 init(nullptr); |
| 486 } |
| 487 |
| 488 explicit NaClBitstreamCursor(NaClBitstreamReader &R) |
| 489 : ErrHandler(new ErrorHandler(*this)) { init(&R); } |
| 490 |
| 491 void init(NaClBitstreamReader *R) { |
| 492 freeState(); |
| 493 BitStream = R; |
| 494 NextChar = (BitStream == nullptr) ? 0 : BitStream->getInitialAddress(); |
| 495 Size = 0; |
| 496 BitsInCurWord = 0; |
| 497 if (BitStream) { |
| 498 BlockScope.push_back( |
| 499 Block(BitStream->getBlockInfo(naclbitc::TOP_LEVEL_BLOCKID))); |
| 500 } |
| 501 } |
| 502 |
| 503 ~NaClBitstreamCursor() { |
| 504 freeState(); |
| 505 } |
| 506 |
| 507 void freeState() { |
| 508 while (!BlockScope.empty()) |
| 509 BlockScope.pop_back(); |
| 510 } |
| 511 |
| 512 // Replaces the current bitstream error handler with the new |
| 513 // handler. Takes ownership of the new handler and deletes it when |
| 514 // it is no longer needed. |
| 515 void setErrorHandler(std::unique_ptr<ErrorHandler> &NewHandler) { |
| 516 ErrHandler = std::move(NewHandler); |
| 517 } |
| 518 |
| 519 bool canSkipToPos(size_t pos) const { |
| 520 // pos can be skipped to if it is a valid address or one byte past the end. |
| 521 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( |
| 522 static_cast<uint64_t>(pos - 1)); |
| 523 } |
| 524 |
| 525 bool AtEndOfStream() { |
| 526 if (BitsInCurWord != 0) |
| 527 return false; |
| 528 if (Size != 0) |
| 529 return Size == NextChar; |
| 530 fillCurWord(); |
| 531 return BitsInCurWord == 0; |
| 532 } |
| 533 |
| 534 /// Return the number of bits used to encode an abbrev #. |
| 535 unsigned getAbbrevIDWidth() const { |
| 536 return BlockScope.back().getCodeAbbrev().NumBits; |
| 537 } |
| 538 |
| 539 /// Return the bit # of the bit we are reading. |
| 540 uint64_t GetCurrentBitNo() const { |
| 541 return NextChar*CHAR_BIT - BitsInCurWord; |
| 542 } |
| 543 |
| 544 /// Converts the given position into the corresponding Error position. |
| 545 uint64_t getErrorBitNo(uint64_t Position) const { |
| 546 return BitStream->getErrorOffset() * CHAR_BIT + Position; |
| 547 } |
| 548 |
| 549 /// Returns the current bit address for reporting errors. |
| 550 uint64_t getErrorBitNo() const { |
| 551 return getErrorBitNo(GetCurrentBitNo()); |
| 552 } |
| 553 |
| 554 NaClBitstreamReader *getBitStreamReader() { |
| 555 return BitStream; |
| 556 } |
| 557 const NaClBitstreamReader *getBitStreamReader() const { |
| 558 return BitStream; |
| 559 } |
| 560 |
| 561 /// Returns the current bit address (string) of the bit cursor. |
| 562 std::string getCurrentBitAddress() const { |
| 563 return naclbitc::getBitAddress(GetCurrentBitNo()); |
| 564 } |
| 565 |
| 566 /// Flags that modify the behavior of advance(). |
| 567 enum { |
| 568 /// If this flag is used, the advance() method does not automatically pop |
| 569 /// the block scope when the end of a block is reached. |
| 570 AF_DontPopBlockAtEnd = 1, |
| 571 |
| 572 /// If this flag is used, abbrev entries are returned just like normal |
| 573 /// records. |
| 574 AF_DontAutoprocessAbbrevs = 2 |
| 575 }; |
| 576 |
| 577 /// Advance the current bitstream, returning the next entry in the stream. |
| 578 /// Use the given abbreviation listener (if provided). |
| 579 NaClBitstreamEntry advance(unsigned Flags, NaClAbbrevListener *Listener) { |
| 580 while (1) { |
| 581 unsigned Code = ReadCode(); |
| 582 if (Code == naclbitc::END_BLOCK) { |
| 583 // Pop the end of the block unless Flags tells us not to. |
| 584 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) |
| 585 return NaClBitstreamEntry::getError(); |
| 586 return NaClBitstreamEntry::getEndBlock(); |
| 587 } |
| 588 |
| 589 if (Code == naclbitc::ENTER_SUBBLOCK) |
| 590 return NaClBitstreamEntry::getSubBlock(ReadSubBlockID()); |
| 591 |
| 592 if (Code == naclbitc::DEFINE_ABBREV && |
| 593 !(Flags & AF_DontAutoprocessAbbrevs)) { |
| 594 // We read and accumulate abbrev's, the client can't do anything with |
| 595 // them anyway. |
| 596 ReadAbbrevRecord(true, Listener); |
| 597 continue; |
| 598 } |
| 599 |
| 600 return NaClBitstreamEntry::getRecord(Code); |
| 601 } |
| 602 } |
| 603 |
| 604 /// This is a convenience function for clients that don't expect any |
| 605 /// subblocks. This just skips over them automatically. |
| 606 NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { |
| 607 while (1) { |
| 608 // If we found a normal entry, return it. |
| 609 NaClBitstreamEntry Entry = advance(Flags, 0); |
| 610 if (Entry.Kind != NaClBitstreamEntry::SubBlock) |
| 611 return Entry; |
| 612 |
| 613 // If we found a sub-block, just skip over it and check the next entry. |
| 614 if (SkipBlock()) |
| 615 return NaClBitstreamEntry::getError(); |
| 616 } |
| 617 } |
| 618 |
| 619 /// Returns the starting byte of the word containing BitNo. |
| 620 uintptr_t getStartWordByteForBit(uint64_t BitNo) const { |
| 621 return uintptr_t(BitNo/CHAR_BIT) & ~(sizeof(word_t)-1); |
| 622 } |
| 623 |
| 624 /// Returns the index of BitNo within the word it appears in. |
| 625 unsigned getWordBitNo(uint64_t BitNo) const { |
| 626 return unsigned(BitNo & (sizeof(word_t)*CHAR_BIT-1)); |
| 627 } |
| 628 |
| 629 /// Returns the ending byte of the word containing BitNo. |
| 630 uintptr_t getEndWordByteForBit(uint64_t BitNo) const { |
| 631 return getStartWordByteForBit(BitNo) + |
| 632 (getWordBitNo(BitNo) |
| 633 ? sizeof(word_t) |
| 634 : 0); |
| 635 } |
| 636 |
| 637 /// Fills Buffer[Size] using bytes at Address (in the memory object being |
| 638 /// read). Returns number of bytes filled (less than Size if at end of memory |
| 639 /// object). |
| 640 uint64_t fillBuffer(uint8_t *Buffer, size_t Size, size_t Address) const { |
| 641 return BitStream->getBitcodeBytes().readBytes(Buffer, Size, Address); |
| 642 } |
| 643 |
| 644 /// Reset the stream to the specified bit number. |
| 645 void JumpToBit(uint64_t BitNo) { |
| 646 const uintptr_t ByteNo = getStartWordByteForBit(BitNo); |
| 647 const unsigned WordBitNo = getWordBitNo(BitNo); |
| 648 if (!canSkipToPos(ByteNo)) |
| 649 reportInvalidJumpToBit(BitNo); |
| 650 |
| 651 // Move the cursor to the right word. |
| 652 NextChar = ByteNo; |
| 653 BitsInCurWord = 0; |
| 654 |
| 655 // Skip over any bits that are already consumed. |
| 656 if (WordBitNo) |
| 657 Read(WordBitNo); |
| 658 } |
| 659 |
| 660 void fillCurWord() { |
| 661 assert(Size == 0 || NextChar < (unsigned)Size); |
| 662 |
| 663 // Read the next word from the stream. |
| 664 uint8_t Array[sizeof(word_t)] = {0}; |
| 665 |
| 666 uint64_t BytesRead = fillBuffer(Array, sizeof(Array), NextChar); |
| 667 |
| 668 // If we run out of data, stop at the end of the stream. |
| 669 if (BytesRead == 0) { |
| 670 Size = NextChar; |
| 671 return; |
| 672 } |
| 673 |
| 674 CurWord = |
| 675 support::endian::read<word_t, support::little, support::unaligned>( |
| 676 Array); |
| 677 NextChar += BytesRead; |
| 678 BitsInCurWord = BytesRead * CHAR_BIT; |
| 679 } |
| 680 |
| 681 word_t Read(unsigned NumBits) { |
| 682 static const unsigned BitsInWord = sizeof(word_t) * CHAR_BIT; |
| 683 |
| 684 assert(NumBits && NumBits <= BitsInWord && |
| 685 "Cannot return zero or more than BitsInWord bits!"); |
| 686 |
| 687 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; |
| 688 |
| 689 // If the field is fully contained by CurWord, return it quickly. |
| 690 if (BitsInCurWord >= NumBits) { |
| 691 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); |
| 692 |
| 693 // Use a mask to avoid undefined behavior. |
| 694 CurWord >>= (NumBits & Mask); |
| 695 |
| 696 BitsInCurWord -= NumBits; |
| 697 return R; |
| 698 } |
| 699 |
| 700 word_t R = BitsInCurWord ? CurWord : 0; |
| 701 unsigned BitsLeft = NumBits - BitsInCurWord; |
| 702 |
| 703 fillCurWord(); |
| 704 |
| 705 // If we run out of data, stop at the end of the stream. |
| 706 if (BitsLeft > BitsInCurWord) |
| 707 return 0; |
| 708 |
| 709 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); |
| 710 |
| 711 // Use a mask to avoid undefined behavior. |
| 712 CurWord >>= (BitsLeft & Mask); |
| 713 |
| 714 BitsInCurWord -= BitsLeft; |
| 715 |
| 716 R |= R2 << (NumBits - BitsLeft); |
| 717 |
| 718 return R; |
| 719 } |
| 720 |
| 721 uint32_t ReadVBR(unsigned NumBits) { |
| 722 uint32_t Piece = Read(NumBits); |
| 723 if ((Piece & (1U << (NumBits-1))) == 0) |
| 724 return Piece; |
| 725 |
| 726 uint32_t Result = 0; |
| 727 unsigned NextBit = 0; |
| 728 while (1) { |
| 729 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; |
| 730 |
| 731 if ((Piece & (1U << (NumBits-1))) == 0) |
| 732 return Result; |
| 733 |
| 734 NextBit += NumBits-1; |
| 735 Piece = Read(NumBits); |
| 736 } |
| 737 } |
| 738 |
| 739 // Read a VBR that may have a value up to 64-bits in size. The chunk size of |
| 740 // the VBR must still be <= 32 bits though. |
| 741 uint64_t ReadVBR64(unsigned NumBits) { |
| 742 uint32_t Piece = Read(NumBits); |
| 743 if ((Piece & (1U << (NumBits-1))) == 0) |
| 744 return uint64_t(Piece); |
| 745 |
| 746 uint64_t Result = 0; |
| 747 unsigned NextBit = 0; |
| 748 while (1) { |
| 749 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; |
| 750 |
| 751 if ((Piece & (1U << (NumBits-1))) == 0) |
| 752 return Result; |
| 753 |
| 754 NextBit += NumBits-1; |
| 755 Piece = Read(NumBits); |
| 756 } |
| 757 } |
| 758 |
| 759 private: |
| 760 void SkipToByteBoundary() { |
| 761 unsigned BitsToSkip = BitsInCurWord % CHAR_BIT; |
| 762 if (BitsToSkip) { |
| 763 CurWord >>= BitsToSkip; |
| 764 BitsInCurWord -= BitsToSkip; |
| 765 } |
| 766 } |
| 767 |
| 768 void SkipToByteBoundaryIfAligned() { |
| 769 if (BitStream->AlignBitcodeRecords) |
| 770 SkipToByteBoundary(); |
| 771 } |
| 772 |
| 773 void SkipToFourByteBoundary() { |
| 774 // If word_t is 64-bits and if we've read less than 32 bits, just dump |
| 775 // the bits we have up to the next 32-bit boundary. |
| 776 if (sizeof(word_t) > 4 && |
| 777 BitsInCurWord >= 32) { |
| 778 CurWord >>= BitsInCurWord-32; |
| 779 BitsInCurWord = 32; |
| 780 return; |
| 781 } |
| 782 |
| 783 BitsInCurWord = 0; |
| 784 } |
| 785 public: |
| 786 |
| 787 unsigned ReadCode() { |
| 788 const NaClBitcodeSelectorAbbrev &CodeAbbrev = |
| 789 BlockScope.back().getCodeAbbrev(); |
| 790 return CodeAbbrev.IsFixed |
| 791 ? Read(CodeAbbrev.NumBits) |
| 792 : ReadVBR(CodeAbbrev.NumBits); |
| 793 } |
| 794 |
| 795 // Block header: |
| 796 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] |
| 797 |
| 798 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. |
| 799 unsigned ReadSubBlockID() { |
| 800 return ReadVBR(naclbitc::BlockIDWidth); |
| 801 } |
| 802 |
| 803 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body |
| 804 /// of this block. If the block record is malformed, return true. |
| 805 bool SkipBlock() { |
| 806 // Read and ignore the codelen value. Since we are skipping this block, we |
| 807 // don't care what code widths are used inside of it. |
| 808 ReadVBR(naclbitc::CodeLenWidth); |
| 809 SkipToFourByteBoundary(); |
| 810 unsigned NumFourBytes = Read(naclbitc::BlockSizeWidth); |
| 811 |
| 812 // Check that the block wasn't partially defined, and that the offset isn't |
| 813 // bogus. |
| 814 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*CHAR_BIT; |
| 815 if (AtEndOfStream() || !canSkipToPos(SkipTo/CHAR_BIT)) |
| 816 return true; |
| 817 |
| 818 JumpToBit(SkipTo); |
| 819 return false; |
| 820 } |
| 821 |
| 822 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true |
| 823 /// if the block has an error. |
| 824 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); |
| 825 |
| 826 bool ReadBlockEnd() { |
| 827 if (BlockScope.empty()) return true; |
| 828 |
| 829 // Block tail: |
| 830 // [END_BLOCK, <align4bytes>] |
| 831 SkipToFourByteBoundary(); |
| 832 |
| 833 BlockScope.pop_back(); |
| 834 return false; |
| 835 } |
| 836 |
| 837 private: |
| 838 |
| 839 //===--------------------------------------------------------------------===// |
| 840 // Record Processing |
| 841 //===--------------------------------------------------------------------===// |
| 842 |
| 843 private: |
| 844 // Returns abbreviation encoding associated with Value. |
| 845 NaClBitCodeAbbrevOp::Encoding getEncoding(uint64_t Value); |
| 846 |
| 847 void skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op); |
| 848 |
| 849 // Reads the next Value using the abbreviation Op. Returns true only |
| 850 // if Op is an array (and sets Value to the number of elements in the |
| 851 // array). |
| 852 inline bool readRecordAbbrevField(const NaClBitCodeAbbrevOp &Op, |
| 853 uint64_t &Value); |
| 854 |
| 855 // Reads and returns the next value using the abbreviation Op, |
| 856 // assuming Op appears after an array abbreviation. |
| 857 inline uint64_t readArrayAbbreviatedField(const NaClBitCodeAbbrevOp &Op); |
| 858 |
| 859 // Reads the array abbreviation Op, NumArrayElements times, putting |
| 860 // the read values in Vals. |
| 861 inline void readArrayAbbrev(const NaClBitCodeAbbrevOp &Op, |
| 862 unsigned NumArrayElements, |
| 863 SmallVectorImpl<uint64_t> &Vals); |
| 864 |
| 865 // Reports that abbreviation Index is not valid. |
| 866 void reportInvalidAbbrevNumber(unsigned Index) const; |
| 867 |
| 868 // Reports that jumping to Bit is not valid. |
| 869 void reportInvalidJumpToBit(uint64_t Bit) const; |
| 870 |
| 871 public: |
| 872 |
| 873 /// Return the abbreviation for the specified AbbrevId. |
| 874 const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) const { |
| 875 unsigned AbbrevNo = AbbrevID-naclbitc::FIRST_APPLICATION_ABBREV; |
| 876 const Block &CurBlock = BlockScope.back(); |
| 877 const unsigned NumGlobalAbbrevs = CurBlock.getNumGlobalAbbrevs(); |
| 878 if (AbbrevNo < NumGlobalAbbrevs) |
| 879 return CurBlock.getGlobalAbbrevs().getVector()[AbbrevNo]; |
| 880 unsigned LocalAbbrevNo = AbbrevNo - NumGlobalAbbrevs; |
| 881 NaClBitstreamReader::AbbrevListVector |
| 882 LocalAbbrevs = CurBlock.getLocalAbbrevs().getVector(); |
| 883 if (LocalAbbrevNo >= LocalAbbrevs.size()) |
| 884 reportInvalidAbbrevNumber(AbbrevID); |
| 885 return LocalAbbrevs[LocalAbbrevNo]; |
| 886 } |
| 887 |
| 888 /// Read the current record and discard it. |
| 889 void skipRecord(unsigned AbbrevID); |
| 890 |
| 891 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals); |
| 892 |
| 893 //===--------------------------------------------------------------------===// |
| 894 // Abbrev Processing |
| 895 //===--------------------------------------------------------------------===// |
| 896 // IsLocal indicates where the abbreviation occurs. If it is in the |
| 897 // BlockInfo block, IsLocal is false. In all other cases, IsLocal is |
| 898 // true. |
| 899 void ReadAbbrevRecord(bool IsLocal, |
| 900 NaClAbbrevListener *Listener); |
| 901 |
| 902 // Skips over an abbreviation record. Duplicates code of ReadAbbrevRecord, |
| 903 // except that no abbreviation is built. |
| 904 void SkipAbbrevRecord(); |
| 905 |
| 906 bool ReadBlockInfoBlock(NaClAbbrevListener *Listener); |
| 907 }; |
| 908 |
| 909 } // End llvm namespace |
| 910 |
| 911 #endif |
OLD | NEW |