 Chromium Code Reviews
 Issue 2314663002: Rework scanner-character-streams.  (Closed)
  | Index: src/parsing/scanner.h | 
| diff --git a/src/parsing/scanner.h b/src/parsing/scanner.h | 
| index cb37f7cc1a5dd7025579d03902a789c6c1e530bb..d4b43931222ffc655e9e9b2c2f8d8e86e12556ea 100644 | 
| --- a/src/parsing/scanner.h | 
| +++ b/src/parsing/scanner.h | 
| @@ -23,77 +23,127 @@ namespace internal { | 
| class AstRawString; | 
| class AstValueFactory; | 
| class DuplicateFinder; | 
| +class ExternalOneByteString; | 
| +class ExternalTwoByteString; | 
| class ParserRecorder; | 
| class UnicodeCache; | 
| - | 
| // --------------------------------------------------------------------- | 
| // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. | 
| // A code unit is a 16 bit value representing either a 16 bit code point | 
| // or one part of a surrogate pair that make a single 21 bit code point. | 
| - | 
| class Utf16CharacterStream { | 
| public: | 
| - Utf16CharacterStream() : pos_(0) { } | 
| + static const uc32 kEndOfInput = -1; | 
| + | 
| virtual ~Utf16CharacterStream() { } | 
| // Returns and advances past the next UTF-16 code unit in the input | 
| - // stream. If there are no more code units, it returns a negative | 
| - // value. | 
| + // stream. If there are no more code units it returns kEndOfInput. | 
| inline uc32 Advance() { | 
| - if (buffer_cursor_ < buffer_end_ || ReadBlock()) { | 
| - pos_++; | 
| + if (V8_LIKELY(buffer_cursor_ < buffer_end_)) { | 
| return static_cast<uc32>(*(buffer_cursor_++)); | 
| + } else if (ReadBlock()) { | 
| + return static_cast<uc32>(*(buffer_cursor_++)); | 
| + } else { | 
| + // Note: currently the following increment is necessary to avoid a | 
| + // parser problem! The scanner treats the final kEndOfInput as | 
| + // a code unit with a position, and does math relative to that | 
| + // position. | 
| + buffer_cursor_++; | 
| 
marja
2016/09/08 09:46:23
Hmm, I commented on this before, right? foo++ / fo…
 
vogelheim
2016/09/08 15:02:18
Why?
[Brief discussion in previous round of answe…
 | 
| + return kEndOfInput; | 
| } | 
| - // Note: currently the following increment is necessary to avoid a | 
| - // parser problem! The scanner treats the final kEndOfInput as | 
| - // a code unit with a position, and does math relative to that | 
| - // position. | 
| - pos_++; | 
| - | 
| - return kEndOfInput; | 
| - } | 
| - | 
| - // Return the current position in the code unit stream. | 
| - // Starts at zero. | 
| - inline size_t pos() const { return pos_; } | 
| - | 
| - // Skips forward past the next code_unit_count UTF-16 code units | 
| - // in the input, or until the end of input if that comes sooner. | 
| - // Returns the number of code units actually skipped. If less | 
| - // than code_unit_count, | 
| - inline size_t SeekForward(size_t code_unit_count) { | 
| - size_t buffered_chars = buffer_end_ - buffer_cursor_; | 
| - if (code_unit_count <= buffered_chars) { | 
| - buffer_cursor_ += code_unit_count; | 
| - pos_ += code_unit_count; | 
| - return code_unit_count; | 
| + } | 
| + | 
| + inline void Back() { | 
| + if (V8_LIKELY(buffer_cursor_ > buffer_start_)) { | 
| + buffer_cursor_--; | 
| + } else { | 
| + ReadBlockAt(pos() - 1); | 
| + } | 
| + } | 
| + | 
| + inline void Back2() { | 
| + if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) { | 
| + buffer_cursor_ -= 2; | 
| + } else { | 
| + ReadBlockAt(pos() - 2); | 
| } | 
| - return SlowSeekForward(code_unit_count); | 
| } | 
| - // Pushes back the most recently read UTF-16 code unit (or negative | 
| - // value if at end of input), i.e., the value returned by the most recent | 
| - // call to Advance. | 
| - // Must not be used right after calling SeekForward. | 
| - virtual void PushBack(int32_t code_unit) = 0; | 
| + inline size_t pos() const { | 
| + return buffer_pos_ + (buffer_cursor_ - buffer_start_); | 
| + } | 
| - virtual bool SetBookmark(); | 
| - virtual void ResetToBookmark(); | 
| + inline void Seek(size_t pos) { | 
| + if (V8_LIKELY(pos >= buffer_pos_ && | 
| + pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) { | 
| + buffer_cursor_ = buffer_start_ + (pos - buffer_pos_); | 
| + } else { | 
| + ReadBlockAt(pos); | 
| + } | 
| + } | 
| + | 
| + // Legacy API: | 
| + void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); } | 
| + void PushBack(int32_t code_unit) { | 
| + Back(); | 
| +#ifdef DEBUG | 
| + uc32 t = Advance(); | 
| + DCHECK_EQ(t, code_unit); | 
| + Back(); | 
| +#endif // DEBUG | 
| + } | 
| + void PushBack2(int32_t code_unit_back_1, int32_t code_unit_back_2) { | 
| + Back2(); | 
| +#ifdef DEBUG | 
| + DCHECK_EQ(Advance(), code_unit_back_2); | 
| + DCHECK_EQ(Advance(), code_unit_back_1); | 
| + Back2(); | 
| +#endif // DEBUG | 
| + } | 
| + bool SetBookmark() { | 
| + bookmark_ = pos(); | 
| + return true; | 
| + } | 
| + void ResetToBookmark() { | 
| + DCHECK(bookmark_ != (size_t)-1); | 
| + Seek(bookmark_); | 
| + } | 
| protected: | 
| - static const uc32 kEndOfInput = -1; | 
| + Utf16CharacterStream(const uint16_t* buffer_start, | 
| + const uint16_t* buffer_cursor, | 
| + const uint16_t* buffer_end, size_t buffer_pos) | 
| + : buffer_start_(buffer_start), | 
| + buffer_cursor_(buffer_cursor), | 
| + buffer_end_(buffer_end), | 
| + buffer_pos_(buffer_pos), | 
| + bookmark_((size_t)-1) {} | 
| + Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {} | 
| + | 
| + void ReadBlockAt(size_t new_pos) { | 
| + // The callers of this method (Back/Back2/Seek) should handle the easy | 
| + // case (seeking within the current buffer), and we should only get here | 
| + // if we actually require new data. | 
| + // (This is really an efficiency check, not a correctness invariant.) | 
| + DCHECK(new_pos < buffer_pos_ || | 
| + new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_)); | 
| + buffer_pos_ = new_pos; | 
| + buffer_cursor_ = buffer_start_; | 
| + ReadBlock(); | 
| + } | 
| // Ensures that the buffer_cursor_ points to the code_unit at | 
| - // position pos_ of the input, if possible. If the position | 
| - // is at or after the end of the input, return false. If there | 
| - // are more code_units available, return true. | 
| + // position pos() of the input. Returns true if data is available; false if | 
| + // pos() is at (or after) the end of input | 
| virtual bool ReadBlock() = 0; | 
| - virtual size_t SlowSeekForward(size_t code_unit_count) = 0; | 
| + const uint16_t* buffer_start_; | 
| const uint16_t* buffer_cursor_; | 
| const uint16_t* buffer_end_; | 
| - size_t pos_; | 
| + size_t buffer_pos_; | 
| + size_t bookmark_; | 
| }; | 
| @@ -138,6 +188,7 @@ class Scanner { | 
| // -1 is outside of the range of any real source code. | 
| static const int kNoOctalLocation = -1; | 
| + static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput; | 
| explicit Scanner(UnicodeCache* scanner_contants); |