| Index: src/parsing/scanner.h
|
| diff --git a/src/parsing/scanner.h b/src/parsing/scanner.h
|
| index cb37f7cc1a5dd7025579d03902a789c6c1e530bb..7d8f052558ae6e64adc6633012cf9895636bd7eb 100644
|
| --- a/src/parsing/scanner.h
|
| +++ b/src/parsing/scanner.h
|
| @@ -23,77 +23,157 @@ namespace internal {
|
| class AstRawString;
|
| class AstValueFactory;
|
| class DuplicateFinder;
|
| +class ExternalOneByteString;
|
| +class ExternalTwoByteString;
|
| class ParserRecorder;
|
| class UnicodeCache;
|
|
|
| -
|
| // ---------------------------------------------------------------------
|
| // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
|
| // A code unit is a 16 bit value representing either a 16 bit code point
|
| // or one part of a surrogate pair that make a single 21 bit code point.
|
| -
|
| class Utf16CharacterStream {
|
| public:
|
| - Utf16CharacterStream() : pos_(0) { }
|
| + static const uc32 kEndOfInput = -1;
|
| +
|
| virtual ~Utf16CharacterStream() { }
|
|
|
| // Returns and advances past the next UTF-16 code unit in the input
|
| - // stream. If there are no more code units, it returns a negative
|
| - // value.
|
| + // stream. If there are no more code units it returns kEndOfInput.
|
| inline uc32 Advance() {
|
| - if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
|
| - pos_++;
|
| + if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
|
| return static_cast<uc32>(*(buffer_cursor_++));
|
| + } else if (ReadBlock()) {
|
| + return static_cast<uc32>(*(buffer_cursor_++));
|
| + } else {
|
| + // Note: currently the following increment is necessary to avoid a
|
| + // parser problem! The scanner treats the final kEndOfInput as
|
| + // a code unit with a position, and does math relative to that
|
| + // position.
|
| + buffer_cursor_++;
|
| + return kEndOfInput;
|
| }
|
| - // Note: currently the following increment is necessary to avoid a
|
| - // parser problem! The scanner treats the final kEndOfInput as
|
| - // a code unit with a position, and does math relative to that
|
| - // position.
|
| - pos_++;
|
| -
|
| - return kEndOfInput;
|
| - }
|
| -
|
| - // Return the current position in the code unit stream.
|
| - // Starts at zero.
|
| - inline size_t pos() const { return pos_; }
|
| -
|
| - // Skips forward past the next code_unit_count UTF-16 code units
|
| - // in the input, or until the end of input if that comes sooner.
|
| - // Returns the number of code units actually skipped. If less
|
| - // than code_unit_count,
|
| - inline size_t SeekForward(size_t code_unit_count) {
|
| - size_t buffered_chars = buffer_end_ - buffer_cursor_;
|
| - if (code_unit_count <= buffered_chars) {
|
| - buffer_cursor_ += code_unit_count;
|
| - pos_ += code_unit_count;
|
| - return code_unit_count;
|
| + }
|
| +
|
| + // Go back by one character in the input stream.
|
| + // This undoes the most recent Advance().
|
| + inline void Back() {
|
| + // The common case - the previous character lies within
|
| + // buffer_start_ .. buffer_end_ - is handled locally.
|
| + // Otherwise, a new block is requested.
|
| + if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
|
| + buffer_cursor_--;
|
| + } else {
|
| + ReadBlockAt(pos() - 1);
|
| }
|
| - return SlowSeekForward(code_unit_count);
|
| }
|
|
|
| - // Pushes back the most recently read UTF-16 code unit (or negative
|
| - // value if at end of input), i.e., the value returned by the most recent
|
| - // call to Advance.
|
| - // Must not be used right after calling SeekForward.
|
| - virtual void PushBack(int32_t code_unit) = 0;
|
| + // Go back by two characters in the input stream. (This is the same as
|
| + // calling Back() twice. But Back() may - in some instances - do substantial
|
| + // work. Back2() guarantees this work will be done only once.)
|
| + inline void Back2() {
|
| + if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {
|
| + buffer_cursor_ -= 2;
|
| + } else {
|
| + ReadBlockAt(pos() - 2);
|
| + }
|
| + }
|
|
|
| - virtual bool SetBookmark();
|
| - virtual void ResetToBookmark();
|
| + inline size_t pos() const {
|
| + return buffer_pos_ + (buffer_cursor_ - buffer_start_);
|
| + }
|
|
|
| - protected:
|
| - static const uc32 kEndOfInput = -1;
|
| + inline void Seek(size_t pos) {
|
| + if (V8_LIKELY(pos >= buffer_pos_ &&
|
| + pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
|
| + buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
|
| + } else {
|
| + ReadBlockAt(pos);
|
| + }
|
| + }
|
| +
|
| + // Legacy API:
|
| + void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); }
|
| + void PushBack(int32_t code_unit) {
|
| + Back();
|
| +#ifdef DEBUG
|
| + uc32 t = Advance();
|
| + DCHECK_EQ(t, code_unit);
|
| + Back();
|
| +#endif // DEBUG
|
| + }
|
| + void PushBack2(int32_t code_unit_back_1, int32_t code_unit_back_2) {
|
| + Back2();
|
| +#ifdef DEBUG
|
| + DCHECK_EQ(Advance(), code_unit_back_2);
|
| + DCHECK_EQ(Advance(), code_unit_back_1);
|
| + Back2();
|
| +#endif // DEBUG
|
| + }
|
| + bool SetBookmark() {
|
| + bookmark_ = pos();
|
| + return true;
|
| + }
|
| + void ResetToBookmark() {
|
| + DCHECK_NE(bookmark_, kNoBookmark);
|
| + Seek(bookmark_);
|
| + }
|
|
|
| - // Ensures that the buffer_cursor_ points to the code_unit at
|
| - // position pos_ of the input, if possible. If the position
|
| - // is at or after the end of the input, return false. If there
|
| - // are more code_units available, return true.
|
| + protected:
|
| + static const size_t kNoBookmark;
|
| +
|
| + Utf16CharacterStream(const uint16_t* buffer_start,
|
| + const uint16_t* buffer_cursor,
|
| + const uint16_t* buffer_end, size_t buffer_pos)
|
| + : buffer_start_(buffer_start),
|
| + buffer_cursor_(buffer_cursor),
|
| + buffer_end_(buffer_end),
|
| + buffer_pos_(buffer_pos),
|
| + bookmark_(kNoBookmark) {}
|
| + Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
|
| +
|
| + void ReadBlockAt(size_t new_pos) {
|
| + // The callers of this method (Back/Back2/Seek) should handle the easy
|
| + // case (seeking within the current buffer), and we should only get here
|
| + // if we actually require new data.
|
| + // (This is really an efficiency check, not a correctness invariant.)
|
| + DCHECK(new_pos < buffer_pos_ ||
|
| + new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
|
| +
|
| + // Change pos() to point to new_pos.
|
| + buffer_pos_ = new_pos;
|
| + buffer_cursor_ = buffer_start_;
|
| + bool success = ReadBlock();
|
| + USE(success);
|
| +
|
| + // Post-conditions: 1, on success, we should be at the right position.
|
| + // 2, success == we should have more characters available.
|
| + DCHECK_IMPLIES(success, pos() == new_pos);
|
| + DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
|
| + DCHECK_EQ(success, buffer_start_ < buffer_end_);
|
| + }
|
| +
|
| + // Read more data, and update buffer_*_ to point to it.
|
| + // Returns true if more data was available.
|
| + //
|
| + // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
|
| + // the result of pos() remains unaffected.
|
| + //
|
| + // Examples:
|
| + // - a stream could fill a separate buffer. Then buffer_start_ and
|
| + // buffer_cursor_ would point to the beginning of the buffer, and
|
| + // buffer_pos_ would be the old pos().
|
| + // - a stream with existing buffer chunks would set buffer_start_ and
|
| + // buffer_end_ to cover the full chunk, and then buffer_cursor_ would
|
| + // point into the middle of the buffer, while buffer_pos_ would describe
|
| + // the start of the buffer.
|
| virtual bool ReadBlock() = 0;
|
| - virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
|
|
|
| + const uint16_t* buffer_start_;
|
| const uint16_t* buffer_cursor_;
|
| const uint16_t* buffer_end_;
|
| - size_t pos_;
|
| + size_t buffer_pos_;
|
| + size_t bookmark_;
|
| };
|
|
|
|
|
| @@ -138,6 +218,7 @@ class Scanner {
|
|
|
| // -1 is outside of the range of any real source code.
|
| static const int kNoOctalLocation = -1;
|
| + static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
|
|
|
| explicit Scanner(UnicodeCache* scanner_contants);
|
|
|
|
|