Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #ifndef V8_PARSING_SCANNER_H_ | 7 #ifndef V8_PARSING_SCANNER_H_ |
| 8 #define V8_PARSING_SCANNER_H_ | 8 #define V8_PARSING_SCANNER_H_ |
| 9 | 9 |
| 10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
| 11 #include "src/base/logging.h" | 11 #include "src/base/logging.h" |
| 12 #include "src/char-predicates.h" | 12 #include "src/char-predicates.h" |
| 13 #include "src/globals.h" | 13 #include "src/globals.h" |
| 14 #include "src/messages.h" | 14 #include "src/messages.h" |
| 15 #include "src/parsing/token.h" | 15 #include "src/parsing/token.h" |
| 16 #include "src/unicode-decoder.h" | 16 #include "src/unicode-decoder.h" |
| 17 #include "src/unicode.h" | 17 #include "src/unicode.h" |
| 18 | 18 |
| 19 namespace v8 { | 19 namespace v8 { |
| 20 namespace internal { | 20 namespace internal { |
| 21 | 21 |
| 22 | 22 |
| 23 class AstRawString; | 23 class AstRawString; |
| 24 class AstValueFactory; | 24 class AstValueFactory; |
| 25 class DuplicateFinder; | 25 class DuplicateFinder; |
| 26 class ExternalOneByteString; | |
| 27 class ExternalTwoByteString; | |
| 26 class ParserRecorder; | 28 class ParserRecorder; |
| 27 class UnicodeCache; | 29 class UnicodeCache; |
| 28 | 30 |
| 29 | |
| 30 // --------------------------------------------------------------------- | 31 // --------------------------------------------------------------------- |
| 31 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. | 32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. |
| 32 // A code unit is a 16 bit value representing either a 16 bit code point | 33 // A code unit is a 16 bit value representing either a 16 bit code point |
| 33 // or one part of a surrogate pair that make a single 21 bit code point. | 34 // or one part of a surrogate pair that make a single 21 bit code point. |
| 34 | |
| 35 class Utf16CharacterStream { | 35 class Utf16CharacterStream { |
| 36 public: | 36 public: |
| 37 Utf16CharacterStream() : pos_(0) { } | 37 static const uc32 kEndOfInput = -1; |
| 38 | |
| 38 virtual ~Utf16CharacterStream() { } | 39 virtual ~Utf16CharacterStream() { } |
| 39 | 40 |
| 40 // Returns and advances past the next UTF-16 code unit in the input | 41 // Returns and advances past the next UTF-16 code unit in the input |
| 41 // stream. If there are no more code units, it returns a negative | 42 // stream. If there are no more code units it returns kEndOfInput. |
| 42 // value. | |
| 43 inline uc32 Advance() { | 43 inline uc32 Advance() { |
| 44 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { | 44 if (V8_LIKELY(buffer_cursor_ < buffer_end_)) { |
| 45 pos_++; | |
| 46 return static_cast<uc32>(*(buffer_cursor_++)); | 45 return static_cast<uc32>(*(buffer_cursor_++)); |
| 46 } else if (ReadBlock()) { | |
|
nickie
2016/09/07 13:28:28
I suppose you did not like how it was before:
if (
vogelheim
2016/09/08 13:09:02
I liked it, but: V8_LIKELY supplies information ab
| |
| 47 return static_cast<uc32>(*(buffer_cursor_++)); | |
| 48 } else { | |
| 49 // Note: currently the following increment is necessary to avoid a | |
| 50 // parser problem! The scanner treats the final kEndOfInput as | |
| 51 // a code unit with a position, and does math relative to that | |
| 52 // position. | |
| 53 buffer_cursor_++; | |
| 54 return kEndOfInput; | |
| 47 } | 55 } |
| 48 // Note: currently the following increment is necessary to avoid a | |
| 49 // parser problem! The scanner treats the final kEndOfInput as | |
| 50 // a code unit with a position, and does math relative to that | |
| 51 // position. | |
| 52 pos_++; | |
| 53 | |
| 54 return kEndOfInput; | |
| 55 } | 56 } |
| 56 | 57 |
| 57 // Return the current position in the code unit stream. | 58 inline void Back() { |
|
nickie
2016/09/07 13:28:28
A comment explaining what this should do would be
vogelheim
2016/09/08 13:09:03
Done.
| |
| 58 // Starts at zero. | 59 if (V8_LIKELY(buffer_cursor_ > buffer_start_)) { |
| 59 inline size_t pos() const { return pos_; } | 60 buffer_cursor_--; |
|
marja
2016/09/07 09:17:57
Nit: --buffer_cursor_;
vogelheim
2016/09/08 13:09:02
Why? [Here and elsewhere.]
------
I find the pre
| |
| 60 | 61 } else { |
| 61 // Skips forward past the next code_unit_count UTF-16 code units | 62 ReadBlockAt(pos() - 1); |
|
nickie
2016/09/07 13:28:28
We discussed this offline. This can make a series
vogelheim
2016/09/08 13:09:02
That's almost the same as Seek. :) The Scanner on
| |
| 62 // in the input, or until the end of input if that comes sooner. | |
| 63 // Returns the number of code units actually skipped. If less | |
| 64 // than code_unit_count, | |
| 65 inline size_t SeekForward(size_t code_unit_count) { | |
| 66 size_t buffered_chars = buffer_end_ - buffer_cursor_; | |
| 67 if (code_unit_count <= buffered_chars) { | |
| 68 buffer_cursor_ += code_unit_count; | |
| 69 pos_ += code_unit_count; | |
| 70 return code_unit_count; | |
| 71 } | 63 } |
| 72 return SlowSeekForward(code_unit_count); | |
| 73 } | 64 } |
| 74 | 65 |
| 75 // Pushes back the most recently read UTF-16 code unit (or negative | 66 inline size_t pos() const { |
| 76 // value if at end of input), i.e., the value returned by the most recent | 67 return buffer_pos_ + (buffer_cursor_ - buffer_start_); |
|
nickie
2016/09/07 13:28:28
This calculation is quite expensive and the pos()
vogelheim
2016/09/08 13:09:02
Hmm. I think I should try it both ways.
(My thin
| |
| 77 // call to Advance. | 68 } |
| 78 // Must not be used right after calling SeekForward. | |
| 79 virtual void PushBack(int32_t code_unit) = 0; | |
| 80 | 69 |
| 81 virtual bool SetBookmark(); | 70 inline void Seek(size_t pos) { |
| 82 virtual void ResetToBookmark(); | 71 if (V8_LIKELY(pos >= buffer_pos_ && |
| 72 pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) { | |
| 73 buffer_cursor_ = buffer_start_ + (pos - buffer_pos_); | |
| 74 } else { | |
| 75 ReadBlockAt(pos); | |
| 76 } | |
| 77 } | |
| 78 | |
| 79 // Legacy API: | |
| 80 void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); } | |
| 81 void PushBack(int32_t code_unit) { | |
| 82 Back(); | |
| 83 #ifdef DEBUG | |
| 84 uc32 t = Advance(); | |
| 85 DCHECK_EQ(t, code_unit); | |
| 86 Back(); | |
|
nickie
2016/09/07 13:28:28
Why not simply this?
DCHECK_EQ(code_unit, static_c
| |
| 87 #endif // DEBUG | |
| 88 } | |
| 89 bool SetBookmark() { | |
|
nickie
2016/09/07 13:28:28
I don't understand why the bookmark should be stor
vogelheim
2016/09/08 13:09:03
You're exactly right: The bookmark doesn't belong
| |
| 90 bookmark_ = pos(); | |
| 91 return true; | |
| 92 } | |
| 93 void ResetToBookmark() { | |
| 94 DCHECK(bookmark_ != (size_t)-1); | |
|
nickie
2016/09/07 13:28:28
How about something like this? (with a better nam
vogelheim
2016/09/08 13:09:03
Done.
| |
| 95 Seek(bookmark_); | |
| 96 } | |
| 83 | 97 |
| 84 protected: | 98 protected: |
| 85 static const uc32 kEndOfInput = -1; | 99 Utf16CharacterStream(const uint16_t* buffer_start, |
| 100 const uint16_t* buffer_cursor, | |
| 101 const uint16_t* buffer_end, size_t buffer_pos) | |
| 102 : buffer_start_(buffer_start), | |
| 103 buffer_cursor_(buffer_cursor), | |
| 104 buffer_end_(buffer_end), | |
| 105 buffer_pos_(buffer_pos), | |
| 106 bookmark_((size_t)-1) {} | |
|
nickie
2016/09/07 13:28:28
Again, NO_BOOKMARK here.
vogelheim
2016/09/08 13:09:03
Done.
| |
| 107 Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {} | |
| 108 | |
| 109 void ReadBlockAt(size_t new_pos) { | |
| 110 // This shouldn't ever be called if new_pos is inside the current buffer. | |
| 111 DCHECK(new_pos < buffer_pos_ || | |
| 112 new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_)); | |
| 113 buffer_pos_ = new_pos; | |
| 114 buffer_cursor_ = buffer_start_; | |
|
nickie
2016/09/07 13:28:28
I don't understand this. Maybe related to my next
vogelheim
2016/09/08 13:09:02
See below.
This wants to set the position for Rea
| |
| 115 ReadBlock(); | |
| 116 } | |
| 86 | 117 |
| 87 // Ensures that the buffer_cursor_ points to the code_unit at | 118 // Ensures that the buffer_cursor_ points to the code_unit at |
| 88 // position pos_ of the input, if possible. If the position | 119 // position pos() of the input. Returns true if data is available; false if |
|
nickie
2016/09/07 13:28:29
How can this not be true?
pos() is calculated in s
vogelheim
2016/09/08 13:09:02
It's meant to explain the post-condition for ReadB
| |
| 89 // is at or after the end of the input, return false. If there | 120 // pos() is at (or after) the end of input |
| 90 // are more code_units available, return true. | |
| 91 virtual bool ReadBlock() = 0; | 121 virtual bool ReadBlock() = 0; |
| 92 virtual size_t SlowSeekForward(size_t code_unit_count) = 0; | |
| 93 | 122 |
| 123 const uint16_t* buffer_start_; | |
| 94 const uint16_t* buffer_cursor_; | 124 const uint16_t* buffer_cursor_; |
| 95 const uint16_t* buffer_end_; | 125 const uint16_t* buffer_end_; |
| 96 size_t pos_; | 126 size_t buffer_pos_; |
| 127 size_t bookmark_; | |
| 97 }; | 128 }; |
| 98 | 129 |
| 99 | 130 |
| 100 // ---------------------------------------------------------------------------- | 131 // ---------------------------------------------------------------------------- |
| 101 // JavaScript Scanner. | 132 // JavaScript Scanner. |
| 102 | 133 |
| 103 class Scanner { | 134 class Scanner { |
| 104 public: | 135 public: |
| 105 // Scoped helper for a re-settable bookmark. | 136 // Scoped helper for a re-settable bookmark. |
| 106 class BookmarkScope { | 137 class BookmarkScope { |
| (...skipping 24 matching lines...) | |
| 131 } | 162 } |
| 132 | 163 |
| 133 static Location invalid() { return Location(-1, -1); } | 164 static Location invalid() { return Location(-1, -1); } |
| 134 | 165 |
| 135 int beg_pos; | 166 int beg_pos; |
| 136 int end_pos; | 167 int end_pos; |
| 137 }; | 168 }; |
| 138 | 169 |
| 139 // -1 is outside of the range of any real source code. | 170 // -1 is outside of the range of any real source code. |
| 140 static const int kNoOctalLocation = -1; | 171 static const int kNoOctalLocation = -1; |
| 172 static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput; | |
| 141 | 173 |
| 142 explicit Scanner(UnicodeCache* scanner_contants); | 174 explicit Scanner(UnicodeCache* scanner_contants); |
| 143 | 175 |
| 144 void Initialize(Utf16CharacterStream* source); | 176 void Initialize(Utf16CharacterStream* source); |
| 145 | 177 |
| 146 // Returns the next token and advances input. | 178 // Returns the next token and advances input. |
| 147 Token::Value Next(); | 179 Token::Value Next(); |
| 148 // Returns the token following peek() | 180 // Returns the token following peek() |
| 149 Token::Value PeekAhead(); | 181 Token::Value PeekAhead(); |
| 150 // Returns the current token again. | 182 // Returns the current token again. |
| (...skipping 641 matching lines...) | |
| 792 bool found_html_comment_; | 824 bool found_html_comment_; |
| 793 | 825 |
| 794 MessageTemplate::Template scanner_error_; | 826 MessageTemplate::Template scanner_error_; |
| 795 Location scanner_error_location_; | 827 Location scanner_error_location_; |
| 796 }; | 828 }; |
| 797 | 829 |
| 798 } // namespace internal | 830 } // namespace internal |
| 799 } // namespace v8 | 831 } // namespace v8 |
| 800 | 832 |
| 801 #endif // V8_PARSING_SCANNER_H_ | 833 #endif // V8_PARSING_SCANNER_H_ |
| OLD | NEW |