Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(411)

Side by Side Diff: src/parsing/scanner.h

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Niko's feedback and fix compile even harder Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #ifndef V8_PARSING_SCANNER_H_ 7 #ifndef V8_PARSING_SCANNER_H_
8 #define V8_PARSING_SCANNER_H_ 8 #define V8_PARSING_SCANNER_H_
9 9
10 #include "src/allocation.h" 10 #include "src/allocation.h"
11 #include "src/base/logging.h" 11 #include "src/base/logging.h"
12 #include "src/char-predicates.h" 12 #include "src/char-predicates.h"
13 #include "src/globals.h" 13 #include "src/globals.h"
14 #include "src/messages.h" 14 #include "src/messages.h"
15 #include "src/parsing/token.h" 15 #include "src/parsing/token.h"
16 #include "src/unicode-decoder.h" 16 #include "src/unicode-decoder.h"
17 #include "src/unicode.h" 17 #include "src/unicode.h"
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 22
23 class AstRawString; 23 class AstRawString;
24 class AstValueFactory; 24 class AstValueFactory;
25 class DuplicateFinder; 25 class DuplicateFinder;
26 class ExternalOneByteString;
27 class ExternalTwoByteString;
26 class ParserRecorder; 28 class ParserRecorder;
27 class UnicodeCache; 29 class UnicodeCache;
28 30
29
30 // --------------------------------------------------------------------- 31 // ---------------------------------------------------------------------
31 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. 32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
32 // A code unit is a 16 bit value representing either a 16 bit code point 33 // A code unit is a 16 bit value representing either a 16 bit code point
33 // or one part of a surrogate pair that make a single 21 bit code point. 34 // or one part of a surrogate pair that make a single 21 bit code point.
34
35 class Utf16CharacterStream { 35 class Utf16CharacterStream {
36 public: 36 public:
37 Utf16CharacterStream() : pos_(0) { } 37 static const uc32 kEndOfInput = -1;
38
38 virtual ~Utf16CharacterStream() { } 39 virtual ~Utf16CharacterStream() { }
39 40
40 // Returns and advances past the next UTF-16 code unit in the input 41 // Returns and advances past the next UTF-16 code unit in the input
41 // stream. If there are no more code units, it returns a negative 42 // stream. If there are no more code units it returns kEndOfInput.
42 // value.
43 inline uc32 Advance() { 43 inline uc32 Advance() {
44 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { 44 if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
45 pos_++;
46 return static_cast<uc32>(*(buffer_cursor_++)); 45 return static_cast<uc32>(*(buffer_cursor_++));
46 } else if (ReadBlock()) {
47 return static_cast<uc32>(*(buffer_cursor_++));
48 } else {
49 // Note: currently the following increment is necessary to avoid a
50 // parser problem! The scanner treats the final kEndOfInput as
51 // a code unit with a position, and does math relative to that
52 // position.
53 buffer_cursor_++;
54 return kEndOfInput;
47 } 55 }
48 // Note: currently the following increment is necessary to avoid a
49 // parser problem! The scanner treats the final kEndOfInput as
50 // a code unit with a position, and does math relative to that
51 // position.
52 pos_++;
53
54 return kEndOfInput;
55 } 56 }
56 57
57 // Return the current position in the code unit stream. 58 // Go back by one character in the input stream.
58 // Starts at zero. 59 // This undoes the most recent Advance().
59 inline size_t pos() const { return pos_; } 60 inline void Back() {
60 61 // The common case - if the previous character is within
61 // Skips forward past the next code_unit_count UTF-16 code units 62 // buffer_start_ .. buffer_end_ will be handled locally.
62 // in the input, or until the end of input if that comes sooner. 63 // Otherwise, a new block is requested.
63 // Returns the number of code units actually skipped. If less 64 if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
64 // than code_unit_count, 65 buffer_cursor_--;
65 inline size_t SeekForward(size_t code_unit_count) { 66 } else {
66 size_t buffered_chars = buffer_end_ - buffer_cursor_; 67 ReadBlockAt(pos() - 1);
67 if (code_unit_count <= buffered_chars) {
68 buffer_cursor_ += code_unit_count;
69 pos_ += code_unit_count;
70 return code_unit_count;
71 } 68 }
72 return SlowSeekForward(code_unit_count);
73 } 69 }
74 70
75 // Pushes back the most recently read UTF-16 code unit (or negative 71 // Go back by two characters in the input stream. (This is the same as
76 // value if at end of input), i.e., the value returned by the most recent 72 // calling Back() twice. But Back() may - in some instances - do substantial
77 // call to Advance. 73 // work. Back2() guarantees this work will be done only once.)
78 // Must not be used right after calling SeekForward. 74 inline void Back2() {
79 virtual void PushBack(int32_t code_unit) = 0; 75 if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {
76 buffer_cursor_ -= 2;
77 } else {
78 ReadBlockAt(pos() - 2);
79 }
80 }
80 81
81 virtual bool SetBookmark(); 82 inline size_t pos() const {
82 virtual void ResetToBookmark(); 83 return buffer_pos_ + (buffer_cursor_ - buffer_start_);
84 }
85
86 inline void Seek(size_t pos) {
87 if (V8_LIKELY(pos >= buffer_pos_ &&
88 pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
89 buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
90 } else {
91 ReadBlockAt(pos);
92 }
93 }
94
95 // Legacy API:
96 void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); }
97 void PushBack(int32_t code_unit) {
98 Back();
99 #ifdef DEBUG
100 uc32 t = Advance();
101 DCHECK_EQ(t, code_unit);
102 Back();
103 #endif // DEBUG
104 }
105 void PushBack2(int32_t code_unit_back_1, int32_t code_unit_back_2) {
106 Back2();
107 #ifdef DEBUG
108 DCHECK_EQ(Advance(), code_unit_back_2);
109 DCHECK_EQ(Advance(), code_unit_back_1);
110 Back2();
111 #endif // DEBUG
112 }
113 bool SetBookmark() {
114 bookmark_ = pos();
115 return true;
116 }
117 void ResetToBookmark() {
118 DCHECK_NE(bookmark_, kNoBookmark);
119 Seek(bookmark_);
120 }
83 121
84 protected: 122 protected:
85 static const uc32 kEndOfInput = -1; 123 static const size_t kNoBookmark;
86 124
87 // Ensures that the buffer_cursor_ points to the code_unit at 125 Utf16CharacterStream(const uint16_t* buffer_start,
88 // position pos_ of the input, if possible. If the position 126 const uint16_t* buffer_cursor,
89 // is at or after the end of the input, return false. If there 127 const uint16_t* buffer_end, size_t buffer_pos)
90 // are more code_units available, return true. 128 : buffer_start_(buffer_start),
129 buffer_cursor_(buffer_cursor),
130 buffer_end_(buffer_end),
131 buffer_pos_(buffer_pos),
132 bookmark_(kNoBookmark) {}
133 Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
134
135 void ReadBlockAt(size_t new_pos) {
136 // The callers of this method (Back/Back2/Seek) should handle the easy
137 // case (seeking within the current buffer), and we should only get here
138 // if we actually require new data.
139 // (This is really an efficiency check, not a correctness invariant.)
140 DCHECK(new_pos < buffer_pos_ ||
141 new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
142
143 // Change pos() to point to new_pos.
144 buffer_pos_ = new_pos;
145 buffer_cursor_ = buffer_start_;
146 bool success = ReadBlock();
147 USE(success);
148
149 // Post-conditions: 1, on success, we should be at the right position.
150 // 2, success == we should have more characters available.
151 DCHECK_IMPLIES(success, pos() == new_pos);
152 DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
153 DCHECK_EQ(success, buffer_start_ < buffer_end_);
154 }
155
156 // Read more data, and update buffer_*_ to point to it.
157 // Returns true if more data was available.
158 //
159 // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
160 // the result of pos() remains unaffected.
161 //
162 // Examples:
163 // - a stream could either fill a separate buffer. Then buffer_start_ and
164 // buffer_cursor_ would point to the beginning of the buffer, and
165 // buffer_pos_ would be the old pos().
166 // - a stream with existing buffer chunks would set buffer_start_ and
167 // buffer_end_ to cover the full chunk, and then buffer_cursor_ would
168 // point into the middle of the buffer, while buffer_pos_ would describe
169 // the start of the buffer.
91 virtual bool ReadBlock() = 0; 170 virtual bool ReadBlock() = 0;
92 virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
93 171
172 const uint16_t* buffer_start_;
94 const uint16_t* buffer_cursor_; 173 const uint16_t* buffer_cursor_;
95 const uint16_t* buffer_end_; 174 const uint16_t* buffer_end_;
96 size_t pos_; 175 size_t buffer_pos_;
176 size_t bookmark_;
97 }; 177 };
98 178
99 179
100 // ---------------------------------------------------------------------------- 180 // ----------------------------------------------------------------------------
101 // JavaScript Scanner. 181 // JavaScript Scanner.
102 182
103 class Scanner { 183 class Scanner {
104 public: 184 public:
105 // Scoped helper for a re-settable bookmark. 185 // Scoped helper for a re-settable bookmark.
106 class BookmarkScope { 186 class BookmarkScope {
(...skipping 24 matching lines...) Expand all
131 } 211 }
132 212
133 static Location invalid() { return Location(-1, -1); } 213 static Location invalid() { return Location(-1, -1); }
134 214
135 int beg_pos; 215 int beg_pos;
136 int end_pos; 216 int end_pos;
137 }; 217 };
138 218
139 // -1 is outside of the range of any real source code. 219 // -1 is outside of the range of any real source code.
140 static const int kNoOctalLocation = -1; 220 static const int kNoOctalLocation = -1;
221 static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
141 222
142 explicit Scanner(UnicodeCache* scanner_contants); 223 explicit Scanner(UnicodeCache* scanner_contants);
143 224
144 void Initialize(Utf16CharacterStream* source); 225 void Initialize(Utf16CharacterStream* source);
145 226
146 // Returns the next token and advances input. 227 // Returns the next token and advances input.
147 Token::Value Next(); 228 Token::Value Next();
148 // Returns the token following peek() 229 // Returns the token following peek()
149 Token::Value PeekAhead(); 230 Token::Value PeekAhead();
150 // Returns the current token again. 231 // Returns the current token again.
(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after
792 bool found_html_comment_; 873 bool found_html_comment_;
793 874
794 MessageTemplate::Template scanner_error_; 875 MessageTemplate::Template scanner_error_;
795 Location scanner_error_location_; 876 Location scanner_error_location_;
796 }; 877 };
797 878
798 } // namespace internal 879 } // namespace internal
799 } // namespace v8 880 } // namespace v8
800 881
801 #endif // V8_PARSING_SCANNER_H_ 882 #endif // V8_PARSING_SCANNER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698