Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(507)

Side by Side Diff: src/parsing/scanner.h

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Feedback, round 2. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #ifndef V8_PARSING_SCANNER_H_ 7 #ifndef V8_PARSING_SCANNER_H_
8 #define V8_PARSING_SCANNER_H_ 8 #define V8_PARSING_SCANNER_H_
9 9
10 #include "src/allocation.h" 10 #include "src/allocation.h"
11 #include "src/base/logging.h" 11 #include "src/base/logging.h"
12 #include "src/char-predicates.h" 12 #include "src/char-predicates.h"
13 #include "src/globals.h" 13 #include "src/globals.h"
14 #include "src/messages.h" 14 #include "src/messages.h"
15 #include "src/parsing/token.h" 15 #include "src/parsing/token.h"
16 #include "src/unicode-decoder.h" 16 #include "src/unicode-decoder.h"
17 #include "src/unicode.h" 17 #include "src/unicode.h"
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 22
23 class AstRawString; 23 class AstRawString;
24 class AstValueFactory; 24 class AstValueFactory;
25 class DuplicateFinder; 25 class DuplicateFinder;
26 class ExternalOneByteString;
27 class ExternalTwoByteString;
26 class ParserRecorder; 28 class ParserRecorder;
27 class UnicodeCache; 29 class UnicodeCache;
28 30
29
30 // --------------------------------------------------------------------- 31 // ---------------------------------------------------------------------
31 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. 32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
32 // A code unit is a 16 bit value representing either a 16 bit code point 33 // A code unit is a 16 bit value representing either a 16 bit code point
33 // or one part of a surrogate pair that make a single 21 bit code point. 34 // or one part of a surrogate pair that make a single 21 bit code point.
34
35 class Utf16CharacterStream { 35 class Utf16CharacterStream {
36 public: 36 public:
37 Utf16CharacterStream() : pos_(0) { } 37 static const uc32 kEndOfInput = -1;
38
38 virtual ~Utf16CharacterStream() { } 39 virtual ~Utf16CharacterStream() { }
39 40
40 // Returns and advances past the next UTF-16 code unit in the input 41 // Returns and advances past the next UTF-16 code unit in the input
41 // stream. If there are no more code units, it returns a negative 42 // stream. If there are no more code units it returns kEndOfInput.
42 // value.
43 inline uc32 Advance() { 43 inline uc32 Advance() {
44 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { 44 if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
45 pos_++;
46 return static_cast<uc32>(*(buffer_cursor_++)); 45 return static_cast<uc32>(*(buffer_cursor_++));
46 } else if (ReadBlock()) {
47 return static_cast<uc32>(*(buffer_cursor_++));
48 } else {
49 // Note: currently the following increment is necessary to avoid a
50 // parser problem! The scanner treats the final kEndOfInput as
51 // a code unit with a position, and does math relative to that
52 // position.
53 buffer_cursor_++;
54 return kEndOfInput;
47 } 55 }
48 // Note: currently the following increment is necessary to avoid a
49 // parser problem! The scanner treats the final kEndOfInput as
50 // a code unit with a position, and does math relative to that
51 // position.
52 pos_++;
53
54 return kEndOfInput;
55 } 56 }
56 57
57 // Return the current position in the code unit stream. 58 // Return the scanner by one character. This effectively undoes the most
nickie 2016/09/09 11:21:48 Consider changing the first sentence to "Go back o
vogelheim 2016/09/14 11:28:19 Done.
58 // Starts at zero. 59 // recent Advance().
59 inline size_t pos() const { return pos_; } 60 inline void Back() {
60 61 // The common case - if the previous character is within
61 // Skips forward past the next code_unit_count UTF-16 code units 62 // buffer_start_ .. buffer_end_ will be handled locally. Otherwise, a new
62 // in the input, or until the end of input if that comes sooner. 63 // block is requested.
nickie 2016/09/09 11:21:48 nit: Move "Otherwise..." to the third line of the
vogelheim 2016/09/14 11:28:19 Done. (Not sure if "cl format" will put it back...
63 // Returns the number of code units actually skipped. If less 64 if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
64 // than code_unit_count, 65 buffer_cursor_--;
65 inline size_t SeekForward(size_t code_unit_count) { 66 } else {
66 size_t buffered_chars = buffer_end_ - buffer_cursor_; 67 ReadBlockAt(pos() - 1);
67 if (code_unit_count <= buffered_chars) {
68 buffer_cursor_ += code_unit_count;
69 pos_ += code_unit_count;
70 return code_unit_count;
71 } 68 }
72 return SlowSeekForward(code_unit_count);
73 } 69 }
74 70
75 // Pushes back the most recently read UTF-16 code unit (or negative 71 // Return the scanner by two characters, the same as calling Back() twice.
nickie 2016/09/09 11:21:48 Same.
vogelheim 2016/09/14 11:28:19 Done.
76 // value if at end of input), i.e., the value returned by the most recent 72 // (Back() may - in some instances - do substantial work. Back2() guarantees
77 // call to Advance. 73 // this will be done only twice.
nickie 2016/09/09 11:21:48 I think you mean "guarantees that this will not be
vogelheim 2016/09/14 11:28:19 Done.
78 // Must not be used right after calling SeekForward. 74 inline void Back2() {
79 virtual void PushBack(int32_t code_unit) = 0; 75 if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {
76 buffer_cursor_ -= 2;
77 } else {
78 ReadBlockAt(pos() - 2);
79 }
80 }
80 81
81 virtual bool SetBookmark(); 82 inline size_t pos() const {
82 virtual void ResetToBookmark(); 83 return buffer_pos_ + (buffer_cursor_ - buffer_start_);
84 }
85
86 inline void Seek(size_t pos) {
87 if (V8_LIKELY(pos >= buffer_pos_ &&
88 pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
89 buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
90 } else {
91 ReadBlockAt(pos);
92 }
93 }
94
95 // Legacy API:
96 void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); }
97 void PushBack(int32_t code_unit) {
98 Back();
99 #ifdef DEBUG
100 uc32 t = Advance();
101 DCHECK_EQ(t, code_unit);
102 Back();
103 #endif // DEBUG
104 }
105 void PushBack2(int32_t code_unit_back_1, int32_t code_unit_back_2) {
106 Back2();
107 #ifdef DEBUG
108 DCHECK_EQ(Advance(), code_unit_back_2);
109 DCHECK_EQ(Advance(), code_unit_back_1);
110 Back2();
111 #endif // DEBUG
112 }
113 bool SetBookmark() {
114 bookmark_ = pos();
115 return true;
116 }
117 void ResetToBookmark() {
118 DCHECK_NE(bookmark_, kNoBookmark);
119 Seek(bookmark_);
120 }
83 121
84 protected: 122 protected:
85 static const uc32 kEndOfInput = -1; 123 static const size_t kNoBookmark;
86 124
87 // Ensures that the buffer_cursor_ points to the code_unit at 125 Utf16CharacterStream(const uint16_t* buffer_start,
88 // position pos_ of the input, if possible. If the position 126 const uint16_t* buffer_cursor,
89 // is at or after the end of the input, return false. If there 127 const uint16_t* buffer_end, size_t buffer_pos)
90 // are more code_units available, return true. 128 : buffer_start_(buffer_start),
129 buffer_cursor_(buffer_cursor),
130 buffer_end_(buffer_end),
131 buffer_pos_(buffer_pos),
132 bookmark_(kNoBookmark) {}
133 Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
134
135 void ReadBlockAt(size_t new_pos) {
136 // The callers of this method (Back/Back2/Seek) should handle the easy
137 // case (seeking within the current buffer), and we should only get here
138 // if we actually require new data.
139 // (This is really an efficiency check, not a correctness invariant.)
140 DCHECK(new_pos < buffer_pos_ ||
141 new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
142
143 // Change pos() to point to new_pos.
144 buffer_pos_ = new_pos;
145 buffer_cursor_ = buffer_start_;
146 ReadBlock();
147 }
148
149 // Read more data, and update buffer_*_ to point to it.
150 // Returns true if more data was available.
151 //
152 // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
153 // the result of pos() remains unaffected.
154 //
155 // Examples:
156 // - a stream could fill a separate buffer. Then buffer_start_ and
157 // buffer_cursor_ would point to the beginning of the buffer, and
158 // buffer_pos_ would be the old pos().
159 // - a stream with existing buffer chunks would set buffer_start_ and
160 // buffer_end_ to cover the full chunk, and then buffer_cursor_ would
161 // point into the middle of the buffer, while buffer_pos_ would describe
162 // the start of the buffer.
91 virtual bool ReadBlock() = 0; 163 virtual bool ReadBlock() = 0;
92 virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
93 164
165 const uint16_t* buffer_start_;
94 const uint16_t* buffer_cursor_; 166 const uint16_t* buffer_cursor_;
95 const uint16_t* buffer_end_; 167 const uint16_t* buffer_end_;
96 size_t pos_; 168 size_t buffer_pos_;
169 size_t bookmark_;
97 }; 170 };
98 171
99 172
100 // ---------------------------------------------------------------------------- 173 // ----------------------------------------------------------------------------
101 // JavaScript Scanner. 174 // JavaScript Scanner.
102 175
103 class Scanner { 176 class Scanner {
104 public: 177 public:
105 // Scoped helper for a re-settable bookmark. 178 // Scoped helper for a re-settable bookmark.
106 class BookmarkScope { 179 class BookmarkScope {
(...skipping 24 matching lines...) Expand all
131 } 204 }
132 205
133 static Location invalid() { return Location(-1, -1); } 206 static Location invalid() { return Location(-1, -1); }
134 207
135 int beg_pos; 208 int beg_pos;
136 int end_pos; 209 int end_pos;
137 }; 210 };
138 211
139 // -1 is outside of the range of any real source code. 212 // -1 is outside of the range of any real source code.
140 static const int kNoOctalLocation = -1; 213 static const int kNoOctalLocation = -1;
214 static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
141 215
142 explicit Scanner(UnicodeCache* scanner_contants); 216 explicit Scanner(UnicodeCache* scanner_contants);
143 217
144 void Initialize(Utf16CharacterStream* source); 218 void Initialize(Utf16CharacterStream* source);
145 219
146 // Returns the next token and advances input. 220 // Returns the next token and advances input.
147 Token::Value Next(); 221 Token::Value Next();
148 // Returns the token following peek() 222 // Returns the token following peek()
149 Token::Value PeekAhead(); 223 Token::Value PeekAhead();
150 // Returns the current token again. 224 // Returns the current token again.
(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after
792 bool found_html_comment_; 866 bool found_html_comment_;
793 867
794 MessageTemplate::Template scanner_error_; 868 MessageTemplate::Template scanner_error_;
795 Location scanner_error_location_; 869 Location scanner_error_location_;
796 }; 870 };
797 871
798 } // namespace internal 872 } // namespace internal
799 } // namespace v8 873 } // namespace v8
800 874
801 #endif // V8_PARSING_SCANNER_H_ 875 #endif // V8_PARSING_SCANNER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698