Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(128)

Side by Side Diff: src/parsing/scanner.h

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Marja's feedback, round 1. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #ifndef V8_PARSING_SCANNER_H_ 7 #ifndef V8_PARSING_SCANNER_H_
8 #define V8_PARSING_SCANNER_H_ 8 #define V8_PARSING_SCANNER_H_
9 9
10 #include "src/allocation.h" 10 #include "src/allocation.h"
11 #include "src/base/logging.h" 11 #include "src/base/logging.h"
12 #include "src/char-predicates.h" 12 #include "src/char-predicates.h"
13 #include "src/globals.h" 13 #include "src/globals.h"
14 #include "src/messages.h" 14 #include "src/messages.h"
15 #include "src/parsing/token.h" 15 #include "src/parsing/token.h"
16 #include "src/unicode-decoder.h" 16 #include "src/unicode-decoder.h"
17 #include "src/unicode.h" 17 #include "src/unicode.h"
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 22
23 class AstRawString; 23 class AstRawString;
24 class AstValueFactory; 24 class AstValueFactory;
25 class DuplicateFinder; 25 class DuplicateFinder;
26 class ExternalOneByteString;
27 class ExternalTwoByteString;
26 class ParserRecorder; 28 class ParserRecorder;
27 class UnicodeCache; 29 class UnicodeCache;
28 30
29
30 // --------------------------------------------------------------------- 31 // ---------------------------------------------------------------------
31 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. 32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
32 // A code unit is a 16 bit value representing either a 16 bit code point 33 // A code unit is a 16 bit value representing either a 16 bit code point
33 // or one part of a surrogate pair that makes a single 21 bit code point. 34 // or one part of a surrogate pair that makes a single 21 bit code point.
34
35 class Utf16CharacterStream { 35 class Utf16CharacterStream {
36 public: 36 public:
37 Utf16CharacterStream() : pos_(0) { } 37 static const uc32 kEndOfInput = -1;
38
38 virtual ~Utf16CharacterStream() { } 39 virtual ~Utf16CharacterStream() { }
39 40
40 // Returns and advances past the next UTF-16 code unit in the input 41 // Returns and advances past the next UTF-16 code unit in the input
41 // stream. If there are no more code units, it returns a negative 42 // stream. If there are no more code units it returns kEndOfInput.
42 // value.
43 inline uc32 Advance() { 43 inline uc32 Advance() {
44 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { 44 if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
45 pos_++;
46 return static_cast<uc32>(*(buffer_cursor_++)); 45 return static_cast<uc32>(*(buffer_cursor_++));
46 } else if (ReadBlock()) {
47 return static_cast<uc32>(*(buffer_cursor_++));
48 } else {
49 // Note: currently the following increment is necessary to avoid a
50 // parser problem! The scanner treats the final kEndOfInput as
51 // a code unit with a position, and does math relative to that
52 // position.
53 buffer_cursor_++;
marja 2016/09/08 09:46:23 Hmm, I commented on this before, right? foo++ / fo
vogelheim 2016/09/08 15:02:18 Why? [Brief discussion in previous round of answe
54 return kEndOfInput;
47 } 55 }
48 // Note: currently the following increment is necessary to avoid a
49 // parser problem! The scanner treats the final kEndOfInput as
50 // a code unit with a position, and does math relative to that
51 // position.
52 pos_++;
53
54 return kEndOfInput;
55 } 56 }
56 57
57 // Return the current position in the code unit stream. 58 inline void Back() {
58 // Starts at zero. 59 if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
59 inline size_t pos() const { return pos_; } 60 buffer_cursor_--;
60 61 } else {
61 // Skips forward past the next code_unit_count UTF-16 code units 62 ReadBlockAt(pos() - 1);
62 // in the input, or until the end of input if that comes sooner.
63 // Returns the number of code units actually skipped. If less
64 // than code_unit_count, the end of the input was reached.
65 inline size_t SeekForward(size_t code_unit_count) {
66 size_t buffered_chars = buffer_end_ - buffer_cursor_;
67 if (code_unit_count <= buffered_chars) {
68 buffer_cursor_ += code_unit_count;
69 pos_ += code_unit_count;
70 return code_unit_count;
71 } 63 }
72 return SlowSeekForward(code_unit_count);
73 } 64 }
74 65
75 // Pushes back the most recently read UTF-16 code unit (or negative 66 inline void Back2() {
76 // value if at end of input), i.e., the value returned by the most recent 67 if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {
77 // call to Advance. 68 buffer_cursor_ -= 2;
78 // Must not be used right after calling SeekForward. 69 } else {
79 virtual void PushBack(int32_t code_unit) = 0; 70 ReadBlockAt(pos() - 2);
71 }
72 }
80 73
81 virtual bool SetBookmark(); 74 inline size_t pos() const {
82 virtual void ResetToBookmark(); 75 return buffer_pos_ + (buffer_cursor_ - buffer_start_);
76 }
77
78 inline void Seek(size_t pos) {
79 if (V8_LIKELY(pos >= buffer_pos_ &&
80 pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
81 buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
82 } else {
83 ReadBlockAt(pos);
84 }
85 }
86
87 // Legacy API:
88 void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); }
89 void PushBack(int32_t code_unit) {
90 Back();
91 #ifdef DEBUG
92 uc32 t = Advance();
93 DCHECK_EQ(t, code_unit);
94 Back();
95 #endif // DEBUG
96 }
97 void PushBack2(int32_t code_unit_back_1, int32_t code_unit_back_2) {
98 Back2();
99 #ifdef DEBUG
100 DCHECK_EQ(Advance(), code_unit_back_2);
101 DCHECK_EQ(Advance(), code_unit_back_1);
102 Back2();
103 #endif // DEBUG
104 }
105 bool SetBookmark() {
106 bookmark_ = pos();
107 return true;
108 }
109 void ResetToBookmark() {
110 DCHECK(bookmark_ != (size_t)-1);
111 Seek(bookmark_);
112 }
83 113
84 protected: 114 protected:
85 static const uc32 kEndOfInput = -1; 115 Utf16CharacterStream(const uint16_t* buffer_start,
116 const uint16_t* buffer_cursor,
117 const uint16_t* buffer_end, size_t buffer_pos)
118 : buffer_start_(buffer_start),
119 buffer_cursor_(buffer_cursor),
120 buffer_end_(buffer_end),
121 buffer_pos_(buffer_pos),
122 bookmark_((size_t)-1) {}
123 Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
124
125 void ReadBlockAt(size_t new_pos) {
126 // The callers of this method (Back/Back2/Seek) should handle the easy
127 // case (seeking within the current buffer), and we should only get here
128 // if we actually require new data.
129 // (This is really an efficiency check, not a correctness invariant.)
130 DCHECK(new_pos < buffer_pos_ ||
131 new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
132 buffer_pos_ = new_pos;
133 buffer_cursor_ = buffer_start_;
134 ReadBlock();
135 }
86 136
87 // Ensures that the buffer_cursor_ points to the code_unit at 137 // Ensures that the buffer_cursor_ points to the code_unit at
88 // position pos_ of the input, if possible. If the position 138 // position pos() of the input. Returns true if data is available; false if
89 // is at or after the end of the input, return false. If there 139 // pos() is at (or after) the end of input.
90 // are more code_units available, return true.
91 virtual bool ReadBlock() = 0; 140 virtual bool ReadBlock() = 0;
92 virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
93 141
142 const uint16_t* buffer_start_;
94 const uint16_t* buffer_cursor_; 143 const uint16_t* buffer_cursor_;
95 const uint16_t* buffer_end_; 144 const uint16_t* buffer_end_;
96 size_t pos_; 145 size_t buffer_pos_;
146 size_t bookmark_;
97 }; 147 };
98 148
99 149
100 // ---------------------------------------------------------------------------- 150 // ----------------------------------------------------------------------------
101 // JavaScript Scanner. 151 // JavaScript Scanner.
102 152
103 class Scanner { 153 class Scanner {
104 public: 154 public:
105 // Scoped helper for a re-settable bookmark. 155 // Scoped helper for a re-settable bookmark.
106 class BookmarkScope { 156 class BookmarkScope {
(...skipping 24 matching lines...) Expand all
131 } 181 }
132 182
133 static Location invalid() { return Location(-1, -1); } 183 static Location invalid() { return Location(-1, -1); }
134 184
135 int beg_pos; 185 int beg_pos;
136 int end_pos; 186 int end_pos;
137 }; 187 };
138 188
139 // -1 is outside of the range of any real source code. 189 // -1 is outside of the range of any real source code.
140 static const int kNoOctalLocation = -1; 190 static const int kNoOctalLocation = -1;
191 static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
141 192
142 explicit Scanner(UnicodeCache* scanner_contants); 193 explicit Scanner(UnicodeCache* scanner_contants);
143 194
144 void Initialize(Utf16CharacterStream* source); 195 void Initialize(Utf16CharacterStream* source);
145 196
146 // Returns the next token and advances input. 197 // Returns the next token and advances input.
147 Token::Value Next(); 198 Token::Value Next();
148 // Returns the token following peek(). 199 // Returns the token following peek().
149 Token::Value PeekAhead(); 200 Token::Value PeekAhead();
150 // Returns the current token again. 201 // Returns the current token again.
(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after
792 bool found_html_comment_; 843 bool found_html_comment_;
793 844
794 MessageTemplate::Template scanner_error_; 845 MessageTemplate::Template scanner_error_;
795 Location scanner_error_location_; 846 Location scanner_error_location_;
796 }; 847 };
797 848
798 } // namespace internal 849 } // namespace internal
799 } // namespace v8 850 } // namespace v8
800 851
801 #endif // V8_PARSING_SCANNER_H_ 852 #endif // V8_PARSING_SCANNER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698