Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(112)

Side by Side Diff: src/lexer/lexer.h

Issue 187603004: Experimental parser: make utf8 sort of work (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/lexer/lexer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
50 LexerSet lexers_; 50 LexerSet lexers_;
51 }; 51 };
52 52
53 53
54 class LexerBase { 54 class LexerBase {
55 public: 55 public:
56 struct Location { 56 struct Location {
57 Location(int b, int e) : beg_pos(b), end_pos(e) { } 57 Location(int b, int e) : beg_pos(b), end_pos(e) { }
58 Location() : beg_pos(0), end_pos(0) { } 58 Location() : beg_pos(0), end_pos(0) { }
59 59
60 bool IsValid() const { 60 bool IsValid() const { return beg_pos >= 0 && end_pos >= beg_pos; }
61 return beg_pos >= 0 && end_pos >= beg_pos;
62 }
63
64 static Location invalid() { return Location(-1, -1); } 61 static Location invalid() { return Location(-1, -1); }
65 62
66 int beg_pos; 63 int beg_pos;
67 int end_pos; 64 int end_pos;
68 }; 65 };
69 66
70 explicit LexerBase(UnicodeCache* unicode_cache); 67 explicit LexerBase(UnicodeCache* unicode_cache);
71 68
72 virtual ~LexerBase(); 69 virtual ~LexerBase();
73 70
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 110
114 // Returns true if there was a line terminator before the peek'ed token, 111 // Returns true if there was a line terminator before the peek'ed token,
115 // possibly inside a multi-line comment. 112 // possibly inside a multi-line comment.
116 bool HasAnyLineTerminatorBeforeNext() const { 113 bool HasAnyLineTerminatorBeforeNext() const {
117 return has_line_terminator_before_next_ || 114 return has_line_terminator_before_next_ ||
118 has_multiline_comment_before_next_; 115 has_multiline_comment_before_next_;
119 } 116 }
120 117
121 Vector<const uint8_t> literal_one_byte_string() { 118 Vector<const uint8_t> literal_one_byte_string() {
122 EnsureCurrentLiteralIsValid(); 119 EnsureCurrentLiteralIsValid();
123 return current_literal_->one_byte_string; 120 return current_literal_->one_byte_string();
124 } 121 }
125 122
126 Vector<const uint16_t> literal_two_byte_string() { 123 Vector<const uint16_t> literal_two_byte_string() {
127 EnsureCurrentLiteralIsValid(); 124 EnsureCurrentLiteralIsValid();
128 return current_literal_->two_byte_string; 125 return current_literal_->two_byte_string();
129 } 126 }
130 127
131 int literal_length() { 128 int literal_length() {
132 EnsureCurrentLiteralIsValid(); 129 EnsureCurrentLiteralIsValid();
133 return current_literal_->length; 130 return current_literal_->length;
134 } 131 }
135 132
136 bool is_literal_one_byte() { 133 bool is_literal_one_byte() {
137 EnsureCurrentLiteralIsValid(); 134 EnsureCurrentLiteralIsValid();
138 return current_literal_->is_one_byte; 135 return current_literal_->is_one_byte();
139 } 136 }
140 137
141 bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) { 138 bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) {
142 if (!is_literal_one_byte()) return false; 139 if (!is_literal_one_byte()) return false;
143 Vector<const uint8_t> literal = literal_one_byte_string(); 140 Vector<const uint8_t> literal = literal_one_byte_string();
144 return literal.length() == keyword.length() && 141 return literal.length() == keyword.length() &&
145 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); 142 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
146 } 143 }
147 144
148 bool literal_contains_escapes() const { 145 bool literal_contains_escapes() const {
149 return current_.has_escapes; 146 return current_.has_escapes;
150 } 147 }
151 148
152 Vector<const uint8_t> next_literal_one_byte_string() { 149 Vector<const uint8_t> next_literal_one_byte_string() {
153 EnsureNextLiteralIsValid(); 150 EnsureNextLiteralIsValid();
154 return next_literal_->one_byte_string; 151 return next_literal_->one_byte_string();
155 } 152 }
156 153
157 Vector<const uint16_t> next_literal_two_byte_string() { 154 Vector<const uint16_t> next_literal_two_byte_string() {
158 EnsureNextLiteralIsValid(); 155 EnsureNextLiteralIsValid();
159 return next_literal_->two_byte_string; 156 return next_literal_->two_byte_string();
160 } 157 }
161 158
162 int next_literal_length() { 159 int next_literal_length() {
163 EnsureNextLiteralIsValid(); 160 EnsureNextLiteralIsValid();
164 return next_literal_->length; 161 return next_literal_->length;
165 } 162 }
166 163
167 bool is_next_literal_one_byte() { 164 bool is_next_literal_one_byte() {
168 EnsureNextLiteralIsValid(); 165 EnsureNextLiteralIsValid();
169 return next_literal_->is_one_byte; 166 return next_literal_->is_one_byte();
170 } 167 }
171 168
172 bool is_next_contextual_keyword(Vector<const uint8_t> keyword) { 169 bool is_next_contextual_keyword(Vector<const uint8_t> keyword) {
173 if (!is_next_literal_one_byte()) return false; 170 if (!is_next_literal_one_byte()) return false;
174 Vector<const uint8_t> literal = next_literal_one_byte_string(); 171 Vector<const uint8_t> literal = next_literal_one_byte_string();
175 return literal.length() == keyword.length() && 172 return literal.length() == keyword.length() &&
176 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); 173 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
177 } 174 }
178 175
179 bool HarmonyScoping() const { 176 bool HarmonyScoping() const {
(...skipping 15 matching lines...) Expand all
195 bool HarmonyNumericLiterals() const { 192 bool HarmonyNumericLiterals() const {
196 return harmony_numeric_literals_; 193 return harmony_numeric_literals_;
197 } 194 }
198 195
199 void SetHarmonyNumericLiterals(bool numeric_literals) { 196 void SetHarmonyNumericLiterals(bool numeric_literals) {
200 harmony_numeric_literals_ = numeric_literals; 197 harmony_numeric_literals_ = numeric_literals;
201 } 198 }
202 199
203 UnicodeCache* unicode_cache() { return unicode_cache_; } 200 UnicodeCache* unicode_cache() { return unicode_cache_; }
204 201
202 class LiteralDesc {
203 public:
204 LiteralDesc()
205 : beg_pos(-1),
206 offset(0),
207 length(0),
208 is_one_byte_(false),
209 is_in_buffer_(false),
210 is_one_byte_string_owned_(false) // TODO(dcarney): move to buffer
211 { }
212
213 ~LiteralDesc() {
214 if (is_one_byte_string_owned_) {
215 one_byte_string_.Dispose();
216 }
217 }
218
219 inline bool is_one_byte() { return is_one_byte_; }
220 inline Vector<const uint8_t> one_byte_string() {
221 ASSERT(is_one_byte_);
222 return one_byte_string_;
223 }
224 inline Vector<const uint16_t> two_byte_string() {
225 ASSERT(!is_one_byte_);
226 return two_byte_string_;
227 }
228
229 inline bool Valid(int pos) { return beg_pos == pos; }
230 inline void Invalidate() { if (is_in_buffer_) beg_pos = -1; }
231
232 // TODO(dcarney): make private as well.
233 int beg_pos;
234 int offset;
235 int length;
236 LiteralBuffer buffer;
237
238 void SetOneByteString(Vector<const uint8_t> string, bool owned);
239 void SetTwoByteString(Vector<const uint16_t> string);
240 void SetStringFromLiteralBuffer();
241
242 private:
243 bool is_one_byte_;
244 bool is_in_buffer_;
245 bool is_one_byte_string_owned_;
246 Vector<const uint8_t> one_byte_string_;
247 Vector<const uint16_t> two_byte_string_;
248
249 DISALLOW_COPY_AND_ASSIGN(LiteralDesc);
250 };
251
205 protected: 252 protected:
206 struct TokenDesc { 253 struct TokenDesc {
207 Token::Value token;
208 int beg_pos; 254 int beg_pos;
209 int end_pos; 255 int end_pos;
256 Token::Value token;
210 bool has_escapes; 257 bool has_escapes;
211 bool is_onebyte; 258 bool is_onebyte;
212 }; 259 };
213 260
214 struct LiteralDesc {
215 int beg_pos;
216 bool is_one_byte;
217 bool is_in_buffer;
218 int offset;
219 int length;
220 Vector<const uint8_t> one_byte_string;
221 Vector<const uint16_t> two_byte_string;
222 LiteralBuffer buffer;
223 LiteralDesc() : beg_pos(-1), is_one_byte(false), is_in_buffer(false),
224 offset(0), length(0) { }
225 bool Valid(int pos) { return beg_pos == pos; }
226 };
227
228 virtual void Scan() = 0; 261 virtual void Scan() = 0;
229
230 virtual void UpdateBufferBasedOnHandle() = 0; 262 virtual void UpdateBufferBasedOnHandle() = 0;
231 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0; 263 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;
232 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0; 264 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;
233 virtual Handle<String> AllocateLiteral(LiteralDesc* literal, 265 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,
234 PretenureFlag tenured) = 0; 266 PretenureFlag tenured) = 0;
235 267
236 void ResetLiterals() {
237 if (!current_literal_->is_in_buffer) current_literal_->beg_pos = -1;
238 if (!next_literal_->is_in_buffer) next_literal_->beg_pos = -1;
239 }
240
241 void EnsureCurrentLiteralIsValid() { 268 void EnsureCurrentLiteralIsValid() {
242 if (!current_literal_->Valid(current_.beg_pos)) { 269 if (!current_literal_->Valid(current_.beg_pos)) {
243 FillLiteral(current_, current_literal_); 270 FillLiteral(current_, current_literal_);
244 } 271 }
245 } 272 }
246 273
247 void EnsureNextLiteralIsValid() { 274 void EnsureNextLiteralIsValid() {
248 if (!next_literal_->Valid(next_.beg_pos)) { 275 if (!next_literal_->Valid(next_.beg_pos)) {
249 FillLiteral(next_, next_literal_); 276 FillLiteral(next_, next_literal_);
250 } 277 }
251 } 278 }
252 279
253 UnicodeCache* unicode_cache_; 280 UnicodeCache* unicode_cache_;
281 LiteralDesc* current_literal_;
282 LiteralDesc* next_literal_;
283 LiteralDesc literals_[2];
254 284
285 TokenDesc current_; // desc for current token (as returned by Next())
286 TokenDesc next_; // desc for next token (one token look-ahead)
287
288 // TODO(dcarney): encode flags in uint8_t
255 bool has_line_terminator_before_next_; 289 bool has_line_terminator_before_next_;
256 // Whether there is a multiline comment *with a line break* before the next 290 // Whether there is a multiline comment *with a line break* before the next
257 // token. 291 // token.
258 bool has_multiline_comment_before_next_; 292 bool has_multiline_comment_before_next_;
259
260 TokenDesc current_; // desc for current token (as returned by Next())
261 TokenDesc next_; // desc for next token (one token look-ahead)
262
263 LiteralDesc* current_literal_;
264 LiteralDesc* next_literal_;
265 LiteralDesc literals_[2];
266
267 bool harmony_numeric_literals_; 293 bool harmony_numeric_literals_;
268 bool harmony_modules_; 294 bool harmony_modules_;
269 bool harmony_scoping_; 295 bool harmony_scoping_;
270 296
271 friend class Scanner; 297 friend class Scanner;
272 friend class LexerGCHandler; 298 friend class LexerGCHandler;
273 }; 299 };
274 300
275 301
276 template<typename Char> 302 template<typename Char>
277 class Lexer : public LexerBase { 303 class Lexer : public LexerBase {
278 public: 304 public:
279 Lexer(UnicodeCache* unicode_cache, 305 Lexer(UnicodeCache* unicode_cache,
280 Handle<String> source, 306 Handle<String> source,
281 int start_position, 307 int start_position,
282 int end_position); 308 int end_position);
283 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length); 309 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length);
284 virtual ~Lexer(); 310 virtual ~Lexer();
285 311
286 virtual void SeekForward(int pos); 312 virtual void SeekForward(int pos);
287 virtual bool ScanRegExpPattern(bool seen_equal); 313 virtual bool ScanRegExpPattern(bool seen_equal);
288 virtual bool ScanRegExpFlags(); 314 virtual bool ScanRegExpFlags();
289 virtual Location octal_position() const; 315 virtual Location octal_position() const;
290 virtual void clear_octal_position() { last_octal_end_ = NULL; } 316 virtual void clear_octal_position() { last_octal_end_ = NULL; }
291 317
292 protected: 318 protected:
293 virtual void Scan(); 319 virtual void Scan();
294 320
321 private:
322 uc32 ScanHexNumber(int length);
323
324 bool ScanLiteralUnicodeEscape();
325
295 const Char* GetNewBufferBasedOnHandle() const; 326 const Char* GetNewBufferBasedOnHandle() const;
296 virtual void UpdateBufferBasedOnHandle(); 327 virtual void UpdateBufferBasedOnHandle();
297 328
298 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal); 329 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);
299 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal); 330 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal);
300 virtual Handle<String> AllocateLiteral(LiteralDesc* literal, 331 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,
301 PretenureFlag tenured); 332 PretenureFlag tenured);
302 333
303 private: 334 // Helper function for FillLiteral.
304 uc32 ScanHexNumber(int length); 335 template<bool is_one_byte>
305 336 static void SetLiteral(
306 bool ScanLiteralUnicodeEscape(); 337 const Char* start, const Char* end, LiteralDesc* literal);
307
308 const Char* ScanHexNumber(const Char* start,
309 const Char* end,
310 uc32* result);
311 const Char* ScanOctalEscape(const Char* start,
312 const Char* end,
313 uc32* result);
314 const Char* ScanIdentifierUnicodeEscape(const Char* start,
315 const Char* end,
316 uc32* result);
317 const Char* ScanEscape(const Char* start,
318 const Char* end,
319 LiteralBuffer* literal);
320
321 // Returns true if the literal of the token can be represented as a
322 // substring of the source.
323 bool IsSubstringOfSource(const TokenDesc& token);
324 338
325 bool CopyToLiteralBuffer(const Char* start, 339 bool CopyToLiteralBuffer(const Char* start,
326 const Char* end, 340 const Char* end,
327 const TokenDesc& token, 341 const TokenDesc& token,
328 LiteralDesc* literal); 342 LiteralDesc* literal);
329 343
330 // One of source_handle_ or source_ptr_ is set. 344 // One of source_handle_ or source_ptr_ is set.
331 // If source_ptr_ is set, isolate_ is 0 and no isolate accesses are allowed. 345 // If source_ptr_ is set, isolate_ is 0 and no isolate accesses are allowed.
332 Isolate* isolate_; 346 Isolate* isolate_;
333 const Handle<String> source_handle_; 347 const Handle<String> source_handle_;
334 const Char* const source_ptr_; 348 const Char* const source_ptr_;
335 const int start_position_;
336 const int end_position_; 349 const int end_position_;
337 // Stream variables. 350 // Stream variables.
338 const Char* buffer_; 351 const Char* buffer_;
339 const Char* buffer_end_; 352 const Char* buffer_end_;
340 const Char* start_; 353 const Char* start_;
341 const Char* cursor_; 354 const Char* cursor_;
342 // Where we have seen the last octal number or an octal escape inside a 355 // Where we have seen the last octal number or an octal escape inside a
343 // string. Used by octal_position(). 356 // string. Used by octal_position().
344 const Char* last_octal_end_; 357 const Char* last_octal_end_;
345 }; 358 };
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
472 bool harmony_scoping_; 485 bool harmony_scoping_;
473 }; 486 };
474 487
475 488
476 #endif 489 #endif
477 490
478 491
479 } } 492 } }
480 493
481 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H 494 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H
OLDNEW
« no previous file with comments | « no previous file | src/lexer/lexer.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698