Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Side by Side Diff: src/parsing/scanner.cc

Issue 1841543003: [esnext] implement frontend changes for async/await proposal (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: A bunch more tests, some fixes, ExpressionClassifier gets fatter :( Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/parsing/scanner.h" 7 #include "src/parsing/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 } 49 }
50 50
51 51
52 void Scanner::Initialize(Utf16CharacterStream* source) { 52 void Scanner::Initialize(Utf16CharacterStream* source) {
53 source_ = source; 53 source_ = source;
54 // Need to capture identifiers in order to recognize "get" and "set" 54 // Need to capture identifiers in order to recognize "get" and "set"
55 // in object literals. 55 // in object literals.
56 Init(); 56 Init();
57 // Skip initial whitespace allowing HTML comment ends just like 57 // Skip initial whitespace allowing HTML comment ends just like
58 // after a newline and scan first token. 58 // after a newline and scan first token.
59 has_line_terminator_before_next_ = true; 59 has_preceding_line_terminator_ = true;
60 did_see_multiline_comment_ = false;
60 SkipWhiteSpace(); 61 SkipWhiteSpace();
61 Scan(); 62 Scan();
62 } 63 }
63 64
64 template <bool capture_raw, bool unicode> 65 template <bool capture_raw, bool unicode>
65 uc32 Scanner::ScanHexNumber(int expected_length) { 66 uc32 Scanner::ScanHexNumber(int expected_length) {
66 DCHECK(expected_length <= 4); // prevent overflow 67 DCHECK(expected_length <= 4); // prevent overflow
67 68
68 int begin = source_pos() - 2; 69 int begin = source_pos() - 2;
69 uc32 x = 0; 70 uc32 x = 0;
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after
242 243
243 Token::Value Scanner::Next() { 244 Token::Value Scanner::Next() {
244 if (next_.token == Token::EOS) { 245 if (next_.token == Token::EOS) {
245 next_.location.beg_pos = current_.location.beg_pos; 246 next_.location.beg_pos = current_.location.beg_pos;
246 next_.location.end_pos = current_.location.end_pos; 247 next_.location.end_pos = current_.location.end_pos;
247 } 248 }
248 current_ = next_; 249 current_ = next_;
249 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { 250 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {
250 next_ = next_next_; 251 next_ = next_next_;
251 next_next_.token = Token::UNINITIALIZED; 252 next_next_.token = Token::UNINITIALIZED;
253 has_preceding_line_terminator_ = next_has_preceding_line_terminator_;
252 return current_.token; 254 return current_.token;
253 } 255 }
254 has_line_terminator_before_next_ = false; 256 has_preceding_line_terminator_ = false;
255 has_multiline_comment_before_next_ = false;
256 if (static_cast<unsigned>(c0_) <= 0x7f) { 257 if (static_cast<unsigned>(c0_) <= 0x7f) {
257 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); 258 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
258 if (token != Token::ILLEGAL) { 259 if (token != Token::ILLEGAL) {
259 int pos = source_pos(); 260 int pos = source_pos();
260 next_.token = token; 261 next_.token = token;
261 next_.location.beg_pos = pos; 262 next_.location.beg_pos = pos;
262 next_.location.end_pos = pos + 1; 263 next_.location.end_pos = pos + 1;
263 Advance(); 264 Advance();
264 return current_.token; 265 return current_.token;
265 } 266 }
266 } 267 }
267 Scan(); 268 Scan();
268 return current_.token; 269 return current_.token;
269 } 270 }
270 271
271 272
272 Token::Value Scanner::PeekAhead() { 273 Token::Value Scanner::PeekAhead() {
273 if (next_next_.token != Token::UNINITIALIZED) { 274 if (next_next_.token != Token::UNINITIALIZED) {
274 return next_next_.token; 275 return next_next_.token;
275 } 276 }
276 TokenDesc prev = current_; 277 TokenDesc prev = current_;
278 bool has_preceding_line_terminator = has_preceding_line_terminator_;
277 Next(); 279 Next();
280 next_has_preceding_line_terminator_ = has_preceding_line_terminator_;
281 has_preceding_line_terminator_ = has_preceding_line_terminator;
278 Token::Value ret = next_.token; 282 Token::Value ret = next_.token;
279 next_next_ = next_; 283 next_next_ = next_;
280 next_ = current_; 284 next_ = current_;
281 current_ = prev; 285 current_ = prev;
282 return ret; 286 return ret;
283 } 287 }
284 288
285 289
286 // TODO(yangguo): check whether this is actually necessary. 290 // TODO(yangguo): check whether this is actually necessary.
287 static inline bool IsLittleEndianByteOrderMark(uc32 c) { 291 static inline bool IsLittleEndianByteOrderMark(uc32 c) {
288 // The Unicode value U+FFFE is guaranteed never to be assigned as a 292 // The Unicode value U+FFFE is guaranteed never to be assigned as a
289 // Unicode character; this implies that in a Unicode context the 293 // Unicode character; this implies that in a Unicode context the
290 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 294 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
291 // character expressed in little-endian byte order (since it could 295 // character expressed in little-endian byte order (since it could
292 // not be a U+FFFE character expressed in big-endian byte 296 // not be a U+FFFE character expressed in big-endian byte
293 // order). Nevertheless, we check for it to be compatible with 297 // order). Nevertheless, we check for it to be compatible with
294 // Spidermonkey. 298 // Spidermonkey.
295 return c == 0xFFFE; 299 return c == 0xFFFE;
296 } 300 }
297 301
298 302
299 bool Scanner::SkipWhiteSpace() { 303 bool Scanner::SkipWhiteSpace() {
300 int start_position = source_pos(); 304 int start_position = source_pos();
301
302 while (true) { 305 while (true) {
303 while (true) { 306 while (true) {
304 // The unicode cache accepts unsigned inputs. 307 // The unicode cache accepts unsigned inputs.
305 if (c0_ < 0) break; 308 if (c0_ < 0) break;
306 // Advance as long as character is a WhiteSpace or LineTerminator. 309 // Advance as long as character is a WhiteSpace or LineTerminator.
307 // Remember if the latter is the case. 310 // Remember if the latter is the case.
308 if (unicode_cache_->IsLineTerminator(c0_)) { 311 if (unicode_cache_->IsLineTerminator(c0_)) {
309 has_line_terminator_before_next_ = true; 312 has_preceding_line_terminator_ = true;
313 did_see_multiline_comment_ = false;
310 } else if (!unicode_cache_->IsWhiteSpace(c0_) && 314 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
311 !IsLittleEndianByteOrderMark(c0_)) { 315 !IsLittleEndianByteOrderMark(c0_)) {
312 break; 316 break;
313 } 317 }
314 Advance(); 318 Advance();
315 } 319 }
316 320
317 // If there is an HTML comment end '-->' at the beginning of a 321 // If there is an HTML comment end '-->' at the beginning of a
318 // line (with only whitespace in front of it), we treat the rest 322 // line (with only whitespace in front of it), we treat the rest
319 // of the line as a comment. This is in line with the way 323 // of the line as a comment. This is in line with the way
320 // SpiderMonkey handles it. 324 // SpiderMonkey handles it.
321 if (c0_ == '-' && has_line_terminator_before_next_) { 325 if (c0_ == '-' && has_preceding_line_terminator_ &&
326 !did_see_multiline_comment_) {
322 Advance(); 327 Advance();
323 if (c0_ == '-') { 328 if (c0_ == '-') {
324 Advance(); 329 Advance();
325 if (c0_ == '>') { 330 if (c0_ == '>') {
326 // Treat the rest of the line as a comment. 331 // Treat the rest of the line as a comment.
327 SkipSingleLineComment(); 332 SkipSingleLineComment();
328 // Continue skipping white space after the comment. 333 // Continue skipping white space after the comment.
329 continue; 334 continue;
330 } 335 }
331 PushBack('-'); // undo Advance() 336 PushBack('-'); // undo Advance()
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
418 Token::Value Scanner::SkipMultiLineComment() { 423 Token::Value Scanner::SkipMultiLineComment() {
419 DCHECK(c0_ == '*'); 424 DCHECK(c0_ == '*');
420 Advance(); 425 Advance();
421 426
422 while (c0_ >= 0) { 427 while (c0_ >= 0) {
423 uc32 ch = c0_; 428 uc32 ch = c0_;
424 Advance(); 429 Advance();
425 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { 430 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {
426 // Following ECMA-262, section 7.4, a comment containing 431 // Following ECMA-262, section 7.4, a comment containing
427 // a newline will make the comment count as a line-terminator. 432 // a newline will make the comment count as a line-terminator.
428 has_multiline_comment_before_next_ = true; 433 has_preceding_line_terminator_ = true;
434 did_see_multiline_comment_ = true;
429 } 435 }
430 // If we have reached the end of the multi-line comment, we 436 // If we have reached the end of the multi-line comment, we
431 // consume the '/' and insert a whitespace. This way all 437 // consume the '/' and insert a whitespace. This way all
432 // multi-line comments are treated as whitespace. 438 // multi-line comments are treated as whitespace.
433 if (ch == '*' && c0_ == '/') { 439 if (ch == '*' && c0_ == '/') {
434 c0_ = ' '; 440 c0_ = ' ';
435 return Token::WHITESPACE; 441 return Token::WHITESPACE;
436 } 442 }
437 } 443 }
438 444
(...skipping 17 matching lines...) Expand all
456 PushBack('!'); // undo Advance() 462 PushBack('!'); // undo Advance()
457 DCHECK(c0_ == '!'); 463 DCHECK(c0_ == '!');
458 return Token::LT; 464 return Token::LT;
459 } 465 }
460 466
461 467
462 void Scanner::Scan() { 468 void Scanner::Scan() {
463 next_.literal_chars = NULL; 469 next_.literal_chars = NULL;
464 next_.raw_literal_chars = NULL; 470 next_.raw_literal_chars = NULL;
465 Token::Value token; 471 Token::Value token;
472
466 do { 473 do {
467 // Remember the position of the next token 474 // Remember the position of the next token
468 next_.location.beg_pos = source_pos(); 475 next_.location.beg_pos = source_pos();
469 476
470 switch (c0_) { 477 switch (c0_) {
471 case ' ': 478 case ' ':
472 case '\t': 479 case '\t':
473 Advance(); 480 Advance();
474 token = Token::WHITESPACE; 481 token = Token::WHITESPACE;
475 break; 482 break;
476 483
477 case '\n': 484 case '\n':
478 Advance(); 485 Advance();
479 has_line_terminator_before_next_ = true; 486 has_preceding_line_terminator_ = true;
487 did_see_multiline_comment_ = false;
480 token = Token::WHITESPACE; 488 token = Token::WHITESPACE;
481 break; 489 break;
482 490
483 case '"': case '\'': 491 case '"': case '\'':
484 token = ScanString(); 492 token = ScanString();
485 break; 493 break;
486 494
487 case '<': 495 case '<':
488 // < <= << <<= <!-- 496 // < <= << <<= <!--
489 Advance(); 497 Advance();
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
550 } else { 558 } else {
551 token = Token::ADD; 559 token = Token::ADD;
552 } 560 }
553 break; 561 break;
554 562
555 case '-': 563 case '-':
556 // - -- --> -= 564 // - -- --> -=
557 Advance(); 565 Advance();
558 if (c0_ == '-') { 566 if (c0_ == '-') {
559 Advance(); 567 Advance();
560 if (c0_ == '>' && has_line_terminator_before_next_) { 568 if (c0_ == '>' && has_preceding_line_terminator_ &&
569 !did_see_multiline_comment_) {
561 // For compatibility with SpiderMonkey, we skip lines that 570 // For compatibility with SpiderMonkey, we skip lines that
562 // start with an HTML comment end '-->'. 571 // start with an HTML comment end '-->'.
563 token = SkipSingleLineComment(); 572 token = SkipSingleLineComment();
564 } else { 573 } else {
565 token = Token::DEC; 574 token = Token::DEC;
566 } 575 }
567 } else if (c0_ == '=') { 576 } else if (c0_ == '=') {
568 token = Select(Token::ASSIGN_SUB); 577 token = Select(Token::ASSIGN_SUB);
569 } else { 578 } else {
570 token = Token::SUB; 579 token = Token::SUB;
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
736 int current_pos = source_pos(); 745 int current_pos = source_pos();
737 DCHECK_EQ(next_.location.end_pos, current_pos); 746 DCHECK_EQ(next_.location.end_pos, current_pos);
738 // Positions inside the lookahead token aren't supported. 747 // Positions inside the lookahead token aren't supported.
739 DCHECK(pos >= current_pos); 748 DCHECK(pos >= current_pos);
740 if (pos != current_pos) { 749 if (pos != current_pos) {
741 source_->SeekForward(pos - source_->pos()); 750 source_->SeekForward(pos - source_->pos());
742 Advance(); 751 Advance();
743 // This function is only called to seek to the location 752 // This function is only called to seek to the location
744 // of the end of a function (at the "}" token). It doesn't matter 753 // of the end of a function (at the "}" token). It doesn't matter
745 // whether there was a line terminator in the part we skip. 754 // whether there was a line terminator in the part we skip.
746 has_line_terminator_before_next_ = false; 755 has_preceding_line_terminator_ = false;
747 has_multiline_comment_before_next_ = false;
748 } 756 }
749 Scan(); 757 Scan();
750 } 758 }
751 759
752 760
753 template <bool capture_raw, bool in_template_literal> 761 template <bool capture_raw, bool in_template_literal>
754 bool Scanner::ScanEscape() { 762 bool Scanner::ScanEscape() {
755 uc32 c = c0_; 763 uc32 c = c0_;
756 Advance<capture_raw>(); 764 Advance<capture_raw>();
757 765
(...skipping 704 matching lines...) Expand 10 before | Expand all | Expand 10 after
1462 } 1470 }
1463 1471
1464 1472
1465 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { 1473 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
1466 if (is_next_literal_one_byte()) { 1474 if (is_next_literal_one_byte()) {
1467 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); 1475 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
1468 } 1476 }
1469 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); 1477 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
1470 } 1478 }
1471 1479
1480 const AstRawString* Scanner::NextNextSymbol(
1481 AstValueFactory* ast_value_factory) {
1482 DCHECK(next_next_.token != Token::UNINITIALIZED);
1483 LiteralBuffer* literal = next_next_.literal_chars;
1484 if (literal->is_one_byte()) {
1485 return ast_value_factory->GetOneByteString(literal->one_byte_literal());
1486 }
1487 return ast_value_factory->GetTwoByteString(literal->two_byte_literal());
1488 }
1472 1489
1473 const AstRawString* Scanner::CurrentRawSymbol( 1490 const AstRawString* Scanner::CurrentRawSymbol(
1474 AstValueFactory* ast_value_factory) { 1491 AstValueFactory* ast_value_factory) {
1475 if (is_raw_literal_one_byte()) { 1492 if (is_raw_literal_one_byte()) {
1476 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string()); 1493 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
1477 } 1494 }
1478 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string()); 1495 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
1479 } 1496 }
1480 1497
1481 1498
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
1687 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1704 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1688 } 1705 }
1689 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1706 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1690 1707
1691 backing_store_.AddBlock(bytes); 1708 backing_store_.AddBlock(bytes);
1692 return backing_store_.EndSequence().start(); 1709 return backing_store_.EndSequence().start();
1693 } 1710 }
1694 1711
1695 } // namespace internal 1712 } // namespace internal
1696 } // namespace v8 1713 } // namespace v8
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698