Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
| 8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
| 9 | 9 |
| 10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
| (...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 343 bool IsValid() const { | 343 bool IsValid() const { |
| 344 return beg_pos >= 0 && end_pos >= beg_pos; | 344 return beg_pos >= 0 && end_pos >= beg_pos; |
| 345 } | 345 } |
| 346 | 346 |
| 347 static Location invalid() { return Location(-1, -1); } | 347 static Location invalid() { return Location(-1, -1); } |
| 348 | 348 |
| 349 int beg_pos; | 349 int beg_pos; |
| 350 int end_pos; | 350 int end_pos; |
| 351 }; | 351 }; |
| 352 | 352 |
| 353 enum Mode { None, TemplateLiteral }; | |
| 354 | |
| 353 // -1 is outside of the range of any real source code. | 355 // -1 is outside of the range of any real source code. |
| 354 static const int kNoOctalLocation = -1; | 356 static const int kNoOctalLocation = -1; |
| 355 | 357 |
| 356 explicit Scanner(UnicodeCache* scanner_contants); | 358 explicit Scanner(UnicodeCache* scanner_contants); |
| 357 | 359 |
| 358 void Initialize(Utf16CharacterStream* source); | 360 void Initialize(Utf16CharacterStream* source); |
| 359 | 361 |
| 360 // Returns the next token and advances input. | 362 // Returns the next token and advances input. |
| 361 Token::Value Next(); | 363 Token::Value Next(Mode mode = None); |
|
arv (Not doing code reviews)
2014/10/27 18:14:06
I'm not sure you want to use a mode here. Once you
caitp (gmail)
2014/10/27 20:22:36
Done.
SpiderMonkey uses the mode strategy for thi
| |
| 362 // Returns the current token again. | 364 // Returns the current token again. |
| 363 Token::Value current_token() { return current_.token; } | 365 Token::Value current_token() { return current_.token; } |
| 364 // Returns the location information for the current token | 366 // Returns the location information for the current token |
| 365 // (the token last returned by Next()). | 367 // (the token last returned by Next()). |
| 366 Location location() const { return current_.location; } | 368 Location location() const { return current_.location; } |
| 367 | 369 |
| 368 // Similar functions for the upcoming token. | 370 // Similar functions for the upcoming token. |
| 369 | 371 |
| 370 // One token look-ahead (past the token returned by Next()). | 372 // One token look-ahead (past the token returned by Next()). |
| 371 Token::Value peek() const { return next_.token; } | 373 Token::Value peek() const { return next_.token; } |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 384 bool is_literal_contextual_keyword(Vector<const char> keyword) { | 386 bool is_literal_contextual_keyword(Vector<const char> keyword) { |
| 385 DCHECK_NOT_NULL(current_.literal_chars); | 387 DCHECK_NOT_NULL(current_.literal_chars); |
| 386 return current_.literal_chars->is_contextual_keyword(keyword); | 388 return current_.literal_chars->is_contextual_keyword(keyword); |
| 387 } | 389 } |
| 388 bool is_next_contextual_keyword(Vector<const char> keyword) { | 390 bool is_next_contextual_keyword(Vector<const char> keyword) { |
| 389 DCHECK_NOT_NULL(next_.literal_chars); | 391 DCHECK_NOT_NULL(next_.literal_chars); |
| 390 return next_.literal_chars->is_contextual_keyword(keyword); | 392 return next_.literal_chars->is_contextual_keyword(keyword); |
| 391 } | 393 } |
| 392 | 394 |
| 393 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); | 395 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); |
| 396 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); | |
| 394 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); | 397 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); |
| 398 const AstRawString* NextRawSymbol(AstValueFactory* ast_value_factory); | |
| 395 | 399 |
| 396 double DoubleValue(); | 400 double DoubleValue(); |
| 397 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { | 401 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { |
| 398 if (is_literal_one_byte() && | 402 if (is_literal_one_byte() && |
| 399 literal_length() == length && | 403 literal_length() == length && |
| 400 (allow_escapes || !literal_contains_escapes())) { | 404 (allow_escapes || !literal_contains_escapes())) { |
| 401 const char* token = | 405 const char* token = |
| 402 reinterpret_cast<const char*>(literal_one_byte_string().start()); | 406 reinterpret_cast<const char*>(literal_one_byte_string().start()); |
| 403 return !strncmp(token, data, length); | 407 return !strncmp(token, data, length); |
| 404 } | 408 } |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 451 } | 455 } |
| 452 void SetHarmonyNumericLiterals(bool numeric_literals) { | 456 void SetHarmonyNumericLiterals(bool numeric_literals) { |
| 453 harmony_numeric_literals_ = numeric_literals; | 457 harmony_numeric_literals_ = numeric_literals; |
| 454 } | 458 } |
| 455 bool HarmonyClasses() const { | 459 bool HarmonyClasses() const { |
| 456 return harmony_classes_; | 460 return harmony_classes_; |
| 457 } | 461 } |
| 458 void SetHarmonyClasses(bool classes) { | 462 void SetHarmonyClasses(bool classes) { |
| 459 harmony_classes_ = classes; | 463 harmony_classes_ = classes; |
| 460 } | 464 } |
| 465 bool HarmonyTemplates() const { return harmony_templates_; } | |
| 466 void SetHarmonyTemplates(bool templates) { harmony_templates_ = templates; } | |
| 461 | 467 |
| 462 // Returns true if there was a line terminator before the peek'ed token, | 468 // Returns true if there was a line terminator before the peek'ed token, |
| 463 // possibly inside a multi-line comment. | 469 // possibly inside a multi-line comment. |
| 464 bool HasAnyLineTerminatorBeforeNext() const { | 470 bool HasAnyLineTerminatorBeforeNext() const { |
| 465 return has_line_terminator_before_next_ || | 471 return has_line_terminator_before_next_ || |
| 466 has_multiline_comment_before_next_; | 472 has_multiline_comment_before_next_; |
| 467 } | 473 } |
| 468 | 474 |
| 469 // Scans the input as a regular expression pattern, previous | 475 // Scans the input as a regular expression pattern, previous |
| 470 // character(s) must be /(=). Returns true if a pattern is scanned. | 476 // character(s) must be /(=). Returns true if a pattern is scanned. |
| 471 bool ScanRegExpPattern(bool seen_equal); | 477 bool ScanRegExpPattern(bool seen_equal); |
| 472 // Returns true if regexp flags are scanned (always since flags can | 478 // Returns true if regexp flags are scanned (always since flags can |
| 473 // be empty). | 479 // be empty). |
| 474 bool ScanRegExpFlags(); | 480 bool ScanRegExpFlags(); |
| 475 | 481 |
| 476 const LiteralBuffer* source_url() const { return &source_url_; } | 482 const LiteralBuffer* source_url() const { return &source_url_; } |
| 477 const LiteralBuffer* source_mapping_url() const { | 483 const LiteralBuffer* source_mapping_url() const { |
| 478 return &source_mapping_url_; | 484 return &source_mapping_url_; |
| 479 } | 485 } |
| 480 | 486 |
| 481 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; | 487 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; |
| 482 | 488 |
| 483 private: | 489 private: |
| 484 // The current and look-ahead token. | 490 // The current and look-ahead token. |
| 485 struct TokenDesc { | 491 struct TokenDesc { |
| 486 Token::Value token; | 492 Token::Value token; |
| 487 Location location; | 493 Location location; |
| 488 LiteralBuffer* literal_chars; | 494 LiteralBuffer* literal_chars; |
| 495 LiteralBuffer* raw_literal_chars; | |
| 489 }; | 496 }; |
| 490 | 497 |
| 491 static const int kCharacterLookaheadBufferSize = 1; | 498 static const int kCharacterLookaheadBufferSize = 1; |
| 492 | 499 |
| 493 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 500 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 494 uc32 ScanOctalEscape(uc32 c, int length); | 501 uc32 ScanOctalEscape(uc32 c, int length, bool recordRaw = false); |
| 495 | 502 |
| 496 // Call this after setting source_ to the input. | 503 // Call this after setting source_ to the input. |
| 497 void Init() { | 504 void Init() { |
| 498 // Set c0_ (one character ahead) | 505 // Set c0_ (one character ahead) |
| 499 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 506 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
| 500 Advance(); | 507 Advance(); |
| 501 // Initialize current_ to not refer to a literal. | 508 // Initialize current_ to not refer to a literal. |
| 502 current_.literal_chars = NULL; | 509 current_.literal_chars = NULL; |
| 510 current_.raw_literal_chars = NULL; | |
| 503 } | 511 } |
| 504 | 512 |
| 505 // Literal buffer support | 513 // Literal buffer support |
| 506 inline void StartLiteral() { | 514 inline void StartLiteral() { |
| 507 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 515 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
| 508 &literal_buffer2_ : &literal_buffer1_; | 516 &literal_buffer2_ : &literal_buffer1_; |
| 517 LiteralBuffer* raw_buffer = (current_.raw_literal_chars == &raw_buffer1_) | |
| 518 ? &raw_buffer2_ | |
| 519 : &raw_buffer1_; | |
| 509 free_buffer->Reset(); | 520 free_buffer->Reset(); |
| 510 next_.literal_chars = free_buffer; | 521 next_.literal_chars = free_buffer; |
| 522 next_.raw_literal_chars = raw_buffer; | |
| 511 } | 523 } |
| 512 | 524 |
| 513 INLINE(void AddLiteralChar(uc32 c)) { | 525 INLINE(void AddLiteralChar(uc32 c)) { |
| 514 DCHECK_NOT_NULL(next_.literal_chars); | 526 DCHECK_NOT_NULL(next_.literal_chars); |
| 515 next_.literal_chars->AddChar(c); | 527 next_.literal_chars->AddChar(c); |
| 516 } | 528 } |
| 517 | 529 |
| 530 INLINE(void AddRawLiteralChar(uc32 c)) { | |
| 531 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
| 532 next_.raw_literal_chars->AddChar(c); | |
| 533 } | |
| 534 | |
| 518 // Complete scanning of a literal. | 535 // Complete scanning of a literal. |
| 519 inline void TerminateLiteral() { | 536 inline void TerminateLiteral() { |
| 520 // Does nothing in the current implementation. | 537 // Does nothing in the current implementation. |
| 521 } | 538 } |
| 522 | 539 |
| 523 // Stops scanning of a literal and drop the collected characters, | 540 // Stops scanning of a literal and drop the collected characters, |
| 524 // e.g., due to an encountered error. | 541 // e.g., due to an encountered error. |
| 525 inline void DropLiteral() { | 542 inline void DropLiteral() { |
| 526 next_.literal_chars = NULL; | 543 next_.literal_chars = NULL; |
| 527 } | 544 } |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 576 // These functions only give the correct result if the literal | 593 // These functions only give the correct result if the literal |
| 577 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 594 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 578 Vector<const uint8_t> literal_one_byte_string() { | 595 Vector<const uint8_t> literal_one_byte_string() { |
| 579 DCHECK_NOT_NULL(current_.literal_chars); | 596 DCHECK_NOT_NULL(current_.literal_chars); |
| 580 return current_.literal_chars->one_byte_literal(); | 597 return current_.literal_chars->one_byte_literal(); |
| 581 } | 598 } |
| 582 Vector<const uint16_t> literal_two_byte_string() { | 599 Vector<const uint16_t> literal_two_byte_string() { |
| 583 DCHECK_NOT_NULL(current_.literal_chars); | 600 DCHECK_NOT_NULL(current_.literal_chars); |
| 584 return current_.literal_chars->two_byte_literal(); | 601 return current_.literal_chars->two_byte_literal(); |
| 585 } | 602 } |
| 603 Vector<const uint8_t> raw_one_byte_string() { | |
| 604 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
| 605 return current_.raw_literal_chars->one_byte_literal(); | |
| 606 } | |
| 607 Vector<const uint16_t> raw_two_byte_string() { | |
| 608 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
| 609 return current_.raw_literal_chars->two_byte_literal(); | |
| 610 } | |
| 586 bool is_literal_one_byte() { | 611 bool is_literal_one_byte() { |
| 587 DCHECK_NOT_NULL(current_.literal_chars); | 612 DCHECK_NOT_NULL(current_.literal_chars); |
| 588 return current_.literal_chars->is_one_byte(); | 613 return current_.literal_chars->is_one_byte(); |
| 589 } | 614 } |
| 615 bool is_raw_one_byte() { | |
| 616 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
| 617 return current_.raw_literal_chars->is_one_byte(); | |
| 618 } | |
| 590 int literal_length() const { | 619 int literal_length() const { |
| 591 DCHECK_NOT_NULL(current_.literal_chars); | 620 DCHECK_NOT_NULL(current_.literal_chars); |
| 592 return current_.literal_chars->length(); | 621 return current_.literal_chars->length(); |
| 593 } | 622 } |
| 594 // Returns the literal string for the next token (the token that | 623 // Returns the literal string for the next token (the token that |
| 595 // would be returned if Next() were called). | 624 // would be returned if Next() were called). |
| 596 Vector<const uint8_t> next_literal_one_byte_string() { | 625 Vector<const uint8_t> next_literal_one_byte_string() { |
| 597 DCHECK_NOT_NULL(next_.literal_chars); | 626 DCHECK_NOT_NULL(next_.literal_chars); |
| 598 return next_.literal_chars->one_byte_literal(); | 627 return next_.literal_chars->one_byte_literal(); |
| 599 } | 628 } |
| 600 Vector<const uint16_t> next_literal_two_byte_string() { | 629 Vector<const uint16_t> next_literal_two_byte_string() { |
| 601 DCHECK_NOT_NULL(next_.literal_chars); | 630 DCHECK_NOT_NULL(next_.literal_chars); |
| 602 return next_.literal_chars->two_byte_literal(); | 631 return next_.literal_chars->two_byte_literal(); |
| 603 } | 632 } |
| 633 Vector<const uint8_t> next_raw_one_byte_string() { | |
| 634 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
| 635 return next_.raw_literal_chars->one_byte_literal(); | |
| 636 } | |
| 637 Vector<const uint16_t> next_raw_two_byte_string() { | |
| 638 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
| 639 return next_.raw_literal_chars->two_byte_literal(); | |
| 640 } | |
| 604 bool is_next_literal_one_byte() { | 641 bool is_next_literal_one_byte() { |
| 605 DCHECK_NOT_NULL(next_.literal_chars); | 642 DCHECK_NOT_NULL(next_.literal_chars); |
| 606 return next_.literal_chars->is_one_byte(); | 643 return next_.literal_chars->is_one_byte(); |
| 607 } | 644 } |
| 645 bool is_next_raw_one_byte() { | |
| 646 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
| 647 return next_.raw_literal_chars->is_one_byte(); | |
| 648 } | |
| 608 int next_literal_length() const { | 649 int next_literal_length() const { |
| 609 DCHECK_NOT_NULL(next_.literal_chars); | 650 DCHECK_NOT_NULL(next_.literal_chars); |
| 610 return next_.literal_chars->length(); | 651 return next_.literal_chars->length(); |
| 611 } | 652 } |
| 612 | 653 |
| 613 uc32 ScanHexNumber(int expected_length); | 654 uc32 ScanHexNumber(int expected_length, bool recordRaw = false); |
| 614 | 655 |
| 615 // Scans a single JavaScript token. | 656 // Scans a single JavaScript token. |
| 616 void Scan(); | 657 void Scan(Mode mode = None); |
| 617 | 658 |
| 618 bool SkipWhiteSpace(); | 659 bool SkipWhiteSpace(); |
| 619 Token::Value SkipSingleLineComment(); | 660 Token::Value SkipSingleLineComment(); |
| 620 Token::Value SkipSourceURLComment(); | 661 Token::Value SkipSourceURLComment(); |
| 621 void TryToParseSourceURLComment(); | 662 void TryToParseSourceURLComment(); |
| 622 Token::Value SkipMultiLineComment(); | 663 Token::Value SkipMultiLineComment(); |
| 623 // Scans a possible HTML comment -- begins with '<!'. | 664 // Scans a possible HTML comment -- begins with '<!'. |
| 624 Token::Value ScanHtmlComment(); | 665 Token::Value ScanHtmlComment(); |
| 625 | 666 |
| 626 void ScanDecimalDigits(); | 667 void ScanDecimalDigits(); |
| 627 Token::Value ScanNumber(bool seen_period); | 668 Token::Value ScanNumber(bool seen_period); |
| 628 Token::Value ScanIdentifierOrKeyword(); | 669 Token::Value ScanIdentifierOrKeyword(); |
| 629 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 670 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
| 630 | 671 |
| 631 Token::Value ScanString(); | 672 Token::Value ScanString(); |
| 673 Token::Value ScanTemplateSpan(); | |
| 632 | 674 |
| 633 // Scans an escape-sequence which is part of a string and adds the | 675 // Scans an escape-sequence which is part of a string and adds the |
| 634 // decoded character to the current literal. Returns true if a pattern | 676 // decoded character to the current literal. Returns true if a pattern |
| 635 // is scanned. | 677 // is scanned. |
| 636 bool ScanEscape(); | 678 bool ScanEscape(bool recordRaw = false); |
| 637 // Decodes a Unicode escape-sequence which is part of an identifier. | 679 // Decodes a Unicode escape-sequence which is part of an identifier. |
| 638 // If the escape sequence cannot be decoded the result is kBadChar. | 680 // If the escape sequence cannot be decoded the result is kBadChar. |
| 639 uc32 ScanIdentifierUnicodeEscape(); | 681 uc32 ScanIdentifierUnicodeEscape(); |
| 640 // Scans a Unicode escape-sequence and adds its characters, | 682 // Scans a Unicode escape-sequence and adds its characters, |
| 641 // uninterpreted, to the current literal. Used for parsing RegExp | 683 // uninterpreted, to the current literal. Used for parsing RegExp |
| 642 // flags. | 684 // flags. |
| 643 bool ScanLiteralUnicodeEscape(); | 685 bool ScanLiteralUnicodeEscape(); |
| 644 | 686 |
| 645 // Return the current source position. | 687 // Return the current source position. |
| 646 int source_pos() { | 688 int source_pos() { |
| 647 return source_->pos() - kCharacterLookaheadBufferSize; | 689 return source_->pos() - kCharacterLookaheadBufferSize; |
| 648 } | 690 } |
| 649 | 691 |
| 650 UnicodeCache* unicode_cache_; | 692 UnicodeCache* unicode_cache_; |
| 651 | 693 |
| 652 // Buffers collecting literal strings, numbers, etc. | 694 // Buffers collecting literal strings, numbers, etc. |
| 653 LiteralBuffer literal_buffer1_; | 695 LiteralBuffer literal_buffer1_; |
| 654 LiteralBuffer literal_buffer2_; | 696 LiteralBuffer literal_buffer2_; |
| 655 | 697 |
| 698 // Buffer to store raw string values | |
| 699 LiteralBuffer raw_buffer1_; | |
| 700 LiteralBuffer raw_buffer2_; | |
| 701 | |
| 656 // Values parsed from magic comments. | 702 // Values parsed from magic comments. |
| 657 LiteralBuffer source_url_; | 703 LiteralBuffer source_url_; |
| 658 LiteralBuffer source_mapping_url_; | 704 LiteralBuffer source_mapping_url_; |
| 659 | 705 |
| 660 TokenDesc current_; // desc for current token (as returned by Next()) | 706 TokenDesc current_; // desc for current token (as returned by Next()) |
| 661 TokenDesc next_; // desc for next token (one token look-ahead) | 707 TokenDesc next_; // desc for next token (one token look-ahead) |
| 662 | 708 |
| 663 // Input stream. Must be initialized to an Utf16CharacterStream. | 709 // Input stream. Must be initialized to an Utf16CharacterStream. |
| 664 Utf16CharacterStream* source_; | 710 Utf16CharacterStream* source_; |
| 665 | 711 |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 678 // line-terminator after the current token, and before the next. | 724 // line-terminator after the current token, and before the next. |
| 679 bool has_multiline_comment_before_next_; | 725 bool has_multiline_comment_before_next_; |
| 680 // Whether we scan 'let' as a keyword for harmony block-scoped let bindings. | 726 // Whether we scan 'let' as a keyword for harmony block-scoped let bindings. |
| 681 bool harmony_scoping_; | 727 bool harmony_scoping_; |
| 682 // Whether we scan 'module', 'import', 'export' as keywords. | 728 // Whether we scan 'module', 'import', 'export' as keywords. |
| 683 bool harmony_modules_; | 729 bool harmony_modules_; |
| 684 // Whether we scan 0o777 and 0b111 as numbers. | 730 // Whether we scan 0o777 and 0b111 as numbers. |
| 685 bool harmony_numeric_literals_; | 731 bool harmony_numeric_literals_; |
| 686 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords. | 732 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords. |
| 687 bool harmony_classes_; | 733 bool harmony_classes_; |
| 734 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | |
| 735 bool harmony_templates_; | |
| 688 }; | 736 }; |
| 689 | 737 |
| 690 } } // namespace v8::internal | 738 } } // namespace v8::internal |
| 691 | 739 |
| 692 #endif // V8_SCANNER_H_ | 740 #endif // V8_SCANNER_H_ |
| OLD | NEW |