| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
| 8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
| 9 | 9 |
| 10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
| (...skipping 304 matching lines...) |
| 315 | 315 |
| 316 // ---------------------------------------------------------------------------- | 316 // ---------------------------------------------------------------------------- |
| 317 // JavaScript Scanner. | 317 // JavaScript Scanner. |
| 318 | 318 |
| 319 class Scanner { | 319 class Scanner { |
| 320 public: | 320 public: |
| 321 // Scoped helper for literal recording. Automatically drops the literal | 321 // Scoped helper for literal recording. Automatically drops the literal |
| 322 // if aborting the scanning before it's complete. | 322 // if aborting the scanning before it's complete. |
| 323 class LiteralScope { | 323 class LiteralScope { |
| 324 public: | 324 public: |
| 325 explicit LiteralScope(Scanner* self, bool capture_raw = false) | 325 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { |
| 326 : scanner_(self), complete_(false) { | |
| 327 scanner_->StartLiteral(); | 326 scanner_->StartLiteral(); |
| 328 if (capture_raw) scanner_->StartRawLiteral(); | |
| 329 } | 327 } |
| 330 ~LiteralScope() { | 328 ~LiteralScope() { |
| 331 if (!complete_) scanner_->DropLiteral(); | 329 if (!complete_) scanner_->DropLiteral(); |
| 332 } | 330 } |
| 333 void Complete() { | 331 void Complete() { |
| 334 scanner_->TerminateLiteral(); | |
| 335 complete_ = true; | 332 complete_ = true; |
| 336 } | 333 } |
| 337 | 334 |
| 338 private: | 335 private: |
| 339 Scanner* scanner_; | 336 Scanner* scanner_; |
| 340 bool complete_; | 337 bool complete_; |
| 341 }; | 338 }; |
| 342 | 339 |
| 343 // Representation of an interval of source positions. | 340 // Representation of an interval of source positions. |
| 344 struct Location { | 341 struct Location { |
| (...skipping 154 matching lines...) |
| 499 struct TokenDesc { | 496 struct TokenDesc { |
| 500 Token::Value token; | 497 Token::Value token; |
| 501 Location location; | 498 Location location; |
| 502 LiteralBuffer* literal_chars; | 499 LiteralBuffer* literal_chars; |
| 503 LiteralBuffer* raw_literal_chars; | 500 LiteralBuffer* raw_literal_chars; |
| 504 }; | 501 }; |
| 505 | 502 |
| 506 static const int kCharacterLookaheadBufferSize = 1; | 503 static const int kCharacterLookaheadBufferSize = 1; |
| 507 | 504 |
| 508 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 505 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 506 template <bool capture_raw> |
| 509 uc32 ScanOctalEscape(uc32 c, int length); | 507 uc32 ScanOctalEscape(uc32 c, int length); |
| 510 | 508 |
| 511 // Call this after setting source_ to the input. | 509 // Call this after setting source_ to the input. |
| 512 void Init() { | 510 void Init() { |
| 513 // Set c0_ (one character ahead) | 511 // Set c0_ (one character ahead) |
| 514 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 512 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
| 515 Advance(); | 513 Advance(); |
| 516 // Initialize current_ to not refer to a literal. | 514 // Initialize current_ to not refer to a literal. |
| 517 current_.literal_chars = NULL; | 515 current_.literal_chars = NULL; |
| 518 current_.raw_literal_chars = NULL; | 516 current_.raw_literal_chars = NULL; |
| 519 } | 517 } |
| 520 | 518 |
| 521 // Literal buffer support | 519 // Literal buffer support |
| 522 inline void StartLiteral() { | 520 inline void StartLiteral() { |
| 523 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 521 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
| 524 &literal_buffer2_ : &literal_buffer1_; | 522 &literal_buffer2_ : &literal_buffer1_; |
| 525 free_buffer->Reset(); | 523 free_buffer->Reset(); |
| 526 next_.literal_chars = free_buffer; | 524 next_.literal_chars = free_buffer; |
| 527 } | 525 } |
| 528 | 526 |
| 529 inline void StartRawLiteral() { | 527 inline void StartRawLiteral() { |
| 530 raw_literal_buffer_.Reset(); | 528 raw_literal_buffer_.Reset(); |
| 531 next_.raw_literal_chars = &raw_literal_buffer_; | 529 next_.raw_literal_chars = &raw_literal_buffer_; |
| 532 capturing_raw_literal_ = true; | |
| 533 } | 530 } |
| 534 | 531 |
| 535 INLINE(void AddLiteralChar(uc32 c)) { | 532 INLINE(void AddLiteralChar(uc32 c)) { |
| 536 DCHECK_NOT_NULL(next_.literal_chars); | 533 DCHECK_NOT_NULL(next_.literal_chars); |
| 537 next_.literal_chars->AddChar(c); | 534 next_.literal_chars->AddChar(c); |
| 538 } | 535 } |
| 539 | 536 |
| 540 INLINE(void AddRawLiteralChar(uc32 c)) { | 537 INLINE(void AddRawLiteralChar(uc32 c)) { |
| 541 DCHECK(capturing_raw_literal_); | |
| 542 DCHECK_NOT_NULL(next_.raw_literal_chars); | 538 DCHECK_NOT_NULL(next_.raw_literal_chars); |
| 543 next_.raw_literal_chars->AddChar(c); | 539 next_.raw_literal_chars->AddChar(c); |
| 544 } | 540 } |
| 545 | 541 |
| 546 INLINE(void ReduceRawLiteralLength(int delta)) { | 542 INLINE(void ReduceRawLiteralLength(int delta)) { |
| 547 DCHECK(capturing_raw_literal_); | |
| 548 DCHECK_NOT_NULL(next_.raw_literal_chars); | 543 DCHECK_NOT_NULL(next_.raw_literal_chars); |
| 549 next_.raw_literal_chars->ReduceLength(delta); | 544 next_.raw_literal_chars->ReduceLength(delta); |
| 550 } | 545 } |
| 551 | 546 |
| 552 // Complete scanning of a literal. | |
| 553 inline void TerminateLiteral() { capturing_raw_literal_ = false; } | |
| 554 | |
| 555 // Stops scanning of a literal and drops the collected characters, | 547 // Stops scanning of a literal and drops the collected characters, |
| 556 // e.g., due to an encountered error. | 548 // e.g., due to an encountered error. |
| 557 inline void DropLiteral() { | 549 inline void DropLiteral() { |
| 558 next_.literal_chars = NULL; | 550 next_.literal_chars = NULL; |
| 559 next_.raw_literal_chars = NULL; | 551 next_.raw_literal_chars = NULL; |
| 560 capturing_raw_literal_ = false; | |
| 561 } | 552 } |
| 562 | 553 |
| 563 inline void AddLiteralCharAdvance() { | 554 inline void AddLiteralCharAdvance() { |
| 564 AddLiteralChar(c0_); | 555 AddLiteralChar(c0_); |
| 565 Advance(); | 556 Advance(); |
| 566 } | 557 } |
| 567 | 558 |
| 568 // Low-level scanning support. | 559 // Low-level scanning support. |
| 560 template <bool capture_raw = false> |
| 569 void Advance() { | 561 void Advance() { |
| 570 if (capturing_raw_literal_) { | 562 if (capture_raw) { |
| 571 AddRawLiteralChar(c0_); | 563 AddRawLiteralChar(c0_); |
| 572 } | 564 } |
| 573 c0_ = source_->Advance(); | 565 c0_ = source_->Advance(); |
| 574 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | 566 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { |
| 575 uc32 c1 = source_->Advance(); | 567 uc32 c1 = source_->Advance(); |
| 576 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | 568 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { |
| 577 source_->PushBack(c1); | 569 source_->PushBack(c1); |
| 578 } else { | 570 } else { |
| 579 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); | 571 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); |
| 580 } | 572 } |
| 581 } | 573 } |
| 582 } | 574 } |
| 583 | 575 |
| 584 void PushBack(uc32 ch) { | 576 void PushBack(uc32 ch) { |
| 585 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | 577 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { |
| 586 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); | 578 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); |
| 587 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); | 579 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); |
| 588 if (capturing_raw_literal_) ReduceRawLiteralLength(2); | |
| 589 } else { | 580 } else { |
| 590 source_->PushBack(c0_); | 581 source_->PushBack(c0_); |
| 591 if (capturing_raw_literal_) ReduceRawLiteralLength(1); | |
| 592 } | 582 } |
| 593 c0_ = ch; | 583 c0_ = ch; |
| 594 } | 584 } |
| 595 | 585 |
| 596 inline Token::Value Select(Token::Value tok) { | 586 inline Token::Value Select(Token::Value tok) { |
| 597 Advance(); | 587 Advance(); |
| 598 return tok; | 588 return tok; |
| 599 } | 589 } |
| 600 | 590 |
| 601 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 591 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| 602 Advance(); | 592 Advance(); |
| 603 if (c0_ == next) { | 593 if (c0_ == next) { |
| 604 Advance(); | 594 Advance(); |
| 605 return then; | 595 return then; |
| 606 } else { | 596 } else { |
| 607 return else_; | 597 return else_; |
| 608 } | 598 } |
| 609 } | 599 } |
| 610 | 600 |
| 611 // Returns the literal string, if any, for the current token (the | 601 // Returns the literal string, if any, for the current token (the |
| 612 // token last returned by Next()). The string is 0-terminated. | 602 // token last returned by Next()). The string is 0-terminated. |
| 613 // Literal strings are collected for identifiers, strings, numbers as well | 603 // Literal strings are collected for identifiers, strings, numbers as well |
| 614 // as for template literals. For template literals we also collect the raw | 604 // as for template literals. For template literals we also collect the raw |
| 615 // form. | 605 // form. |
| 616 // These functions only give the correct result if the literal | 606 // These functions only give the correct result if the literal was scanned |
| 617 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 607 // when a LiteralScope object is alive. |
| 618 Vector<const uint8_t> literal_one_byte_string() { | 608 Vector<const uint8_t> literal_one_byte_string() { |
| 619 DCHECK_NOT_NULL(current_.literal_chars); | 609 DCHECK_NOT_NULL(current_.literal_chars); |
| 620 return current_.literal_chars->one_byte_literal(); | 610 return current_.literal_chars->one_byte_literal(); |
| 621 } | 611 } |
| 622 Vector<const uint16_t> literal_two_byte_string() { | 612 Vector<const uint16_t> literal_two_byte_string() { |
| 623 DCHECK_NOT_NULL(current_.literal_chars); | 613 DCHECK_NOT_NULL(current_.literal_chars); |
| 624 return current_.literal_chars->two_byte_literal(); | 614 return current_.literal_chars->two_byte_literal(); |
| 625 } | 615 } |
| 626 bool is_literal_one_byte() { | 616 bool is_literal_one_byte() { |
| 627 DCHECK_NOT_NULL(current_.literal_chars); | 617 DCHECK_NOT_NULL(current_.literal_chars); |
| (...skipping 23 matching lines...) |
| 651 } | 641 } |
| 652 Vector<const uint16_t> raw_literal_two_byte_string() { | 642 Vector<const uint16_t> raw_literal_two_byte_string() { |
| 653 DCHECK_NOT_NULL(current_.raw_literal_chars); | 643 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 654 return current_.raw_literal_chars->two_byte_literal(); | 644 return current_.raw_literal_chars->two_byte_literal(); |
| 655 } | 645 } |
| 656 bool is_raw_literal_one_byte() { | 646 bool is_raw_literal_one_byte() { |
| 657 DCHECK_NOT_NULL(current_.raw_literal_chars); | 647 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 658 return current_.raw_literal_chars->is_one_byte(); | 648 return current_.raw_literal_chars->is_one_byte(); |
| 659 } | 649 } |
| 660 | 650 |
| 661 | 651 template <bool capture_raw> |
| 662 uc32 ScanHexNumber(int expected_length); | 652 uc32 ScanHexNumber(int expected_length); |
| 663 // Scan a number of any length but not bigger than max_value. For example, the | 653 // Scan a number of any length but not bigger than max_value. For example, the |
| 664 // number can be 000000001, so it's very long in characters but its value is | 654 // number can be 000000001, so it's very long in characters but its value is |
| 665 // small. | 655 // small. |
| 656 template <bool capture_raw> |
| 666 uc32 ScanUnlimitedLengthHexNumber(int max_value); | 657 uc32 ScanUnlimitedLengthHexNumber(int max_value); |
| 667 | 658 |
| 668 // Scans a single JavaScript token. | 659 // Scans a single JavaScript token. |
| 669 void Scan(); | 660 void Scan(); |
| 670 | 661 |
| 671 bool SkipWhiteSpace(); | 662 bool SkipWhiteSpace(); |
| 672 Token::Value SkipSingleLineComment(); | 663 Token::Value SkipSingleLineComment(); |
| 673 Token::Value SkipSourceURLComment(); | 664 Token::Value SkipSourceURLComment(); |
| 674 void TryToParseSourceURLComment(); | 665 void TryToParseSourceURLComment(); |
| 675 Token::Value SkipMultiLineComment(); | 666 Token::Value SkipMultiLineComment(); |
| 676 // Scans a possible HTML comment -- begins with '<!'. | 667 // Scans a possible HTML comment -- begins with '<!'. |
| 677 Token::Value ScanHtmlComment(); | 668 Token::Value ScanHtmlComment(); |
| 678 | 669 |
| 679 void ScanDecimalDigits(); | 670 void ScanDecimalDigits(); |
| 680 Token::Value ScanNumber(bool seen_period); | 671 Token::Value ScanNumber(bool seen_period); |
| 681 Token::Value ScanIdentifierOrKeyword(); | 672 Token::Value ScanIdentifierOrKeyword(); |
| 682 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 673 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
| 683 | 674 |
| 684 Token::Value ScanString(); | 675 Token::Value ScanString(); |
| 685 | 676 |
| 686 // Scans an escape-sequence which is part of a string and adds the | 677 // Scans an escape-sequence which is part of a string and adds the |
| 687 // decoded character to the current literal. Returns true if a pattern | 678 // decoded character to the current literal. Returns true if a pattern |
| 688 // is scanned. | 679 // is scanned. |
| 680 template <bool capture_raw> |
| 689 bool ScanEscape(); | 681 bool ScanEscape(); |
| 690 // Decodes a Unicode escape-sequence which is part of an identifier. | 682 // Decodes a Unicode escape-sequence which is part of an identifier. |
| 691 // If the escape sequence cannot be decoded the result is kBadChar. | 683 // If the escape sequence cannot be decoded the result is kBadChar. |
| 692 uc32 ScanIdentifierUnicodeEscape(); | 684 uc32 ScanIdentifierUnicodeEscape(); |
| 693 // Helper for the above functions. | 685 // Helper for the above functions. |
| 686 template <bool capture_raw> |
| 694 uc32 ScanUnicodeEscape(); | 687 uc32 ScanUnicodeEscape(); |
| 695 | 688 |
| 696 Token::Value ScanTemplateSpan(); | 689 Token::Value ScanTemplateSpan(); |
| 697 | 690 |
| 698 // Return the current source position. | 691 // Return the current source position. |
| 699 int source_pos() { | 692 int source_pos() { |
| 700 return source_->pos() - kCharacterLookaheadBufferSize; | 693 return source_->pos() - kCharacterLookaheadBufferSize; |
| 701 } | 694 } |
| 702 | 695 |
| 703 UnicodeCache* unicode_cache_; | 696 UnicodeCache* unicode_cache_; |
| 704 | 697 |
| 705 // Buffers collecting literal strings, numbers, etc. | 698 // Buffers collecting literal strings, numbers, etc. |
| 706 LiteralBuffer literal_buffer1_; | 699 LiteralBuffer literal_buffer1_; |
| 707 LiteralBuffer literal_buffer2_; | 700 LiteralBuffer literal_buffer2_; |
| 708 | 701 |
| 709 // Values parsed from magic comments. | 702 // Values parsed from magic comments. |
| 710 LiteralBuffer source_url_; | 703 LiteralBuffer source_url_; |
| 711 LiteralBuffer source_mapping_url_; | 704 LiteralBuffer source_mapping_url_; |
| 712 | 705 |
| 713 // Buffer to store raw string values | 706 // Buffer to store raw string values |
| 714 LiteralBuffer raw_literal_buffer_; | 707 LiteralBuffer raw_literal_buffer_; |
| 715 | 708 |
| 716 // We only need to capture the raw literal when we are scanning template | |
| 717 // literal spans. | |
| 718 bool capturing_raw_literal_; | |
| 719 | |
| 720 TokenDesc current_; // desc for current token (as returned by Next()) | 709 TokenDesc current_; // desc for current token (as returned by Next()) |
| 721 TokenDesc next_; // desc for next token (one token look-ahead) | 710 TokenDesc next_; // desc for next token (one token look-ahead) |
| 722 | 711 |
| 723 // Input stream. Must be initialized to a Utf16CharacterStream. | 712 // Input stream. Must be initialized to a Utf16CharacterStream. |
| 724 Utf16CharacterStream* source_; | 713 Utf16CharacterStream* source_; |
| 725 | 714 |
| 726 | 715 |
| 727 // Start position of the octal literal last scanned. | 716 // Start position of the octal literal last scanned. |
| 728 Location octal_pos_; | 717 Location octal_pos_; |
| 729 | 718 |
| (...skipping 17 matching lines...) |
| 747 bool harmony_classes_; | 736 bool harmony_classes_; |
| 748 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | 737 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL |
| 749 bool harmony_templates_; | 738 bool harmony_templates_; |
| 750 // Whether we allow \u{xxxxx}. | 739 // Whether we allow \u{xxxxx}. |
| 751 bool harmony_unicode_; | 740 bool harmony_unicode_; |
| 752 }; | 741 }; |
| 753 | 742 |
| 754 } } // namespace v8::internal | 743 } } // namespace v8::internal |
| 755 | 744 |
| 756 #endif // V8_SCANNER_H_ | 745 #endif // V8_SCANNER_H_ |
| OLD | NEW |