| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include <stdint.h> | 7 #include <stdint.h> |
| 8 | 8 |
| 9 #include <cmath> | 9 #include <cmath> |
| 10 | 10 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 27 } | 27 } |
| 28 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 28 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
| 29 } | 29 } |
| 30 | 30 |
| 31 | 31 |
| 32 // ---------------------------------------------------------------------------- | 32 // ---------------------------------------------------------------------------- |
| 33 // Scanner | 33 // Scanner |
| 34 | 34 |
| 35 Scanner::Scanner(UnicodeCache* unicode_cache) | 35 Scanner::Scanner(UnicodeCache* unicode_cache) |
| 36 : unicode_cache_(unicode_cache), | 36 : unicode_cache_(unicode_cache), |
| 37 capturing_raw_literal_(false), | |
| 38 octal_pos_(Location::invalid()), | 37 octal_pos_(Location::invalid()), |
| 39 harmony_scoping_(false), | 38 harmony_scoping_(false), |
| 40 harmony_modules_(false), | 39 harmony_modules_(false), |
| 41 harmony_numeric_literals_(false), | 40 harmony_numeric_literals_(false), |
| 42 harmony_classes_(false), | 41 harmony_classes_(false), |
| 43 harmony_templates_(false), | 42 harmony_templates_(false), |
| 44 harmony_unicode_(false) {} | 43 harmony_unicode_(false) {} |
| 45 | 44 |
| 46 | 45 |
| 47 void Scanner::Initialize(Utf16CharacterStream* source) { | 46 void Scanner::Initialize(Utf16CharacterStream* source) { |
| 48 source_ = source; | 47 source_ = source; |
| 49 // Need to capture identifiers in order to recognize "get" and "set" | 48 // Need to capture identifiers in order to recognize "get" and "set" |
| 50 // in object literals. | 49 // in object literals. |
| 51 Init(); | 50 Init(); |
| 52 // Skip initial whitespace allowing HTML comment ends just like | 51 // Skip initial whitespace allowing HTML comment ends just like |
| 53 // after a newline and scan first token. | 52 // after a newline and scan first token. |
| 54 has_line_terminator_before_next_ = true; | 53 has_line_terminator_before_next_ = true; |
| 55 SkipWhiteSpace(); | 54 SkipWhiteSpace(); |
| 56 Scan(); | 55 Scan(); |
| 57 } | 56 } |
| 58 | 57 |
| 59 | 58 |
| 59 template <bool capture_raw> |
| 60 uc32 Scanner::ScanHexNumber(int expected_length) { | 60 uc32 Scanner::ScanHexNumber(int expected_length) { |
| 61 DCHECK(expected_length <= 4); // prevent overflow | 61 DCHECK(expected_length <= 4); // prevent overflow |
| 62 | 62 |
| 63 uc32 x = 0; | 63 uc32 x = 0; |
| 64 for (int i = 0; i < expected_length; i++) { | 64 for (int i = 0; i < expected_length; i++) { |
| 65 int d = HexValue(c0_); | 65 int d = HexValue(c0_); |
| 66 if (d < 0) { | 66 if (d < 0) { |
| 67 return -1; | 67 return -1; |
| 68 } | 68 } |
| 69 x = x * 16 + d; | 69 x = x * 16 + d; |
| 70 Advance(); | 70 Advance<capture_raw>(); |
| 71 } | 71 } |
| 72 | 72 |
| 73 return x; | 73 return x; |
| 74 } | 74 } |
| 75 | 75 |
| 76 | 76 |
| 77 template <bool capture_raw> |
| 77 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { | 78 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { |
| 78 uc32 x = 0; | 79 uc32 x = 0; |
| 79 int d = HexValue(c0_); | 80 int d = HexValue(c0_); |
| 80 if (d < 0) { | 81 if (d < 0) { |
| 81 return -1; | 82 return -1; |
| 82 } | 83 } |
| 83 while (d >= 0) { | 84 while (d >= 0) { |
| 84 x = x * 16 + d; | 85 x = x * 16 + d; |
| 85 if (x > max_value) return -1; | 86 if (x > max_value) return -1; |
| 86 Advance(); | 87 Advance<capture_raw>(); |
| 87 d = HexValue(c0_); | 88 d = HexValue(c0_); |
| 88 } | 89 } |
| 89 return x; | 90 return x; |
| 90 } | 91 } |
| 91 | 92 |
| 92 | 93 |
| 93 // Ensure that tokens can be stored in a byte. | 94 // Ensure that tokens can be stored in a byte. |
| 94 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 95 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
| 95 | 96 |
| 96 // Table of one-character tokens, by character (0x00..0x7f only). | 97 // Table of one-character tokens, by character (0x00..0x7f only). |
| (...skipping 592 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 689 // This function is only called to seek to the location | 690 // This function is only called to seek to the location |
| 690 // of the end of a function (at the "}" token). It doesn't matter | 691 // of the end of a function (at the "}" token). It doesn't matter |
| 691 // whether there was a line terminator in the part we skip. | 692 // whether there was a line terminator in the part we skip. |
| 692 has_line_terminator_before_next_ = false; | 693 has_line_terminator_before_next_ = false; |
| 693 has_multiline_comment_before_next_ = false; | 694 has_multiline_comment_before_next_ = false; |
| 694 } | 695 } |
| 695 Scan(); | 696 Scan(); |
| 696 } | 697 } |
| 697 | 698 |
| 698 | 699 |
| 700 template <bool capture_raw> |
| 699 bool Scanner::ScanEscape() { | 701 bool Scanner::ScanEscape() { |
| 700 uc32 c = c0_; | 702 uc32 c = c0_; |
| 701 Advance(); | 703 Advance<capture_raw>(); |
| 702 | 704 |
| 703 // Skip escaped newlines. | 705 // Skip escaped newlines. |
| 704 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { | 706 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { |
| 705 // Allow CR+LF newlines in multiline string literals. | 707 // Allow CR+LF newlines in multiline string literals. |
| 706 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 708 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); |
| 707 // Allow LF+CR newlines in multiline string literals. | 709 // Allow LF+CR newlines in multiline string literals. |
| 708 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 710 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); |
| 709 return true; | 711 return true; |
| 710 } | 712 } |
| 711 | 713 |
| 712 switch (c) { | 714 switch (c) { |
| 713 case '\'': // fall through | 715 case '\'': // fall through |
| 714 case '"' : // fall through | 716 case '"' : // fall through |
| 715 case '\\': break; | 717 case '\\': break; |
| 716 case 'b' : c = '\b'; break; | 718 case 'b' : c = '\b'; break; |
| 717 case 'f' : c = '\f'; break; | 719 case 'f' : c = '\f'; break; |
| 718 case 'n' : c = '\n'; break; | 720 case 'n' : c = '\n'; break; |
| 719 case 'r' : c = '\r'; break; | 721 case 'r' : c = '\r'; break; |
| 720 case 't' : c = '\t'; break; | 722 case 't' : c = '\t'; break; |
| 721 case 'u' : { | 723 case 'u' : { |
| 722 c = ScanUnicodeEscape(); | 724 c = ScanUnicodeEscape<capture_raw>(); |
| 723 if (c < 0) return false; | 725 if (c < 0) return false; |
| 724 break; | 726 break; |
| 725 } | 727 } |
| 726 case 'v' : c = '\v'; break; | 728 case 'v' : c = '\v'; break; |
| 727 case 'x' : { | 729 case 'x' : { |
| 728 c = ScanHexNumber(2); | 730 c = ScanHexNumber<capture_raw>(2); |
| 729 if (c < 0) return false; | 731 if (c < 0) return false; |
| 730 break; | 732 break; |
| 731 } | 733 } |
| 732 case '0' : // fall through | 734 case '0' : // fall through |
| 733 case '1' : // fall through | 735 case '1' : // fall through |
| 734 case '2' : // fall through | 736 case '2' : // fall through |
| 735 case '3' : // fall through | 737 case '3' : // fall through |
| 736 case '4' : // fall through | 738 case '4' : // fall through |
| 737 case '5' : // fall through | 739 case '5' : // fall through |
| 738 case '6' : // fall through | 740 case '6' : // fall through |
| 739 case '7' : c = ScanOctalEscape(c, 2); break; | 741 case '7': |
| 742 c = ScanOctalEscape<capture_raw>(c, 2); |
| 743 break; |
| 740 } | 744 } |
| 741 | 745 |
| 742 // According to ECMA-262, section 7.8.4, characters not covered by the | 746 // According to ECMA-262, section 7.8.4, characters not covered by the |
| 743 // above cases should be illegal, but they are commonly handled as | 747 // above cases should be illegal, but they are commonly handled as |
| 744 // non-escaped characters by JS VMs. | 748 // non-escaped characters by JS VMs. |
| 745 AddLiteralChar(c); | 749 AddLiteralChar(c); |
| 746 return true; | 750 return true; |
| 747 } | 751 } |
| 748 | 752 |
| 749 | 753 |
| 750 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 754 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
| 751 // ECMA-262. Other JS VMs support them. | 755 // ECMA-262. Other JS VMs support them. |
| 756 template <bool capture_raw> |
| 752 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { | 757 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { |
| 753 uc32 x = c - '0'; | 758 uc32 x = c - '0'; |
| 754 int i = 0; | 759 int i = 0; |
| 755 for (; i < length; i++) { | 760 for (; i < length; i++) { |
| 756 int d = c0_ - '0'; | 761 int d = c0_ - '0'; |
| 757 if (d < 0 || d > 7) break; | 762 if (d < 0 || d > 7) break; |
| 758 int nx = x * 8 + d; | 763 int nx = x * 8 + d; |
| 759 if (nx >= 256) break; | 764 if (nx >= 256) break; |
| 760 x = nx; | 765 x = nx; |
| 761 Advance(); | 766 Advance<capture_raw>(); |
| 762 } | 767 } |
| 763 // Anything except '\0' is an octal escape sequence, illegal in strict mode. | 768 // Anything except '\0' is an octal escape sequence, illegal in strict mode. |
| 764 // Remember the position of octal escape sequences so that an error | 769 // Remember the position of octal escape sequences so that an error |
| 765 // can be reported later (in strict mode). | 770 // can be reported later (in strict mode). |
| 766 // We don't report the error immediately, because the octal escape can | 771 // We don't report the error immediately, because the octal escape can |
| 767 // occur before the "use strict" directive. | 772 // occur before the "use strict" directive. |
| 768 if (c != '0' || i > 0) { | 773 if (c != '0' || i > 0) { |
| 769 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); | 774 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); |
| 770 } | 775 } |
| 771 return x; | 776 return x; |
| 772 } | 777 } |
| 773 | 778 |
| 774 | 779 |
| 775 Token::Value Scanner::ScanString() { | 780 Token::Value Scanner::ScanString() { |
| 776 uc32 quote = c0_; | 781 uc32 quote = c0_; |
| 777 Advance(); // consume quote | 782 Advance(); // consume quote |
| 778 | 783 |
| 779 LiteralScope literal(this); | 784 LiteralScope literal(this); |
| 780 while (c0_ != quote && c0_ >= 0 | 785 while (c0_ != quote && c0_ >= 0 |
| 781 && !unicode_cache_->IsLineTerminator(c0_)) { | 786 && !unicode_cache_->IsLineTerminator(c0_)) { |
| 782 uc32 c = c0_; | 787 uc32 c = c0_; |
| 783 Advance(); | 788 Advance(); |
| 784 if (c == '\\') { | 789 if (c == '\\') { |
| 785 if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL; | 790 if (c0_ < 0 || !ScanEscape<false>()) return Token::ILLEGAL; |
| 786 } else { | 791 } else { |
| 787 AddLiteralChar(c); | 792 AddLiteralChar(c); |
| 788 } | 793 } |
| 789 } | 794 } |
| 790 if (c0_ != quote) return Token::ILLEGAL; | 795 if (c0_ != quote) return Token::ILLEGAL; |
| 791 literal.Complete(); | 796 literal.Complete(); |
| 792 | 797 |
| 793 Advance(); // consume quote | 798 Advance(); // consume quote |
| 794 return Token::STRING; | 799 return Token::STRING; |
| 795 } | 800 } |
| 796 | 801 |
| 797 | 802 |
| 798 Token::Value Scanner::ScanTemplateSpan() { | 803 Token::Value Scanner::ScanTemplateSpan() { |
| 799 // When scanning a TemplateSpan, we are looking for the following construct: | 804 // When scanning a TemplateSpan, we are looking for the following construct: |
| 800 // TEMPLATE_SPAN :: | 805 // TEMPLATE_SPAN :: |
| 801 // ` LiteralChars* ${ | 806 // ` LiteralChars* ${ |
| 802 // | } LiteralChars* ${ | 807 // | } LiteralChars* ${ |
| 803 // | 808 // |
| 804 // TEMPLATE_TAIL :: | 809 // TEMPLATE_TAIL :: |
| 805 // ` LiteralChars* ` | 810 // ` LiteralChars* ` |
| 806 // | } LiteralChar* ` | 811 // | } LiteralChar* ` |
| 807 // | 812 // |
| 808 // A TEMPLATE_SPAN should always be followed by an Expression, while a | 813 // A TEMPLATE_SPAN should always be followed by an Expression, while a |
| 809 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be | 814 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be |
| 810 // followed by an Expression. | 815 // followed by an Expression. |
| 811 | 816 |
| 812 Token::Value result = Token::TEMPLATE_SPAN; | 817 Token::Value result = Token::TEMPLATE_SPAN; |
| 813 LiteralScope literal(this, true); | 818 LiteralScope literal(this); |
| 819 StartRawLiteral(); |
| 820 const bool capture_raw = true; |
| 814 | 821 |
| 815 while (true) { | 822 while (true) { |
| 816 uc32 c = c0_; | 823 uc32 c = c0_; |
| 817 Advance(); | 824 Advance<capture_raw>(); |
| 818 if (c == '`') { | 825 if (c == '`') { |
| 819 result = Token::TEMPLATE_TAIL; | 826 result = Token::TEMPLATE_TAIL; |
| 820 ReduceRawLiteralLength(1); | 827 ReduceRawLiteralLength(1); |
| 821 break; | 828 break; |
| 822 } else if (c == '$' && c0_ == '{') { | 829 } else if (c == '$' && c0_ == '{') { |
| 823 Advance(); // Consume '{' | 830 Advance<capture_raw>(); // Consume '{' |
| 824 ReduceRawLiteralLength(2); | 831 ReduceRawLiteralLength(2); |
| 825 break; | 832 break; |
| 826 } else if (c == '\\') { | 833 } else if (c == '\\') { |
| 827 if (unicode_cache_->IsLineTerminator(c0_)) { | 834 if (unicode_cache_->IsLineTerminator(c0_)) { |
| 828 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty | 835 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty |
| 829 // code unit sequence. | 836 // code unit sequence. |
| 830 uc32 lastChar = c0_; | 837 uc32 lastChar = c0_; |
| 831 Advance(); | 838 Advance<capture_raw>(); |
| 832 if (lastChar == '\r') { | 839 if (lastChar == '\r') { |
| 833 ReduceRawLiteralLength(1); // Remove \r | 840 ReduceRawLiteralLength(1); // Remove \r |
| 834 if (c0_ == '\n') { | 841 if (c0_ == '\n') { |
| 835 Advance(); // Adds \n | 842 Advance<capture_raw>(); // Adds \n |
| 836 } else { | 843 } else { |
| 837 AddRawLiteralChar('\n'); | 844 AddRawLiteralChar('\n'); |
| 838 } | 845 } |
| 839 } | 846 } |
| 840 } else if (c0_ == '0') { | 847 } else if (c0_ == '0') { |
| 841 Advance(); | 848 Advance<capture_raw>(); |
| 842 AddLiteralChar('0'); | 849 AddLiteralChar('0'); |
| 843 } else { | 850 } else { |
| 844 ScanEscape(); | 851 ScanEscape<true>(); |
| 845 } | 852 } |
| 846 } else if (c < 0) { | 853 } else if (c < 0) { |
| 847 // Unterminated template literal | 854 // Unterminated template literal |
| 848 PushBack(c); | 855 PushBack(c); |
| 849 break; | 856 break; |
| 850 } else { | 857 } else { |
| 851 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. | 858 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. |
| 852 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence | 859 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence |
| 853 // consisting of the CV 0x000A. | 860 // consisting of the CV 0x000A. |
| 854 if (c == '\r') { | 861 if (c == '\r') { |
| 855 ReduceRawLiteralLength(1); // Remove \r | 862 ReduceRawLiteralLength(1); // Remove \r |
| 856 if (c0_ == '\n') { | 863 if (c0_ == '\n') { |
| 857 Advance(); // Adds \n | 864 Advance<capture_raw>(); // Adds \n |
| 858 } else { | 865 } else { |
| 859 AddRawLiteralChar('\n'); | 866 AddRawLiteralChar('\n'); |
| 860 } | 867 } |
| 861 c = '\n'; | 868 c = '\n'; |
| 862 } | 869 } |
| 863 AddLiteralChar(c); | 870 AddLiteralChar(c); |
| 864 } | 871 } |
| 865 } | 872 } |
| 866 literal.Complete(); | 873 literal.Complete(); |
| 867 next_.location.end_pos = source_pos(); | 874 next_.location.end_pos = source_pos(); |
| (...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 995 literal.Complete(); | 1002 literal.Complete(); |
| 996 | 1003 |
| 997 return Token::NUMBER; | 1004 return Token::NUMBER; |
| 998 } | 1005 } |
| 999 | 1006 |
| 1000 | 1007 |
| 1001 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 1008 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
| 1002 Advance(); | 1009 Advance(); |
| 1003 if (c0_ != 'u') return -1; | 1010 if (c0_ != 'u') return -1; |
| 1004 Advance(); | 1011 Advance(); |
| 1005 return ScanUnicodeEscape(); | 1012 return ScanUnicodeEscape<false>(); |
| 1006 } | 1013 } |
| 1007 | 1014 |
| 1008 | 1015 |
| 1016 template <bool capture_raw> |
| 1009 uc32 Scanner::ScanUnicodeEscape() { | 1017 uc32 Scanner::ScanUnicodeEscape() { |
| 1010 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are | 1018 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are |
| 1011 // allowed). In the latter case, the number of hex digits between { } is | 1019 // allowed). In the latter case, the number of hex digits between { } is |
| 1012 // arbitrary. \ and u have already been read. | 1020 // arbitrary. \ and u have already been read. |
| 1013 if (c0_ == '{' && HarmonyUnicode()) { | 1021 if (c0_ == '{' && HarmonyUnicode()) { |
| 1014 Advance(); | 1022 Advance<capture_raw>(); |
| 1015 uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff); | 1023 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); |
| 1016 if (cp < 0) { | 1024 if (cp < 0) { |
| 1017 return -1; | 1025 return -1; |
| 1018 } | 1026 } |
| 1019 if (c0_ != '}') { | 1027 if (c0_ != '}') { |
| 1020 return -1; | 1028 return -1; |
| 1021 } | 1029 } |
| 1022 Advance(); | 1030 Advance<capture_raw>(); |
| 1023 return cp; | 1031 return cp; |
| 1024 } | 1032 } |
| 1025 return ScanHexNumber(4); | 1033 return ScanHexNumber<capture_raw>(4); |
| 1026 } | 1034 } |
| 1027 | 1035 |
| 1028 | 1036 |
| 1029 // ---------------------------------------------------------------------------- | 1037 // ---------------------------------------------------------------------------- |
| 1030 // Keyword Matcher | 1038 // Keyword Matcher |
| 1031 | 1039 |
| 1032 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 1040 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
| 1033 KEYWORD_GROUP('b') \ | 1041 KEYWORD_GROUP('b') \ |
| 1034 KEYWORD("break", Token::BREAK) \ | 1042 KEYWORD("break", Token::BREAK) \ |
| 1035 KEYWORD_GROUP('c') \ | 1043 KEYWORD_GROUP('c') \ |
| (...skipping 434 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1470 } | 1478 } |
| 1471 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1479 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
| 1472 } | 1480 } |
| 1473 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1481 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
| 1474 | 1482 |
| 1475 backing_store_.AddBlock(bytes); | 1483 backing_store_.AddBlock(bytes); |
| 1476 return backing_store_.EndSequence().start(); | 1484 return backing_store_.EndSequence().start(); |
| 1477 } | 1485 } |
| 1478 | 1486 |
| 1479 } } // namespace v8::internal | 1487 } } // namespace v8::internal |
| OLD | NEW |