OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <cmath> | 9 #include <cmath> |
10 | 10 |
(...skipping 16 matching lines...) Expand all Loading... |
27 } | 27 } |
28 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 28 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
29 } | 29 } |
30 | 30 |
31 | 31 |
32 // ---------------------------------------------------------------------------- | 32 // ---------------------------------------------------------------------------- |
33 // Scanner | 33 // Scanner |
34 | 34 |
35 Scanner::Scanner(UnicodeCache* unicode_cache) | 35 Scanner::Scanner(UnicodeCache* unicode_cache) |
36 : unicode_cache_(unicode_cache), | 36 : unicode_cache_(unicode_cache), |
37 capturing_raw_literal_(false), | |
38 octal_pos_(Location::invalid()), | 37 octal_pos_(Location::invalid()), |
39 harmony_scoping_(false), | 38 harmony_scoping_(false), |
40 harmony_modules_(false), | 39 harmony_modules_(false), |
41 harmony_numeric_literals_(false), | 40 harmony_numeric_literals_(false), |
42 harmony_classes_(false), | 41 harmony_classes_(false), |
43 harmony_templates_(false), | 42 harmony_templates_(false), |
44 harmony_unicode_(false) {} | 43 harmony_unicode_(false) {} |
45 | 44 |
46 | 45 |
47 void Scanner::Initialize(Utf16CharacterStream* source) { | 46 void Scanner::Initialize(Utf16CharacterStream* source) { |
48 source_ = source; | 47 source_ = source; |
49 // Need to capture identifiers in order to recognize "get" and "set" | 48 // Need to capture identifiers in order to recognize "get" and "set" |
50 // in object literals. | 49 // in object literals. |
51 Init(); | 50 Init(); |
52 // Skip initial whitespace allowing HTML comment ends just like | 51 // Skip initial whitespace allowing HTML comment ends just like |
53 // after a newline and scan first token. | 52 // after a newline and scan first token. |
54 has_line_terminator_before_next_ = true; | 53 has_line_terminator_before_next_ = true; |
55 SkipWhiteSpace(); | 54 SkipWhiteSpace(); |
56 Scan(); | 55 Scan(); |
57 } | 56 } |
58 | 57 |
59 | 58 |
| 59 template <bool capture_raw> |
60 uc32 Scanner::ScanHexNumber(int expected_length) { | 60 uc32 Scanner::ScanHexNumber(int expected_length) { |
61 DCHECK(expected_length <= 4); // prevent overflow | 61 DCHECK(expected_length <= 4); // prevent overflow |
62 | 62 |
63 uc32 x = 0; | 63 uc32 x = 0; |
64 for (int i = 0; i < expected_length; i++) { | 64 for (int i = 0; i < expected_length; i++) { |
65 int d = HexValue(c0_); | 65 int d = HexValue(c0_); |
66 if (d < 0) { | 66 if (d < 0) { |
67 return -1; | 67 return -1; |
68 } | 68 } |
69 x = x * 16 + d; | 69 x = x * 16 + d; |
70 Advance(); | 70 Advance<capture_raw>(); |
71 } | 71 } |
72 | 72 |
73 return x; | 73 return x; |
74 } | 74 } |
75 | 75 |
76 | 76 |
| 77 template <bool capture_raw> |
77 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { | 78 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { |
78 uc32 x = 0; | 79 uc32 x = 0; |
79 int d = HexValue(c0_); | 80 int d = HexValue(c0_); |
80 if (d < 0) { | 81 if (d < 0) { |
81 return -1; | 82 return -1; |
82 } | 83 } |
83 while (d >= 0) { | 84 while (d >= 0) { |
84 x = x * 16 + d; | 85 x = x * 16 + d; |
85 if (x > max_value) return -1; | 86 if (x > max_value) return -1; |
86 Advance(); | 87 Advance<capture_raw>(); |
87 d = HexValue(c0_); | 88 d = HexValue(c0_); |
88 } | 89 } |
89 return x; | 90 return x; |
90 } | 91 } |
91 | 92 |
92 | 93 |
93 // Ensure that tokens can be stored in a byte. | 94 // Ensure that tokens can be stored in a byte. |
94 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 95 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
95 | 96 |
96 // Table of one-character tokens, by character (0x00..0x7f only). | 97 // Table of one-character tokens, by character (0x00..0x7f only). |
(...skipping 592 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
689 // This function is only called to seek to the location | 690 // This function is only called to seek to the location |
690 // of the end of a function (at the "}" token). It doesn't matter | 691 // of the end of a function (at the "}" token). It doesn't matter |
691 // whether there was a line terminator in the part we skip. | 692 // whether there was a line terminator in the part we skip. |
692 has_line_terminator_before_next_ = false; | 693 has_line_terminator_before_next_ = false; |
693 has_multiline_comment_before_next_ = false; | 694 has_multiline_comment_before_next_ = false; |
694 } | 695 } |
695 Scan(); | 696 Scan(); |
696 } | 697 } |
697 | 698 |
698 | 699 |
| 700 template <bool capture_raw> |
699 bool Scanner::ScanEscape() { | 701 bool Scanner::ScanEscape() { |
700 uc32 c = c0_; | 702 uc32 c = c0_; |
701 Advance(); | 703 Advance<capture_raw>(); |
702 | 704 |
703 // Skip escaped newlines. | 705 // Skip escaped newlines. |
704 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { | 706 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { |
705 // Allow CR+LF newlines in multiline string literals. | 707 // Allow CR+LF newlines in multiline string literals. |
706 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 708 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); |
707 // Allow LF+CR newlines in multiline string literals. | 709 // Allow LF+CR newlines in multiline string literals. |
708 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 710 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); |
709 return true; | 711 return true; |
710 } | 712 } |
711 | 713 |
712 switch (c) { | 714 switch (c) { |
713 case '\'': // fall through | 715 case '\'': // fall through |
714 case '"' : // fall through | 716 case '"' : // fall through |
715 case '\\': break; | 717 case '\\': break; |
716 case 'b' : c = '\b'; break; | 718 case 'b' : c = '\b'; break; |
717 case 'f' : c = '\f'; break; | 719 case 'f' : c = '\f'; break; |
718 case 'n' : c = '\n'; break; | 720 case 'n' : c = '\n'; break; |
719 case 'r' : c = '\r'; break; | 721 case 'r' : c = '\r'; break; |
720 case 't' : c = '\t'; break; | 722 case 't' : c = '\t'; break; |
721 case 'u' : { | 723 case 'u' : { |
722 c = ScanUnicodeEscape(); | 724 c = ScanUnicodeEscape<capture_raw>(); |
723 if (c < 0) return false; | 725 if (c < 0) return false; |
724 break; | 726 break; |
725 } | 727 } |
726 case 'v' : c = '\v'; break; | 728 case 'v' : c = '\v'; break; |
727 case 'x' : { | 729 case 'x' : { |
728 c = ScanHexNumber(2); | 730 c = ScanHexNumber<capture_raw>(2); |
729 if (c < 0) return false; | 731 if (c < 0) return false; |
730 break; | 732 break; |
731 } | 733 } |
732 case '0' : // fall through | 734 case '0' : // fall through |
733 case '1' : // fall through | 735 case '1' : // fall through |
734 case '2' : // fall through | 736 case '2' : // fall through |
735 case '3' : // fall through | 737 case '3' : // fall through |
736 case '4' : // fall through | 738 case '4' : // fall through |
737 case '5' : // fall through | 739 case '5' : // fall through |
738 case '6' : // fall through | 740 case '6' : // fall through |
739 case '7' : c = ScanOctalEscape(c, 2); break; | 741 case '7': |
| 742 c = ScanOctalEscape<capture_raw>(c, 2); |
| 743 break; |
740 } | 744 } |
741 | 745 |
742 // According to ECMA-262, section 7.8.4, characters not covered by the | 746 // According to ECMA-262, section 7.8.4, characters not covered by the |
743 // above cases should be illegal, but they are commonly handled as | 747 // above cases should be illegal, but they are commonly handled as |
744 // non-escaped characters by JS VMs. | 748 // non-escaped characters by JS VMs. |
745 AddLiteralChar(c); | 749 AddLiteralChar(c); |
746 return true; | 750 return true; |
747 } | 751 } |
748 | 752 |
749 | 753 |
750 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 754 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
751 // ECMA-262. Other JS VMs support them. | 755 // ECMA-262. Other JS VMs support them. |
| 756 template <bool capture_raw> |
752 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { | 757 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { |
753 uc32 x = c - '0'; | 758 uc32 x = c - '0'; |
754 int i = 0; | 759 int i = 0; |
755 for (; i < length; i++) { | 760 for (; i < length; i++) { |
756 int d = c0_ - '0'; | 761 int d = c0_ - '0'; |
757 if (d < 0 || d > 7) break; | 762 if (d < 0 || d > 7) break; |
758 int nx = x * 8 + d; | 763 int nx = x * 8 + d; |
759 if (nx >= 256) break; | 764 if (nx >= 256) break; |
760 x = nx; | 765 x = nx; |
761 Advance(); | 766 Advance<capture_raw>(); |
762 } | 767 } |
763 // Anything except '\0' is an octal escape sequence, illegal in strict mode. | 768 // Anything except '\0' is an octal escape sequence, illegal in strict mode. |
764 // Remember the position of octal escape sequences so that an error | 769 // Remember the position of octal escape sequences so that an error |
765 // can be reported later (in strict mode). | 770 // can be reported later (in strict mode). |
766 // We don't report the error immediately, because the octal escape can | 771 // We don't report the error immediately, because the octal escape can |
767 // occur before the "use strict" directive. | 772 // occur before the "use strict" directive. |
768 if (c != '0' || i > 0) { | 773 if (c != '0' || i > 0) { |
769 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); | 774 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); |
770 } | 775 } |
771 return x; | 776 return x; |
772 } | 777 } |
773 | 778 |
774 | 779 |
775 Token::Value Scanner::ScanString() { | 780 Token::Value Scanner::ScanString() { |
776 uc32 quote = c0_; | 781 uc32 quote = c0_; |
777 Advance(); // consume quote | 782 Advance(); // consume quote |
778 | 783 |
779 LiteralScope literal(this); | 784 LiteralScope literal(this); |
780 while (c0_ != quote && c0_ >= 0 | 785 while (c0_ != quote && c0_ >= 0 |
781 && !unicode_cache_->IsLineTerminator(c0_)) { | 786 && !unicode_cache_->IsLineTerminator(c0_)) { |
782 uc32 c = c0_; | 787 uc32 c = c0_; |
783 Advance(); | 788 Advance(); |
784 if (c == '\\') { | 789 if (c == '\\') { |
785 if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL; | 790 if (c0_ < 0 || !ScanEscape<false>()) return Token::ILLEGAL; |
786 } else { | 791 } else { |
787 AddLiteralChar(c); | 792 AddLiteralChar(c); |
788 } | 793 } |
789 } | 794 } |
790 if (c0_ != quote) return Token::ILLEGAL; | 795 if (c0_ != quote) return Token::ILLEGAL; |
791 literal.Complete(); | 796 literal.Complete(); |
792 | 797 |
793 Advance(); // consume quote | 798 Advance(); // consume quote |
794 return Token::STRING; | 799 return Token::STRING; |
795 } | 800 } |
796 | 801 |
797 | 802 |
798 Token::Value Scanner::ScanTemplateSpan() { | 803 Token::Value Scanner::ScanTemplateSpan() { |
799 // When scanning a TemplateSpan, we are looking for the following construct: | 804 // When scanning a TemplateSpan, we are looking for the following construct: |
800 // TEMPLATE_SPAN :: | 805 // TEMPLATE_SPAN :: |
801 // ` LiteralChars* ${ | 806 // ` LiteralChars* ${ |
802 // | } LiteralChars* ${ | 807 // | } LiteralChars* ${ |
803 // | 808 // |
804 // TEMPLATE_TAIL :: | 809 // TEMPLATE_TAIL :: |
805 // ` LiteralChars* ` | 810 // ` LiteralChars* ` |
806 // | } LiteralChar* ` | 811 // | } LiteralChar* ` |
807 // | 812 // |
808 // A TEMPLATE_SPAN should always be followed by an Expression, while a | 813 // A TEMPLATE_SPAN should always be followed by an Expression, while a |
809 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be | 814 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be |
810 // followed by an Expression. | 815 // followed by an Expression. |
811 | 816 |
812 Token::Value result = Token::TEMPLATE_SPAN; | 817 Token::Value result = Token::TEMPLATE_SPAN; |
813 LiteralScope literal(this, true); | 818 LiteralScope literal(this); |
| 819 StartRawLiteral(); |
| 820 const bool capture_raw = true; |
814 | 821 |
815 while (true) { | 822 while (true) { |
816 uc32 c = c0_; | 823 uc32 c = c0_; |
817 Advance(); | 824 Advance<capture_raw>(); |
818 if (c == '`') { | 825 if (c == '`') { |
819 result = Token::TEMPLATE_TAIL; | 826 result = Token::TEMPLATE_TAIL; |
820 ReduceRawLiteralLength(1); | 827 ReduceRawLiteralLength(1); |
821 break; | 828 break; |
822 } else if (c == '$' && c0_ == '{') { | 829 } else if (c == '$' && c0_ == '{') { |
823 Advance(); // Consume '{' | 830 Advance<capture_raw>(); // Consume '{' |
824 ReduceRawLiteralLength(2); | 831 ReduceRawLiteralLength(2); |
825 break; | 832 break; |
826 } else if (c == '\\') { | 833 } else if (c == '\\') { |
827 if (unicode_cache_->IsLineTerminator(c0_)) { | 834 if (unicode_cache_->IsLineTerminator(c0_)) { |
828 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty | 835 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty |
829 // code unit sequence. | 836 // code unit sequence. |
830 uc32 lastChar = c0_; | 837 uc32 lastChar = c0_; |
831 Advance(); | 838 Advance<capture_raw>(); |
832 if (lastChar == '\r') { | 839 if (lastChar == '\r') { |
833 ReduceRawLiteralLength(1); // Remove \r | 840 ReduceRawLiteralLength(1); // Remove \r |
834 if (c0_ == '\n') { | 841 if (c0_ == '\n') { |
835 Advance(); // Adds \n | 842 Advance<capture_raw>(); // Adds \n |
836 } else { | 843 } else { |
837 AddRawLiteralChar('\n'); | 844 AddRawLiteralChar('\n'); |
838 } | 845 } |
839 } | 846 } |
840 } else if (c0_ == '0') { | 847 } else if (c0_ == '0') { |
841 Advance(); | 848 Advance<capture_raw>(); |
842 AddLiteralChar('0'); | 849 AddLiteralChar('0'); |
843 } else { | 850 } else { |
844 ScanEscape(); | 851 ScanEscape<true>(); |
845 } | 852 } |
846 } else if (c < 0) { | 853 } else if (c < 0) { |
847 // Unterminated template literal | 854 // Unterminated template literal |
848 PushBack(c); | 855 PushBack(c); |
849 break; | 856 break; |
850 } else { | 857 } else { |
851 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. | 858 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. |
852 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence | 859 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence |
853 // consisting of the CV 0x000A. | 860 // consisting of the CV 0x000A. |
854 if (c == '\r') { | 861 if (c == '\r') { |
855 ReduceRawLiteralLength(1); // Remove \r | 862 ReduceRawLiteralLength(1); // Remove \r |
856 if (c0_ == '\n') { | 863 if (c0_ == '\n') { |
857 Advance(); // Adds \n | 864 Advance<capture_raw>(); // Adds \n |
858 } else { | 865 } else { |
859 AddRawLiteralChar('\n'); | 866 AddRawLiteralChar('\n'); |
860 } | 867 } |
861 c = '\n'; | 868 c = '\n'; |
862 } | 869 } |
863 AddLiteralChar(c); | 870 AddLiteralChar(c); |
864 } | 871 } |
865 } | 872 } |
866 literal.Complete(); | 873 literal.Complete(); |
867 next_.location.end_pos = source_pos(); | 874 next_.location.end_pos = source_pos(); |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
995 literal.Complete(); | 1002 literal.Complete(); |
996 | 1003 |
997 return Token::NUMBER; | 1004 return Token::NUMBER; |
998 } | 1005 } |
999 | 1006 |
1000 | 1007 |
1001 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 1008 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
1002 Advance(); | 1009 Advance(); |
1003 if (c0_ != 'u') return -1; | 1010 if (c0_ != 'u') return -1; |
1004 Advance(); | 1011 Advance(); |
1005 return ScanUnicodeEscape(); | 1012 return ScanUnicodeEscape<false>(); |
1006 } | 1013 } |
1007 | 1014 |
1008 | 1015 |
| 1016 template <bool capture_raw> |
1009 uc32 Scanner::ScanUnicodeEscape() { | 1017 uc32 Scanner::ScanUnicodeEscape() { |
1010 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are | 1018 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are |
1011 // allowed). In the latter case, the number of hex digits between { } is | 1019 // allowed). In the latter case, the number of hex digits between { } is |
1012 // arbitrary. \ and u have already been read. | 1020 // arbitrary. \ and u have already been read. |
1013 if (c0_ == '{' && HarmonyUnicode()) { | 1021 if (c0_ == '{' && HarmonyUnicode()) { |
1014 Advance(); | 1022 Advance<capture_raw>(); |
1015 uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff); | 1023 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); |
1016 if (cp < 0) { | 1024 if (cp < 0) { |
1017 return -1; | 1025 return -1; |
1018 } | 1026 } |
1019 if (c0_ != '}') { | 1027 if (c0_ != '}') { |
1020 return -1; | 1028 return -1; |
1021 } | 1029 } |
1022 Advance(); | 1030 Advance<capture_raw>(); |
1023 return cp; | 1031 return cp; |
1024 } | 1032 } |
1025 return ScanHexNumber(4); | 1033 return ScanHexNumber<capture_raw>(4); |
1026 } | 1034 } |
1027 | 1035 |
1028 | 1036 |
1029 // ---------------------------------------------------------------------------- | 1037 // ---------------------------------------------------------------------------- |
1030 // Keyword Matcher | 1038 // Keyword Matcher |
1031 | 1039 |
1032 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 1040 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
1033 KEYWORD_GROUP('b') \ | 1041 KEYWORD_GROUP('b') \ |
1034 KEYWORD("break", Token::BREAK) \ | 1042 KEYWORD("break", Token::BREAK) \ |
1035 KEYWORD_GROUP('c') \ | 1043 KEYWORD_GROUP('c') \ |
(...skipping 434 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1470 } | 1478 } |
1471 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1479 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
1472 } | 1480 } |
1473 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1481 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
1474 | 1482 |
1475 backing_store_.AddBlock(bytes); | 1483 backing_store_.AddBlock(bytes); |
1476 return backing_store_.EndSequence().start(); | 1484 return backing_store_.EndSequence().start(); |
1477 } | 1485 } |
1478 | 1486 |
1479 } } // namespace v8::internal | 1487 } } // namespace v8::internal |
OLD | NEW |