Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(76)

Side by Side Diff: src/scanner.cc

Issue 766193003: Make template scan related function take a template<bool> parameter (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Remove empty TerminateLiteral and use default template param Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <cmath> 9 #include <cmath>
10 10
(...skipping 16 matching lines...) Expand all
27 } 27 }
28 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); 28 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
29 } 29 }
30 30
31 31
32 // ---------------------------------------------------------------------------- 32 // ----------------------------------------------------------------------------
33 // Scanner 33 // Scanner
34 34
35 Scanner::Scanner(UnicodeCache* unicode_cache) 35 Scanner::Scanner(UnicodeCache* unicode_cache)
36 : unicode_cache_(unicode_cache), 36 : unicode_cache_(unicode_cache),
37 capturing_raw_literal_(false),
38 octal_pos_(Location::invalid()), 37 octal_pos_(Location::invalid()),
39 harmony_scoping_(false), 38 harmony_scoping_(false),
40 harmony_modules_(false), 39 harmony_modules_(false),
41 harmony_numeric_literals_(false), 40 harmony_numeric_literals_(false),
42 harmony_classes_(false), 41 harmony_classes_(false),
43 harmony_templates_(false), 42 harmony_templates_(false),
44 harmony_unicode_(false) {} 43 harmony_unicode_(false) {}
45 44
46 45
47 void Scanner::Initialize(Utf16CharacterStream* source) { 46 void Scanner::Initialize(Utf16CharacterStream* source) {
48 source_ = source; 47 source_ = source;
49 // Need to capture identifiers in order to recognize "get" and "set" 48 // Need to capture identifiers in order to recognize "get" and "set"
50 // in object literals. 49 // in object literals.
51 Init(); 50 Init();
52 // Skip initial whitespace allowing HTML comment ends just like 51 // Skip initial whitespace allowing HTML comment ends just like
53 // after a newline and scan first token. 52 // after a newline and scan first token.
54 has_line_terminator_before_next_ = true; 53 has_line_terminator_before_next_ = true;
55 SkipWhiteSpace(); 54 SkipWhiteSpace();
56 Scan(); 55 Scan();
57 } 56 }
58 57
59 58
59 template <bool capture_raw>
60 uc32 Scanner::ScanHexNumber(int expected_length) { 60 uc32 Scanner::ScanHexNumber(int expected_length) {
61 DCHECK(expected_length <= 4); // prevent overflow 61 DCHECK(expected_length <= 4); // prevent overflow
62 62
63 uc32 x = 0; 63 uc32 x = 0;
64 for (int i = 0; i < expected_length; i++) { 64 for (int i = 0; i < expected_length; i++) {
65 int d = HexValue(c0_); 65 int d = HexValue(c0_);
66 if (d < 0) { 66 if (d < 0) {
67 return -1; 67 return -1;
68 } 68 }
69 x = x * 16 + d; 69 x = x * 16 + d;
70 Advance(); 70 Advance<capture_raw>();
71 } 71 }
72 72
73 return x; 73 return x;
74 } 74 }
75 75
76 76
77 template <bool capture_raw>
77 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { 78 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
78 uc32 x = 0; 79 uc32 x = 0;
79 int d = HexValue(c0_); 80 int d = HexValue(c0_);
80 if (d < 0) { 81 if (d < 0) {
81 return -1; 82 return -1;
82 } 83 }
83 while (d >= 0) { 84 while (d >= 0) {
84 x = x * 16 + d; 85 x = x * 16 + d;
85 if (x > max_value) return -1; 86 if (x > max_value) return -1;
86 Advance(); 87 Advance<capture_raw>();
87 d = HexValue(c0_); 88 d = HexValue(c0_);
88 } 89 }
89 return x; 90 return x;
90 } 91 }
91 92
92 93
93 // Ensure that tokens can be stored in a byte. 94 // Ensure that tokens can be stored in a byte.
94 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); 95 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
95 96
96 // Table of one-character tokens, by character (0x00..0x7f only). 97 // Table of one-character tokens, by character (0x00..0x7f only).
(...skipping 592 matching lines...) Expand 10 before | Expand all | Expand 10 after
689 // This function is only called to seek to the location 690 // This function is only called to seek to the location
690 // of the end of a function (at the "}" token). It doesn't matter 691 // of the end of a function (at the "}" token). It doesn't matter
691 // whether there was a line terminator in the part we skip. 692 // whether there was a line terminator in the part we skip.
692 has_line_terminator_before_next_ = false; 693 has_line_terminator_before_next_ = false;
693 has_multiline_comment_before_next_ = false; 694 has_multiline_comment_before_next_ = false;
694 } 695 }
695 Scan(); 696 Scan();
696 } 697 }
697 698
698 699
700 template <bool capture_raw>
699 bool Scanner::ScanEscape() { 701 bool Scanner::ScanEscape() {
700 uc32 c = c0_; 702 uc32 c = c0_;
701 Advance(); 703 Advance<capture_raw>();
702 704
703 // Skip escaped newlines. 705 // Skip escaped newlines.
704 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { 706 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {
705 // Allow CR+LF newlines in multiline string literals. 707 // Allow CR+LF newlines in multiline string literals.
706 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 708 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
707 // Allow LF+CR newlines in multiline string literals. 709 // Allow LF+CR newlines in multiline string literals.
708 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 710 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();
709 return true; 711 return true;
710 } 712 }
711 713
712 switch (c) { 714 switch (c) {
713 case '\'': // fall through 715 case '\'': // fall through
714 case '"' : // fall through 716 case '"' : // fall through
715 case '\\': break; 717 case '\\': break;
716 case 'b' : c = '\b'; break; 718 case 'b' : c = '\b'; break;
717 case 'f' : c = '\f'; break; 719 case 'f' : c = '\f'; break;
718 case 'n' : c = '\n'; break; 720 case 'n' : c = '\n'; break;
719 case 'r' : c = '\r'; break; 721 case 'r' : c = '\r'; break;
720 case 't' : c = '\t'; break; 722 case 't' : c = '\t'; break;
721 case 'u' : { 723 case 'u' : {
722 c = ScanUnicodeEscape(); 724 c = ScanUnicodeEscape<capture_raw>();
723 if (c < 0) return false; 725 if (c < 0) return false;
724 break; 726 break;
725 } 727 }
726 case 'v' : c = '\v'; break; 728 case 'v' : c = '\v'; break;
727 case 'x' : { 729 case 'x' : {
728 c = ScanHexNumber(2); 730 c = ScanHexNumber<capture_raw>(2);
729 if (c < 0) return false; 731 if (c < 0) return false;
730 break; 732 break;
731 } 733 }
732 case '0' : // fall through 734 case '0' : // fall through
733 case '1' : // fall through 735 case '1' : // fall through
734 case '2' : // fall through 736 case '2' : // fall through
735 case '3' : // fall through 737 case '3' : // fall through
736 case '4' : // fall through 738 case '4' : // fall through
737 case '5' : // fall through 739 case '5' : // fall through
738 case '6' : // fall through 740 case '6' : // fall through
739 case '7' : c = ScanOctalEscape(c, 2); break; 741 case '7':
742 c = ScanOctalEscape<capture_raw>(c, 2);
743 break;
740 } 744 }
741 745
742 // According to ECMA-262, section 7.8.4, characters not covered by the 746 // According to ECMA-262, section 7.8.4, characters not covered by the
743 // above cases should be illegal, but they are commonly handled as 747 // above cases should be illegal, but they are commonly handled as
744 // non-escaped characters by JS VMs. 748 // non-escaped characters by JS VMs.
745 AddLiteralChar(c); 749 AddLiteralChar(c);
746 return true; 750 return true;
747 } 751 }
748 752
749 753
750 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 754 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
751 // ECMA-262. Other JS VMs support them. 755 // ECMA-262. Other JS VMs support them.
756 template <bool capture_raw>
752 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { 757 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
753 uc32 x = c - '0'; 758 uc32 x = c - '0';
754 int i = 0; 759 int i = 0;
755 for (; i < length; i++) { 760 for (; i < length; i++) {
756 int d = c0_ - '0'; 761 int d = c0_ - '0';
757 if (d < 0 || d > 7) break; 762 if (d < 0 || d > 7) break;
758 int nx = x * 8 + d; 763 int nx = x * 8 + d;
759 if (nx >= 256) break; 764 if (nx >= 256) break;
760 x = nx; 765 x = nx;
761 Advance(); 766 Advance<capture_raw>();
762 } 767 }
763 // Anything except '\0' is an octal escape sequence, illegal in strict mode. 768 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
764 // Remember the position of octal escape sequences so that an error 769 // Remember the position of octal escape sequences so that an error
765 // can be reported later (in strict mode). 770 // can be reported later (in strict mode).
766 // We don't report the error immediately, because the octal escape can 771 // We don't report the error immediately, because the octal escape can
767 // occur before the "use strict" directive. 772 // occur before the "use strict" directive.
768 if (c != '0' || i > 0) { 773 if (c != '0' || i > 0) {
769 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); 774 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
770 } 775 }
771 return x; 776 return x;
772 } 777 }
773 778
774 779
775 Token::Value Scanner::ScanString() { 780 Token::Value Scanner::ScanString() {
776 uc32 quote = c0_; 781 uc32 quote = c0_;
777 Advance(); // consume quote 782 Advance(); // consume quote
778 783
779 LiteralScope literal(this); 784 LiteralScope literal(this);
780 while (c0_ != quote && c0_ >= 0 785 while (c0_ != quote && c0_ >= 0
781 && !unicode_cache_->IsLineTerminator(c0_)) { 786 && !unicode_cache_->IsLineTerminator(c0_)) {
782 uc32 c = c0_; 787 uc32 c = c0_;
783 Advance(); 788 Advance();
784 if (c == '\\') { 789 if (c == '\\') {
785 if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL; 790 if (c0_ < 0 || !ScanEscape<false>()) return Token::ILLEGAL;
786 } else { 791 } else {
787 AddLiteralChar(c); 792 AddLiteralChar(c);
788 } 793 }
789 } 794 }
790 if (c0_ != quote) return Token::ILLEGAL; 795 if (c0_ != quote) return Token::ILLEGAL;
791 literal.Complete(); 796 literal.Complete();
792 797
793 Advance(); // consume quote 798 Advance(); // consume quote
794 return Token::STRING; 799 return Token::STRING;
795 } 800 }
796 801
797 802
798 Token::Value Scanner::ScanTemplateSpan() { 803 Token::Value Scanner::ScanTemplateSpan() {
799 // When scanning a TemplateSpan, we are looking for the following construct: 804 // When scanning a TemplateSpan, we are looking for the following construct:
800 // TEMPLATE_SPAN :: 805 // TEMPLATE_SPAN ::
801 // ` LiteralChars* ${ 806 // ` LiteralChars* ${
802 // | } LiteralChars* ${ 807 // | } LiteralChars* ${
803 // 808 //
804 // TEMPLATE_TAIL :: 809 // TEMPLATE_TAIL ::
805 // ` LiteralChars* ` 810 // ` LiteralChars* `
806 // | } LiteralChars* ` 811 // | } LiteralChars* `
807 // 812 //
808 // A TEMPLATE_SPAN should always be followed by an Expression, while a 813 // A TEMPLATE_SPAN should always be followed by an Expression, while a
809 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be 814 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
810 // followed by an Expression. 815 // followed by an Expression.
811 816
812 Token::Value result = Token::TEMPLATE_SPAN; 817 Token::Value result = Token::TEMPLATE_SPAN;
813 LiteralScope literal(this, true); 818 LiteralScope literal(this);
819 StartRawLiteral();
820 const bool capture_raw = true;
814 821
815 while (true) { 822 while (true) {
816 uc32 c = c0_; 823 uc32 c = c0_;
817 Advance(); 824 Advance<capture_raw>();
818 if (c == '`') { 825 if (c == '`') {
819 result = Token::TEMPLATE_TAIL; 826 result = Token::TEMPLATE_TAIL;
820 ReduceRawLiteralLength(1); 827 ReduceRawLiteralLength(1);
821 break; 828 break;
822 } else if (c == '$' && c0_ == '{') { 829 } else if (c == '$' && c0_ == '{') {
823 Advance(); // Consume '{' 830 Advance<capture_raw>(); // Consume '{'
824 ReduceRawLiteralLength(2); 831 ReduceRawLiteralLength(2);
825 break; 832 break;
826 } else if (c == '\\') { 833 } else if (c == '\\') {
827 if (unicode_cache_->IsLineTerminator(c0_)) { 834 if (unicode_cache_->IsLineTerminator(c0_)) {
828 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty 835 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
829 // code unit sequence. 836 // code unit sequence.
830 uc32 lastChar = c0_; 837 uc32 lastChar = c0_;
831 Advance(); 838 Advance<capture_raw>();
832 if (lastChar == '\r') { 839 if (lastChar == '\r') {
833 ReduceRawLiteralLength(1); // Remove \r 840 ReduceRawLiteralLength(1); // Remove \r
834 if (c0_ == '\n') { 841 if (c0_ == '\n') {
835 Advance(); // Adds \n 842 Advance<capture_raw>(); // Adds \n
836 } else { 843 } else {
837 AddRawLiteralChar('\n'); 844 AddRawLiteralChar('\n');
838 } 845 }
839 } 846 }
840 } else if (c0_ == '0') { 847 } else if (c0_ == '0') {
841 Advance(); 848 Advance<capture_raw>();
842 AddLiteralChar('0'); 849 AddLiteralChar('0');
843 } else { 850 } else {
844 ScanEscape(); 851 ScanEscape<true>();
845 } 852 }
846 } else if (c < 0) { 853 } else if (c < 0) {
847 // Unterminated template literal 854 // Unterminated template literal
848 PushBack(c); 855 PushBack(c);
849 break; 856 break;
850 } else { 857 } else {
851 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. 858 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
852 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence 859 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
853 // consisting of the CV 0x000A. 860 // consisting of the CV 0x000A.
854 if (c == '\r') { 861 if (c == '\r') {
855 ReduceRawLiteralLength(1); // Remove \r 862 ReduceRawLiteralLength(1); // Remove \r
856 if (c0_ == '\n') { 863 if (c0_ == '\n') {
857 Advance(); // Adds \n 864 Advance<capture_raw>(); // Adds \n
858 } else { 865 } else {
859 AddRawLiteralChar('\n'); 866 AddRawLiteralChar('\n');
860 } 867 }
861 c = '\n'; 868 c = '\n';
862 } 869 }
863 AddLiteralChar(c); 870 AddLiteralChar(c);
864 } 871 }
865 } 872 }
866 literal.Complete(); 873 literal.Complete();
867 next_.location.end_pos = source_pos(); 874 next_.location.end_pos = source_pos();
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
995 literal.Complete(); 1002 literal.Complete();
996 1003
997 return Token::NUMBER; 1004 return Token::NUMBER;
998 } 1005 }
999 1006
1000 1007
1001 uc32 Scanner::ScanIdentifierUnicodeEscape() { 1008 uc32 Scanner::ScanIdentifierUnicodeEscape() {
1002 Advance(); 1009 Advance();
1003 if (c0_ != 'u') return -1; 1010 if (c0_ != 'u') return -1;
1004 Advance(); 1011 Advance();
1005 return ScanUnicodeEscape(); 1012 return ScanUnicodeEscape<false>();
1006 } 1013 }
1007 1014
1008 1015
1016 template <bool capture_raw>
1009 uc32 Scanner::ScanUnicodeEscape() { 1017 uc32 Scanner::ScanUnicodeEscape() {
1010 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are 1018 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
1011 // allowed). In the latter case, the number of hex digits between { } is 1019 // allowed). In the latter case, the number of hex digits between { } is
1012 // arbitrary. \ and u have already been read. 1020 // arbitrary. \ and u have already been read.
1013 if (c0_ == '{' && HarmonyUnicode()) { 1021 if (c0_ == '{' && HarmonyUnicode()) {
1014 Advance(); 1022 Advance<capture_raw>();
1015 uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff); 1023 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);
1016 if (cp < 0) { 1024 if (cp < 0) {
1017 return -1; 1025 return -1;
1018 } 1026 }
1019 if (c0_ != '}') { 1027 if (c0_ != '}') {
1020 return -1; 1028 return -1;
1021 } 1029 }
1022 Advance(); 1030 Advance<capture_raw>();
1023 return cp; 1031 return cp;
1024 } 1032 }
1025 return ScanHexNumber(4); 1033 return ScanHexNumber<capture_raw>(4);
1026 } 1034 }
1027 1035
1028 1036
1029 // ---------------------------------------------------------------------------- 1037 // ----------------------------------------------------------------------------
1030 // Keyword Matcher 1038 // Keyword Matcher
1031 1039
1032 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ 1040 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
1033 KEYWORD_GROUP('b') \ 1041 KEYWORD_GROUP('b') \
1034 KEYWORD("break", Token::BREAK) \ 1042 KEYWORD("break", Token::BREAK) \
1035 KEYWORD_GROUP('c') \ 1043 KEYWORD_GROUP('c') \
(...skipping 434 matching lines...) Expand 10 before | Expand all | Expand 10 after
1470 } 1478 }
1471 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1479 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1472 } 1480 }
1473 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1481 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1474 1482
1475 backing_store_.AddBlock(bytes); 1483 backing_store_.AddBlock(bytes);
1476 return backing_store_.EndSequence().start(); 1484 return backing_store_.EndSequence().start();
1477 } 1485 }
1478 1486
1479 } } // namespace v8::internal 1487 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698