Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: src/scanner.cc

Issue 987083003: [es6] support rest parameters in arrow functions (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Fix preparser bug Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <cmath> 9 #include <cmath>
10 10
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 45
46 void Scanner::Initialize(Utf16CharacterStream* source) { 46 void Scanner::Initialize(Utf16CharacterStream* source) {
47 source_ = source; 47 source_ = source;
48 // Need to capture identifiers in order to recognize "get" and "set" 48 // Need to capture identifiers in order to recognize "get" and "set"
49 // in object literals. 49 // in object literals.
50 Init(); 50 Init();
51 // Skip initial whitespace allowing HTML comment ends just like 51 // Skip initial whitespace allowing HTML comment ends just like
52 // after a newline and scan first token. 52 // after a newline and scan first token.
53 has_line_terminator_before_next_ = true; 53 has_line_terminator_before_next_ = true;
54 SkipWhiteSpace(); 54 SkipWhiteSpace();
55 Scan(); 55 peek_count_ = 1;
56 Scan(&next_[0]);
56 } 57 }
57 58
58 59
59 template <bool capture_raw> 60 template <bool capture_raw>
60 uc32 Scanner::ScanHexNumber(int expected_length) { 61 uc32 Scanner::ScanHexNumber(int expected_length) {
61 DCHECK(expected_length <= 4); // prevent overflow 62 DCHECK(expected_length <= 4); // prevent overflow
62 63
63 uc32 x = 0; 64 uc32 x = 0;
64 for (int i = 0; i < expected_length; i++) { 65 for (int i = 0; i < expected_length; i++) {
65 int d = HexValue(c0_); 66 int d = HexValue(c0_);
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
221 Token::ILLEGAL, 222 Token::ILLEGAL,
222 Token::LBRACE, // 0x7b 223 Token::LBRACE, // 0x7b
223 Token::ILLEGAL, 224 Token::ILLEGAL,
224 Token::RBRACE, // 0x7d 225 Token::RBRACE, // 0x7d
225 Token::BIT_NOT, // 0x7e 226 Token::BIT_NOT, // 0x7e
226 Token::ILLEGAL 227 Token::ILLEGAL
227 }; 228 };
228 229
229 230
230 Token::Value Scanner::Next() { 231 Token::Value Scanner::Next() {
231 current_ = next_; 232 current_ = next_[0];
233 if (peek_count_ > 1) {
234 std::memmove(&next_[0], &next_[1], (peek_count_ - 1) * sizeof(TokenDesc));
235 next_[--peek_count_].token = Token::ILLEGAL;
236 return current_.token;
237 }
232 has_line_terminator_before_next_ = false; 238 has_line_terminator_before_next_ = false;
233 has_multiline_comment_before_next_ = false; 239 has_multiline_comment_before_next_ = false;
234 if (static_cast<unsigned>(c0_) <= 0x7f) { 240 if (static_cast<unsigned>(c0_) <= 0x7f) {
235 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); 241 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
236 if (token != Token::ILLEGAL) { 242 if (token != Token::ILLEGAL) {
237 int pos = source_pos(); 243 int pos = source_pos();
238 next_.token = token; 244 next_[0].token = token;
239 next_.location.beg_pos = pos; 245 next_[0].location.beg_pos = pos;
240 next_.location.end_pos = pos + 1; 246 next_[0].location.end_pos = pos + 1;
241 Advance(); 247 Advance();
242 return current_.token; 248 return current_.token;
243 } 249 }
244 } 250 }
245 Scan(); 251 Scan(&next_[0]);
246 return current_.token; 252 return current_.token;
247 } 253 }
248 254
249 255
250 // TODO(yangguo): check whether this is actually necessary. 256 // TODO(yangguo): check whether this is actually necessary.
251 static inline bool IsLittleEndianByteOrderMark(uc32 c) { 257 static inline bool IsLittleEndianByteOrderMark(uc32 c) {
252 // The Unicode value U+FFFE is guaranteed never to be assigned as a 258 // The Unicode value U+FFFE is guaranteed never to be assigned as a
253 // Unicode character; this implies that in a Unicode context the 259 // Unicode character; this implies that in a Unicode context the
254 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 260 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
255 // character expressed in little-endian byte order (since it could 261 // character expressed in little-endian byte order (since it could
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
413 Advance(); 419 Advance();
414 if (c0_ == '-') return SkipSingleLineComment(); 420 if (c0_ == '-') return SkipSingleLineComment();
415 PushBack('-'); // undo Advance() 421 PushBack('-'); // undo Advance()
416 } 422 }
417 PushBack('!'); // undo Advance() 423 PushBack('!'); // undo Advance()
418 DCHECK(c0_ == '!'); 424 DCHECK(c0_ == '!');
419 return Token::LT; 425 return Token::LT;
420 } 426 }
421 427
422 428
423 void Scanner::Scan() { 429 Token::Value Scanner::peek(int n) {
424 next_.literal_chars = NULL; 430 if (n == 0) return peek();
425 next_.raw_literal_chars = NULL; 431 PeekScan(n);
432 return next_[n].token;
433 }
434
435
436 Scanner::Location Scanner::peek_location(int n) {
437 if (n == 0) return peek_location();
438 PeekScan(n);
439 return next_[n].location;
440 }
441
442
443 void Scanner::PeekScan(int count) {
444 DCHECK(count > 0 && count < kMaxLookahead);
445 while (peek_count_ <= count) {
446 Scan(&next_[peek_count_++]);
447 }
448 }
449
450
451 void Scanner::Scan(TokenDesc* next) {
452 next->literal_chars = NULL;
453 next->raw_literal_chars = NULL;
426 Token::Value token; 454 Token::Value token;
427 do { 455 do {
428 // Remember the position of the next token 456 // Remember the position of the next token
429 next_.location.beg_pos = source_pos(); 457 next->location.beg_pos = source_pos();
430 458
431 switch (c0_) { 459 switch (c0_) {
432 case ' ': 460 case ' ':
433 case '\t': 461 case '\t':
434 Advance(); 462 Advance();
435 token = Token::WHITESPACE; 463 token = Token::WHITESPACE;
436 break; 464 break;
437 465
438 case '\n': 466 case '\n':
439 Advance(); 467 Advance();
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
673 } else { 701 } else {
674 token = Select(Token::ILLEGAL); 702 token = Select(Token::ILLEGAL);
675 } 703 }
676 break; 704 break;
677 } 705 }
678 706
679 // Continue scanning for tokens as long as we're just skipping 707 // Continue scanning for tokens as long as we're just skipping
680 // whitespace. 708 // whitespace.
681 } while (token == Token::WHITESPACE); 709 } while (token == Token::WHITESPACE);
682 710
683 next_.location.end_pos = source_pos(); 711 next->location.end_pos = source_pos();
684 next_.token = token; 712 next->token = token;
685 } 713 }
686 714
687 715
688 void Scanner::SeekForward(int pos) { 716 void Scanner::SeekForward(int pos) {
717 // Not supported when there are multiple lookahead tokens.
marja 2015/03/10 09:11:19 ... what prevents this from happening?
caitp (gmail) 2015/03/10 14:47:51 it just seemed complicated to make it work with mu
718 DCHECK(peek_count_ < 2);
719
689 // After this call, we will have the token at the given position as 720 // After this call, we will have the token at the given position as
690 // the "next" token. The "current" token will be invalid. 721 // the "next" token. The "current" token will be invalid.
691 if (pos == next_.location.beg_pos) return; 722 TokenDesc* next = &next_[0];
723 if (pos == next->location.beg_pos) return;
692 int current_pos = source_pos(); 724 int current_pos = source_pos();
693 DCHECK_EQ(next_.location.end_pos, current_pos); 725 DCHECK_EQ(next->location.end_pos, current_pos);
694 // Positions inside the lookahead token aren't supported. 726 // Positions inside the lookahead token aren't supported.
695 DCHECK(pos >= current_pos); 727 DCHECK(pos >= current_pos);
696 if (pos != current_pos) { 728 if (pos != current_pos) {
697 source_->SeekForward(pos - source_->pos()); 729 source_->SeekForward(pos - source_->pos());
698 Advance(); 730 Advance();
699 // This function is only called to seek to the location 731 // This function is only called to seek to the location
700 // of the end of a function (at the "}" token). It doesn't matter 732 // of the end of a function (at the "}" token). It doesn't matter
701 // whether there was a line terminator in the part we skip. 733 // whether there was a line terminator in the part we skip.
702 has_line_terminator_before_next_ = false; 734 has_line_terminator_before_next_ = false;
703 has_multiline_comment_before_next_ = false; 735 has_multiline_comment_before_next_ = false;
704 } 736 }
705 Scan(); 737 Scan(next);
706 } 738 }
707 739
708 740
709 template <bool capture_raw, bool in_template_literal> 741 template <bool capture_raw, bool in_template_literal>
710 bool Scanner::ScanEscape() { 742 bool Scanner::ScanEscape(TokenDesc* next) {
711 uc32 c = c0_; 743 uc32 c = c0_;
712 Advance<capture_raw>(); 744 Advance<capture_raw>();
713 745
714 // Skip escaped newlines. 746 // Skip escaped newlines.
715 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { 747 if (!in_template_literal && c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {
716 // Allow CR+LF newlines in multiline string literals. 748 // Allow CR+LF newlines in multiline string literals.
717 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); 749 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
718 // Allow LF+CR newlines in multiline string literals. 750 // Allow LF+CR newlines in multiline string literals.
719 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); 751 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();
720 return true; 752 return true;
(...skipping 29 matching lines...) Expand all
750 case '5': // fall through 782 case '5': // fall through
751 case '6': // fall through 783 case '6': // fall through
752 case '7': 784 case '7':
753 c = ScanOctalEscape<capture_raw>(c, 2); 785 c = ScanOctalEscape<capture_raw>(c, 2);
754 break; 786 break;
755 } 787 }
756 788
757 // According to ECMA-262, section 7.8.4, characters not covered by the 789 // According to ECMA-262, section 7.8.4, characters not covered by the
758 // above cases should be illegal, but they are commonly handled as 790 // above cases should be illegal, but they are commonly handled as
759 // non-escaped characters by JS VMs. 791 // non-escaped characters by JS VMs.
760 AddLiteralChar(c); 792 AddLiteralChar(next, c);
761 return true; 793 return true;
762 } 794 }
763 795
764 796
765 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 797 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
766 // ECMA-262. Other JS VMs support them. 798 // ECMA-262. Other JS VMs support them.
767 template <bool capture_raw> 799 template <bool capture_raw>
768 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { 800 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
769 uc32 x = c - '0'; 801 uc32 x = c - '0';
770 int i = 0; 802 int i = 0;
(...skipping 18 matching lines...) Expand all
789 821
790 822
791 const int kMaxAscii = 127; 823 const int kMaxAscii = 127;
792 824
793 825
794 Token::Value Scanner::ScanString() { 826 Token::Value Scanner::ScanString() {
795 uc32 quote = c0_; 827 uc32 quote = c0_;
796 Advance<false, false>(); // consume quote 828 Advance<false, false>(); // consume quote
797 829
798 LiteralScope literal(this); 830 LiteralScope literal(this);
831 TokenDesc* next = literal.next_;
799 while (true) { 832 while (true) {
800 if (c0_ > kMaxAscii) { 833 if (c0_ > kMaxAscii) {
801 HandleLeadSurrogate(); 834 HandleLeadSurrogate();
802 break; 835 break;
803 } 836 }
804 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL; 837 if (c0_ < 0 || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;
805 if (c0_ == quote) { 838 if (c0_ == quote) {
806 literal.Complete(); 839 literal.Complete();
807 Advance<false, false>(); 840 Advance<false, false>();
808 return Token::STRING; 841 return Token::STRING;
809 } 842 }
810 uc32 c = c0_; 843 uc32 c = c0_;
811 if (c == '\\') break; 844 if (c == '\\') break;
812 Advance<false, false>(); 845 Advance<false, false>();
813 AddLiteralChar(c); 846 AddLiteralChar(next, c);
814 } 847 }
815 848
816 while (c0_ != quote && c0_ >= 0 849 while (c0_ != quote && c0_ >= 0
817 && !unicode_cache_->IsLineTerminator(c0_)) { 850 && !unicode_cache_->IsLineTerminator(c0_)) {
818 uc32 c = c0_; 851 uc32 c = c0_;
819 Advance(); 852 Advance();
820 if (c == '\\') { 853 if (c == '\\') {
821 if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; 854 if (c0_ < 0 || !ScanEscape<false, false>(next)) return Token::ILLEGAL;
822 } else { 855 } else {
823 AddLiteralChar(c); 856 AddLiteralChar(next, c);
824 } 857 }
825 } 858 }
826 if (c0_ != quote) return Token::ILLEGAL; 859 if (c0_ != quote) return Token::ILLEGAL;
827 literal.Complete(); 860 literal.Complete();
828 861
829 Advance(); // consume quote 862 Advance(); // consume quote
830 return Token::STRING; 863 return Token::STRING;
831 } 864 }
832 865
833 866
834 Token::Value Scanner::ScanTemplateSpan() { 867 Token::Value Scanner::ScanTemplateSpan() {
835 // When scanning a TemplateSpan, we are looking for the following construct: 868 // When scanning a TemplateSpan, we are looking for the following construct:
836 // TEMPLATE_SPAN :: 869 // TEMPLATE_SPAN ::
837 // ` LiteralChars* ${ 870 // ` LiteralChars* ${
838 // | } LiteralChars* ${ 871 // | } LiteralChars* ${
839 // 872 //
840 // TEMPLATE_TAIL :: 873 // TEMPLATE_TAIL ::
841 // ` LiteralChars* ` 874 // ` LiteralChars* `
842 // | } LiteralChar* ` 875 // | } LiteralChar* `
843 // 876 //
844 // A TEMPLATE_SPAN should always be followed by an Expression, while a 877 // A TEMPLATE_SPAN should always be followed by an Expression, while a
845 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be 878 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
846 // followed by an Expression. 879 // followed by an Expression.
847
848 Token::Value result = Token::TEMPLATE_SPAN; 880 Token::Value result = Token::TEMPLATE_SPAN;
849 LiteralScope literal(this); 881 LiteralScope literal(this);
882 TokenDesc* next = literal.next_;
850 StartRawLiteral(); 883 StartRawLiteral();
851 const bool capture_raw = true; 884 const bool capture_raw = true;
852 const bool in_template_literal = true; 885 const bool in_template_literal = true;
853 886
854 while (true) { 887 while (true) {
855 uc32 c = c0_; 888 uc32 c = c0_;
856 Advance<capture_raw>(); 889 Advance<capture_raw>();
857 if (c == '`') { 890 if (c == '`') {
858 result = Token::TEMPLATE_TAIL; 891 result = Token::TEMPLATE_TAIL;
859 ReduceRawLiteralLength(1); 892 ReduceRawLiteralLength(next, 1);
860 break; 893 break;
861 } else if (c == '$' && c0_ == '{') { 894 } else if (c == '$' && c0_ == '{') {
862 Advance<capture_raw>(); // Consume '{' 895 Advance<capture_raw>(); // Consume '{'
863 ReduceRawLiteralLength(2); 896 ReduceRawLiteralLength(next, 2);
864 break; 897 break;
865 } else if (c == '\\') { 898 } else if (c == '\\') {
866 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) { 899 if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) {
867 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty 900 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
868 // code unit sequence. 901 // code unit sequence.
869 uc32 lastChar = c0_; 902 uc32 lastChar = c0_;
870 Advance<capture_raw>(); 903 Advance<capture_raw>();
871 if (lastChar == '\r') { 904 if (lastChar == '\r') {
872 ReduceRawLiteralLength(1); // Remove \r 905 ReduceRawLiteralLength(next, 1); // Remove \r
873 if (c0_ == '\n') { 906 if (c0_ == '\n') {
874 Advance<capture_raw>(); // Adds \n 907 Advance<capture_raw>(); // Adds \n
875 } else { 908 } else {
876 AddRawLiteralChar('\n'); 909 AddRawLiteralChar(next, '\n');
877 } 910 }
878 } 911 }
879 } else if (!ScanEscape<capture_raw, in_template_literal>()) { 912 } else if (!ScanEscape<capture_raw, in_template_literal>(next)) {
880 return Token::ILLEGAL; 913 return Token::ILLEGAL;
881 } 914 }
882 } else if (c < 0) { 915 } else if (c < 0) {
883 // Unterminated template literal 916 // Unterminated template literal
884 PushBack(c); 917 PushBack(c);
885 break; 918 break;
886 } else { 919 } else {
887 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. 920 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
888 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence 921 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
889 // consisting of the CV 0x000A. 922 // consisting of the CV 0x000A.
890 if (c == '\r') { 923 if (c == '\r') {
891 ReduceRawLiteralLength(1); // Remove \r 924 ReduceRawLiteralLength(next, 1); // Remove \r
892 if (c0_ == '\n') { 925 if (c0_ == '\n') {
893 Advance<capture_raw>(); // Adds \n 926 Advance<capture_raw>(); // Adds \n
894 } else { 927 } else {
895 AddRawLiteralChar('\n'); 928 AddRawLiteralChar(next, '\n');
896 } 929 }
897 c = '\n'; 930 c = '\n';
898 } 931 }
899 AddLiteralChar(c); 932 AddLiteralChar(next, c);
900 } 933 }
901 } 934 }
902 literal.Complete(); 935 literal.Complete();
903 next_.location.end_pos = source_pos(); 936 next->location.end_pos = source_pos();
904 next_.token = result; 937 next->token = result;
905 return result; 938 return result;
906 } 939 }
907 940
908 941
909 Token::Value Scanner::ScanTemplateStart() { 942 Token::Value Scanner::ScanTemplateStart() {
910 DCHECK(c0_ == '`'); 943 DCHECK(c0_ == '`');
911 next_.location.beg_pos = source_pos(); 944 TokenDesc* next = PeekTokenDesc();
945 next->location.beg_pos = source_pos();
912 Advance(); // Consume ` 946 Advance(); // Consume `
913 return ScanTemplateSpan(); 947 return ScanTemplateSpan();
914 } 948 }
915 949
916 950
917 Token::Value Scanner::ScanTemplateContinuation() { 951 Token::Value Scanner::ScanTemplateContinuation() {
918 DCHECK_EQ(next_.token, Token::RBRACE); 952 TokenDesc* next = PeekTokenDesc();
919 next_.location.beg_pos = source_pos() - 1; // We already consumed } 953 DCHECK_EQ(next->token, Token::RBRACE);
954 next->location.beg_pos = source_pos() - 1; // We already consumed }
920 return ScanTemplateSpan(); 955 return ScanTemplateSpan();
921 } 956 }
922 957
923 958
924 void Scanner::ScanDecimalDigits() { 959 void Scanner::ScanDecimalDigits(TokenDesc* next) {
925 while (IsDecimalDigit(c0_)) 960 while (IsDecimalDigit(c0_)) AddLiteralCharAdvance(next);
926 AddLiteralCharAdvance();
927 } 961 }
928 962
929 963
930 Token::Value Scanner::ScanNumber(bool seen_period) { 964 Token::Value Scanner::ScanNumber(bool seen_period) {
931 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 965 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
932 966
933 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; 967 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;
934 968
935 LiteralScope literal(this); 969 LiteralScope literal(this);
970 TokenDesc* next = literal.next_;
936 bool at_start = !seen_period; 971 bool at_start = !seen_period;
937 if (seen_period) { 972 if (seen_period) {
938 // we have already seen a decimal point of the float 973 // we have already seen a decimal point of the float
939 AddLiteralChar('.'); 974 AddLiteralChar(next, '.');
940 ScanDecimalDigits(); // we know we have at least one digit 975 ScanDecimalDigits(next); // we know we have at least one digit
941 976
942 } else { 977 } else {
943 // if the first character is '0' we must check for octals and hex 978 // if the first character is '0' we must check for octals and hex
944 if (c0_ == '0') { 979 if (c0_ == '0') {
945 int start_pos = source_pos(); // For reporting octal positions. 980 int start_pos = source_pos(); // For reporting octal positions.
946 AddLiteralCharAdvance(); 981 AddLiteralCharAdvance(next);
947 982
948 // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or 983 // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
949 // an octal number. 984 // an octal number.
950 if (c0_ == 'x' || c0_ == 'X') { 985 if (c0_ == 'x' || c0_ == 'X') {
951 // hex number 986 // hex number
952 kind = HEX; 987 kind = HEX;
953 AddLiteralCharAdvance(); 988 AddLiteralCharAdvance(next);
954 if (!IsHexDigit(c0_)) { 989 if (!IsHexDigit(c0_)) {
955 // we must have at least one hex digit after 'x'/'X' 990 // we must have at least one hex digit after 'x'/'X'
956 return Token::ILLEGAL; 991 return Token::ILLEGAL;
957 } 992 }
958 while (IsHexDigit(c0_)) { 993 while (IsHexDigit(c0_)) {
959 AddLiteralCharAdvance(); 994 AddLiteralCharAdvance(next);
960 } 995 }
961 } else if (harmony_numeric_literals_ && (c0_ == 'o' || c0_ == 'O')) { 996 } else if (harmony_numeric_literals_ && (c0_ == 'o' || c0_ == 'O')) {
962 kind = OCTAL; 997 kind = OCTAL;
963 AddLiteralCharAdvance(); 998 AddLiteralCharAdvance(next);
964 if (!IsOctalDigit(c0_)) { 999 if (!IsOctalDigit(c0_)) {
965 // we must have at least one octal digit after 'o'/'O' 1000 // we must have at least one octal digit after 'o'/'O'
966 return Token::ILLEGAL; 1001 return Token::ILLEGAL;
967 } 1002 }
968 while (IsOctalDigit(c0_)) { 1003 while (IsOctalDigit(c0_)) {
969 AddLiteralCharAdvance(); 1004 AddLiteralCharAdvance(next);
970 } 1005 }
971 } else if (harmony_numeric_literals_ && (c0_ == 'b' || c0_ == 'B')) { 1006 } else if (harmony_numeric_literals_ && (c0_ == 'b' || c0_ == 'B')) {
972 kind = BINARY; 1007 kind = BINARY;
973 AddLiteralCharAdvance(); 1008 AddLiteralCharAdvance(next);
974 if (!IsBinaryDigit(c0_)) { 1009 if (!IsBinaryDigit(c0_)) {
975 // we must have at least one binary digit after 'b'/'B' 1010 // we must have at least one binary digit after 'b'/'B'
976 return Token::ILLEGAL; 1011 return Token::ILLEGAL;
977 } 1012 }
978 while (IsBinaryDigit(c0_)) { 1013 while (IsBinaryDigit(c0_)) {
979 AddLiteralCharAdvance(); 1014 AddLiteralCharAdvance(next);
980 } 1015 }
981 } else if ('0' <= c0_ && c0_ <= '7') { 1016 } else if ('0' <= c0_ && c0_ <= '7') {
982 // (possible) octal number 1017 // (possible) octal number
983 kind = IMPLICIT_OCTAL; 1018 kind = IMPLICIT_OCTAL;
984 while (true) { 1019 while (true) {
985 if (c0_ == '8' || c0_ == '9') { 1020 if (c0_ == '8' || c0_ == '9') {
986 at_start = false; 1021 at_start = false;
987 kind = DECIMAL; 1022 kind = DECIMAL;
988 break; 1023 break;
989 } 1024 }
990 if (c0_ < '0' || '7' < c0_) { 1025 if (c0_ < '0' || '7' < c0_) {
991 // Octal literal finished. 1026 // Octal literal finished.
992 octal_pos_ = Location(start_pos, source_pos()); 1027 octal_pos_ = Location(start_pos, source_pos());
993 break; 1028 break;
994 } 1029 }
995 AddLiteralCharAdvance(); 1030 AddLiteralCharAdvance(next);
996 } 1031 }
997 } 1032 }
998 } 1033 }
999 1034
1000 // Parse decimal digits and allow trailing fractional part. 1035 // Parse decimal digits and allow trailing fractional part.
1001 if (kind == DECIMAL) { 1036 if (kind == DECIMAL) {
1002 if (at_start) { 1037 if (at_start) {
1003 int value = 0; 1038 int value = 0;
1004 while (IsDecimalDigit(c0_)) { 1039 while (IsDecimalDigit(c0_)) {
1005 value = 10 * value + (c0_ - '0'); 1040 value = 10 * value + (c0_ - '0');
1006 1041
1007 uc32 first_char = c0_; 1042 uc32 first_char = c0_;
1008 Advance<false, false>(); 1043 Advance<false, false>();
1009 AddLiteralChar(first_char); 1044 AddLiteralChar(next, first_char);
1010 } 1045 }
1011 1046
1012 if (next_.literal_chars->one_byte_literal().length() < 10 && 1047 if (next->literal_chars->one_byte_literal().length() < 10 &&
1013 c0_ != '.' && c0_ != 'e' && c0_ != 'E') { 1048 c0_ != '.' && c0_ != 'e' && c0_ != 'E') {
1014 smi_value_ = value; 1049 smi_value_ = value;
1015 literal.Complete(); 1050 literal.Complete();
1016 HandleLeadSurrogate(); 1051 HandleLeadSurrogate();
1017 1052
1018 return Token::SMI; 1053 return Token::SMI;
1019 } 1054 }
1020 HandleLeadSurrogate(); 1055 HandleLeadSurrogate();
1021 } 1056 }
1022 1057
1023 ScanDecimalDigits(); // optional 1058 ScanDecimalDigits(next); // optional
1024 if (c0_ == '.') { 1059 if (c0_ == '.') {
1025 AddLiteralCharAdvance(); 1060 AddLiteralCharAdvance(next);
1026 ScanDecimalDigits(); // optional 1061 ScanDecimalDigits(next); // optional
1027 } 1062 }
1028 } 1063 }
1029 } 1064 }
1030 1065
1031 // scan exponent, if any 1066 // scan exponent, if any
1032 if (c0_ == 'e' || c0_ == 'E') { 1067 if (c0_ == 'e' || c0_ == 'E') {
1033 DCHECK(kind != HEX); // 'e'/'E' must be scanned as part of the hex number 1068 DCHECK(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
1034 if (kind != DECIMAL) return Token::ILLEGAL; 1069 if (kind != DECIMAL) return Token::ILLEGAL;
1035 // scan exponent 1070 // scan exponent
1036 AddLiteralCharAdvance(); 1071 AddLiteralCharAdvance(next);
1037 if (c0_ == '+' || c0_ == '-') 1072 if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance(next);
1038 AddLiteralCharAdvance();
1039 if (!IsDecimalDigit(c0_)) { 1073 if (!IsDecimalDigit(c0_)) {
1040 // we must have at least one decimal digit after 'e'/'E' 1074 // we must have at least one decimal digit after 'e'/'E'
1041 return Token::ILLEGAL; 1075 return Token::ILLEGAL;
1042 } 1076 }
1043 ScanDecimalDigits(); 1077 ScanDecimalDigits(next);
1044 } 1078 }
1045 1079
1046 // The source character immediately following a numeric literal must 1080 // The source character immediately following a numeric literal must
1047 // not be an identifier start or a decimal digit; see ECMA-262 1081 // not be an identifier start or a decimal digit; see ECMA-262
1048 // section 7.8.3, page 17 (note that we read only one decimal digit 1082 // section 7.8.3, page 17 (note that we read only one decimal digit
1049 // if the value is 0). 1083 // if the value is 0).
1050 if (IsDecimalDigit(c0_) || 1084 if (IsDecimalDigit(c0_) ||
1051 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) 1085 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_)))
1052 return Token::ILLEGAL; 1086 return Token::ILLEGAL;
1053 1087
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
1211 } 1245 }
1212 return Token::FUTURE_STRICT_RESERVED_WORD == 1246 return Token::FUTURE_STRICT_RESERVED_WORD ==
1213 KeywordOrIdentifierToken(string->raw_data(), string->length(), 1247 KeywordOrIdentifierToken(string->raw_data(), string->length(),
1214 harmony_scoping_, harmony_modules_, 1248 harmony_scoping_, harmony_modules_,
1215 harmony_classes_); 1249 harmony_classes_);
1216 } 1250 }
1217 1251
1218 1252
1219 Token::Value Scanner::ScanIdentifierOrKeyword() { 1253 Token::Value Scanner::ScanIdentifierOrKeyword() {
1220 DCHECK(unicode_cache_->IsIdentifierStart(c0_)); 1254 DCHECK(unicode_cache_->IsIdentifierStart(c0_));
1255 TokenDesc* next = PeekTokenDesc();
1221 LiteralScope literal(this); 1256 LiteralScope literal(this);
1222 if (IsInRange(c0_, 'a', 'z')) { 1257 if (IsInRange(c0_, 'a', 'z')) {
1223 do { 1258 do {
1224 uc32 first_char = c0_; 1259 uc32 first_char = c0_;
1225 Advance<false, false>(); 1260 Advance<false, false>();
1226 AddLiteralChar(first_char); 1261 AddLiteralChar(next, first_char);
1227 } while (IsInRange(c0_, 'a', 'z')); 1262 } while (IsInRange(c0_, 'a', 'z'));
1228 1263
1229 if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' || 1264 if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' ||
1230 c0_ == '$') { 1265 c0_ == '$') {
1231 // Identifier starting with lowercase. 1266 // Identifier starting with lowercase.
1232 uc32 first_char = c0_; 1267 uc32 first_char = c0_;
1233 Advance<false, false>(); 1268 Advance<false, false>();
1234 AddLiteralChar(first_char); 1269 AddLiteralChar(next, first_char);
1235 while (IsAsciiIdentifier(c0_)) { 1270 while (IsAsciiIdentifier(c0_)) {
1236 uc32 first_char = c0_; 1271 uc32 first_char = c0_;
1237 Advance<false, false>(); 1272 Advance<false, false>();
1238 AddLiteralChar(first_char); 1273 AddLiteralChar(next, first_char);
1239 } 1274 }
1240 if (c0_ <= kMaxAscii && c0_ != '\\') { 1275 if (c0_ <= kMaxAscii && c0_ != '\\') {
1241 literal.Complete(); 1276 literal.Complete();
1242 return Token::IDENTIFIER; 1277 return Token::IDENTIFIER;
1243 } 1278 }
1244 } else if (c0_ <= kMaxAscii && c0_ != '\\') { 1279 } else if (c0_ <= kMaxAscii && c0_ != '\\') {
1245 // Only a-z+: could be a keyword or identifier. 1280 // Only a-z+: could be a keyword or identifier.
1246 literal.Complete(); 1281 literal.Complete();
1247 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); 1282 Vector<const uint8_t> chars = next->literal_chars->one_byte_literal();
1248 return KeywordOrIdentifierToken(chars.start(), chars.length(), 1283 return KeywordOrIdentifierToken(chars.start(), chars.length(),
1249 harmony_scoping_, harmony_modules_, 1284 harmony_scoping_, harmony_modules_,
1250 harmony_classes_); 1285 harmony_classes_);
1251 } 1286 }
1252 1287
1253 HandleLeadSurrogate(); 1288 HandleLeadSurrogate();
1254 } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') { 1289 } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') {
1255 do { 1290 do {
1256 uc32 first_char = c0_; 1291 uc32 first_char = c0_;
1257 Advance<false, false>(); 1292 Advance<false, false>();
1258 AddLiteralChar(first_char); 1293 AddLiteralChar(next, first_char);
1259 } while (IsAsciiIdentifier(c0_)); 1294 } while (IsAsciiIdentifier(c0_));
1260 1295
1261 if (c0_ <= kMaxAscii && c0_ != '\\') { 1296 if (c0_ <= kMaxAscii && c0_ != '\\') {
1262 literal.Complete(); 1297 literal.Complete();
1263 return Token::IDENTIFIER; 1298 return Token::IDENTIFIER;
1264 } 1299 }
1265 1300
1266 HandleLeadSurrogate(); 1301 HandleLeadSurrogate();
1267 } else if (c0_ == '\\') { 1302 } else if (c0_ == '\\') {
1268 // Scan identifier start character. 1303 // Scan identifier start character.
1269 uc32 c = ScanIdentifierUnicodeEscape(); 1304 uc32 c = ScanIdentifierUnicodeEscape();
1270 // Only allow legal identifier start characters. 1305 // Only allow legal identifier start characters.
1271 if (c < 0 || 1306 if (c < 0 ||
1272 c == '\\' || // No recursive escapes. 1307 c == '\\' || // No recursive escapes.
1273 !unicode_cache_->IsIdentifierStart(c)) { 1308 !unicode_cache_->IsIdentifierStart(c)) {
1274 return Token::ILLEGAL; 1309 return Token::ILLEGAL;
1275 } 1310 }
1276 AddLiteralChar(c); 1311 AddLiteralChar(next, c);
1277 return ScanIdentifierSuffix(&literal); 1312 return ScanIdentifierSuffix(&literal);
1278 } else { 1313 } else {
1279 uc32 first_char = c0_; 1314 uc32 first_char = c0_;
1280 Advance(); 1315 Advance();
1281 AddLiteralChar(first_char); 1316 AddLiteralChar(next, first_char);
1282 } 1317 }
1283 1318
1284 // Scan the rest of the identifier characters. 1319 // Scan the rest of the identifier characters.
1285 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1320 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
1286 if (c0_ != '\\') { 1321 if (c0_ != '\\') {
1287 uc32 next_char = c0_; 1322 uc32 next_char = c0_;
1288 Advance(); 1323 Advance();
1289 AddLiteralChar(next_char); 1324 AddLiteralChar(next, next_char);
1290 continue; 1325 continue;
1291 } 1326 }
1292 // Fallthrough if no longer able to complete keyword. 1327 // Fallthrough if no longer able to complete keyword.
1293 return ScanIdentifierSuffix(&literal); 1328 return ScanIdentifierSuffix(&literal);
1294 } 1329 }
1295 1330
1296 literal.Complete(); 1331 literal.Complete();
1297 1332
1298 if (next_.literal_chars->is_one_byte()) { 1333 if (next->literal_chars->is_one_byte()) {
1299 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); 1334 Vector<const uint8_t> chars = next->literal_chars->one_byte_literal();
1300 return KeywordOrIdentifierToken(chars.start(), 1335 return KeywordOrIdentifierToken(chars.start(),
1301 chars.length(), 1336 chars.length(),
1302 harmony_scoping_, 1337 harmony_scoping_,
1303 harmony_modules_, 1338 harmony_modules_,
1304 harmony_classes_); 1339 harmony_classes_);
1305 } 1340 }
1306 return Token::IDENTIFIER; 1341 return Token::IDENTIFIER;
1307 } 1342 }
1308 1343
1309 1344
1310 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { 1345 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {
1311 // Scan the rest of the identifier characters. 1346 // Scan the rest of the identifier characters.
1347 TokenDesc* next = literal->next_;
1312 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1348 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
1313 if (c0_ == '\\') { 1349 if (c0_ == '\\') {
1314 uc32 c = ScanIdentifierUnicodeEscape(); 1350 uc32 c = ScanIdentifierUnicodeEscape();
1315 // Only allow legal identifier part characters. 1351 // Only allow legal identifier part characters.
1316 if (c < 0 || 1352 if (c < 0 ||
1317 c == '\\' || 1353 c == '\\' ||
1318 !unicode_cache_->IsIdentifierPart(c)) { 1354 !unicode_cache_->IsIdentifierPart(c)) {
1319 return Token::ILLEGAL; 1355 return Token::ILLEGAL;
1320 } 1356 }
1321 AddLiteralChar(c); 1357 AddLiteralChar(next, c);
1322 } else { 1358 } else {
1323 AddLiteralChar(c0_); 1359 AddLiteralChar(next, c0_);
1324 Advance(); 1360 Advance();
1325 } 1361 }
1326 } 1362 }
1327 literal->Complete(); 1363 literal->Complete();
1328 1364
1329 return Token::IDENTIFIER; 1365 return Token::IDENTIFIER;
1330 } 1366 }
1331 1367
1332 1368
1333 bool Scanner::ScanRegExpPattern(bool seen_equal) { 1369 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1334 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 1370 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1335 bool in_character_class = false; 1371 bool in_character_class = false;
1372 TokenDesc* next = PeekTokenDesc();
1336 1373
1337 // Previous token is either '/' or '/=', in the second case, the 1374 // Previous token is either '/' or '/=', in the second case, the
1338 // pattern starts at =. 1375 // pattern starts at =.
1339 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 1376 next->location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1340 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 1377 next->location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1341 1378
1342 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1379 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1343 // the scanner should pass uninterpreted bodies to the RegExp 1380 // the scanner should pass uninterpreted bodies to the RegExp
1344 // constructor. 1381 // constructor.
1345 LiteralScope literal(this); 1382 LiteralScope literal(this);
1346 if (seen_equal) { 1383 if (seen_equal) {
1347 AddLiteralChar('='); 1384 AddLiteralChar(next, '=');
1348 } 1385 }
1349 1386
1350 while (c0_ != '/' || in_character_class) { 1387 while (c0_ != '/' || in_character_class) {
1351 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; 1388 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;
1352 if (c0_ == '\\') { // Escape sequence. 1389 if (c0_ == '\\') { // Escape sequence.
1353 AddLiteralCharAdvance(); 1390 AddLiteralCharAdvance(next);
1354 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; 1391 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;
1355 AddLiteralCharAdvance(); 1392 AddLiteralCharAdvance(next);
1356 // If the escape allows more characters, i.e., \x??, \u????, or \c?, 1393 // If the escape allows more characters, i.e., \x??, \u????, or \c?,
1357 // only "safe" characters are allowed (letters, digits, underscore), 1394 // only "safe" characters are allowed (letters, digits, underscore),
1358 // otherwise the escape isn't valid and the invalid character has 1395 // otherwise the escape isn't valid and the invalid character has
1359 // its normal meaning. I.e., we can just continue scanning without 1396 // its normal meaning. I.e., we can just continue scanning without
1360 // worrying whether the following characters are part of the escape 1397 // worrying whether the following characters are part of the escape
1361 // or not, since any '/', '\\' or '[' is guaranteed to not be part 1398 // or not, since any '/', '\\' or '[' is guaranteed to not be part
1362 // of the escape sequence. 1399 // of the escape sequence.
1363 1400
1364 // TODO(896): At some point, parse RegExps more thoroughly to capture 1401 // TODO(896): At some point, parse RegExps more thoroughly to capture
1365 // octal escapes in strict mode. 1402 // octal escapes in strict mode.
1366 } else { // Unescaped character. 1403 } else { // Unescaped character.
1367 if (c0_ == '[') in_character_class = true; 1404 if (c0_ == '[') in_character_class = true;
1368 if (c0_ == ']') in_character_class = false; 1405 if (c0_ == ']') in_character_class = false;
1369 AddLiteralCharAdvance(); 1406 AddLiteralCharAdvance(next);
1370 } 1407 }
1371 } 1408 }
1372 Advance(); // consume '/' 1409 Advance(); // consume '/'
1373 1410
1374 literal.Complete(); 1411 literal.Complete();
1375 1412
1376 return true; 1413 return true;
1377 } 1414 }
1378 1415
1379 1416
1380 bool Scanner::ScanRegExpFlags() { 1417 bool Scanner::ScanRegExpFlags() {
1381 // Scan regular expression flags. 1418 // Scan regular expression flags.
1419 TokenDesc* next = PeekTokenDesc();
1382 LiteralScope literal(this); 1420 LiteralScope literal(this);
1383 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1421 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
1384 if (c0_ != '\\') { 1422 if (c0_ != '\\') {
1385 AddLiteralCharAdvance(); 1423 AddLiteralCharAdvance(next);
1386 } else { 1424 } else {
1387 return false; 1425 return false;
1388 } 1426 }
1389 } 1427 }
1390 literal.Complete(); 1428 literal.Complete();
1391 1429
1392 next_.location.end_pos = source_pos() - 1; 1430 next->location.end_pos = source_pos() - 1;
1393 return true; 1431 return true;
1394 } 1432 }
1395 1433
1396 1434
1397 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { 1435 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {
1398 if (is_literal_one_byte()) { 1436 if (is_literal_one_byte()) {
1399 return ast_value_factory->GetOneByteString(literal_one_byte_string()); 1437 return ast_value_factory->GetOneByteString(literal_one_byte_string());
1400 } 1438 }
1401 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); 1439 return ast_value_factory->GetTwoByteString(literal_two_byte_string());
1402 } 1440 }
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
1574 } 1612 }
1575 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1613 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1576 } 1614 }
1577 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1615 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1578 1616
1579 backing_store_.AddBlock(bytes); 1617 backing_store_.AddBlock(bytes);
1580 return backing_store_.EndSequence().start(); 1618 return backing_store_.EndSequence().start();
1581 } 1619 }
1582 1620
1583 } } // namespace v8::internal 1621 } } // namespace v8::internal
OLDNEW
« src/preparser.h ('K') | « src/scanner.h ('k') | src/typing.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698