Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/scanner-base.cc

Issue 7348008: Merge up to 8597 to experimental/gc from the bleeding edge. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/gc/
Patch Set: '' Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner-base.h ('k') | src/scopeinfo.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
70 70
71 71
72 72
73 // ---------------------------------------------------------------------------- 73 // ----------------------------------------------------------------------------
74 // JavaScriptScanner 74 // JavaScriptScanner
75 75
76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) 76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)
77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { } 77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { }
78 78
79 79
80 void JavaScriptScanner::Initialize(UC16CharacterStream* source) {
81 source_ = source;
82 // Need to capture identifiers in order to recognize "get" and "set"
83 // in object literals.
84 Init();
85 // Skip initial whitespace allowing HTML comment ends just like
86 // after a newline and scan first token.
87 has_line_terminator_before_next_ = true;
88 SkipWhiteSpace();
89 Scan();
90 }
91
80 Token::Value JavaScriptScanner::Next() { 92 Token::Value JavaScriptScanner::Next() {
81 current_ = next_; 93 current_ = next_;
82 has_line_terminator_before_next_ = false; 94 has_line_terminator_before_next_ = false;
95 has_multiline_comment_before_next_ = false;
83 Scan(); 96 Scan();
84 return current_.token; 97 return current_.token;
85 } 98 }
86 99
87 100
88 static inline bool IsByteOrderMark(uc32 c) { 101 static inline bool IsByteOrderMark(uc32 c) {
89 // The Unicode value U+FFFE is guaranteed never to be assigned as a 102 // The Unicode value U+FFFE is guaranteed never to be assigned as a
90 // Unicode character; this implies that in a Unicode context the 103 // Unicode character; this implies that in a Unicode context the
91 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 104 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
92 // character expressed in little-endian byte order (since it could 105 // character expressed in little-endian byte order (since it could
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
137 } 150 }
138 151
139 152
140 Token::Value JavaScriptScanner::SkipSingleLineComment() { 153 Token::Value JavaScriptScanner::SkipSingleLineComment() {
141 Advance(); 154 Advance();
142 155
143 // The line terminator at the end of the line is not considered 156 // The line terminator at the end of the line is not considered
144 // to be part of the single-line comment; it is recognized 157 // to be part of the single-line comment; it is recognized
145 // separately by the lexical grammar and becomes part of the 158 // separately by the lexical grammar and becomes part of the
146 // stream of input elements for the syntactic grammar (see 159 // stream of input elements for the syntactic grammar (see
147 // ECMA-262, section 7.4, page 12). 160 // ECMA-262, section 7.4).
148 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { 161 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) {
149 Advance(); 162 Advance();
150 } 163 }
151 164
152 return Token::WHITESPACE; 165 return Token::WHITESPACE;
153 } 166 }
154 167
155 168
156 Token::Value JavaScriptScanner::SkipMultiLineComment() { 169 Token::Value JavaScriptScanner::SkipMultiLineComment() {
157 ASSERT(c0_ == '*'); 170 ASSERT(c0_ == '*');
158 Advance(); 171 Advance();
159 172
160 while (c0_ >= 0) { 173 while (c0_ >= 0) {
161 char ch = c0_; 174 char ch = c0_;
162 Advance(); 175 Advance();
176 if (unicode_cache_->IsLineTerminator(ch)) {
177 // Following ECMA-262, section 7.4, a comment containing
178 // a newline will make the comment count as a line-terminator.
179 has_multiline_comment_before_next_ = true;
180 }
163 // If we have reached the end of the multi-line comment, we 181 // If we have reached the end of the multi-line comment, we
164 // consume the '/' and insert a whitespace. This way all 182 // consume the '/' and insert a whitespace. This way all
165 // multi-line comments are treated as whitespace - even the ones 183 // multi-line comments are treated as whitespace.
166 // containing line terminators. This contradicts ECMA-262, section
167 // 7.4, page 12, that says that multi-line comments containing
168 // line terminators should be treated as a line terminator, but it
169 // matches the behaviour of SpiderMonkey and KJS.
170 if (ch == '*' && c0_ == '/') { 184 if (ch == '*' && c0_ == '/') {
171 c0_ = ' '; 185 c0_ = ' ';
172 return Token::WHITESPACE; 186 return Token::WHITESPACE;
173 } 187 }
174 } 188 }
175 189
176 // Unterminated multi-line comment. 190 // Unterminated multi-line comment.
177 return Token::ILLEGAL; 191 return Token::ILLEGAL;
178 } 192 }
179 193
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after
441 ASSERT_EQ(next_.location.end_pos, current_pos); 455 ASSERT_EQ(next_.location.end_pos, current_pos);
442 // Positions inside the lookahead token aren't supported. 456 // Positions inside the lookahead token aren't supported.
443 ASSERT(pos >= current_pos); 457 ASSERT(pos >= current_pos);
444 if (pos != current_pos) { 458 if (pos != current_pos) {
445 source_->SeekForward(pos - source_->pos()); 459 source_->SeekForward(pos - source_->pos());
446 Advance(); 460 Advance();
447 // This function is only called to seek to the location 461 // This function is only called to seek to the location
448 // of the end of a function (at the "}" token). It doesn't matter 462 // of the end of a function (at the "}" token). It doesn't matter
449 // whether there was a line terminator in the part we skip. 463 // whether there was a line terminator in the part we skip.
450 has_line_terminator_before_next_ = false; 464 has_line_terminator_before_next_ = false;
465 has_multiline_comment_before_next_ = false;
451 } 466 }
452 Scan(); 467 Scan();
453 } 468 }
454 469
455 470
456 void JavaScriptScanner::ScanEscape() { 471 void JavaScriptScanner::ScanEscape() {
457 uc32 c = c0_; 472 uc32 c = c0_;
458 Advance(); 473 Advance();
459 474
460 // Skip escaped newlines. 475 // Skip escaped newlines.
(...skipping 316 matching lines...) Expand 10 before | Expand all | Expand 10 after
777 { "break", KEYWORD_PREFIX, Token::BREAK }, 792 { "break", KEYWORD_PREFIX, Token::BREAK },
778 { NULL, C, Token::ILLEGAL }, 793 { NULL, C, Token::ILLEGAL },
779 { NULL, D, Token::ILLEGAL }, 794 { NULL, D, Token::ILLEGAL },
780 { NULL, E, Token::ILLEGAL }, 795 { NULL, E, Token::ILLEGAL },
781 { NULL, F, Token::ILLEGAL }, 796 { NULL, F, Token::ILLEGAL },
782 { NULL, UNMATCHABLE, Token::ILLEGAL }, 797 { NULL, UNMATCHABLE, Token::ILLEGAL },
783 { NULL, UNMATCHABLE, Token::ILLEGAL }, 798 { NULL, UNMATCHABLE, Token::ILLEGAL },
784 { NULL, I, Token::ILLEGAL }, 799 { NULL, I, Token::ILLEGAL },
785 { NULL, UNMATCHABLE, Token::ILLEGAL }, 800 { NULL, UNMATCHABLE, Token::ILLEGAL },
786 { NULL, UNMATCHABLE, Token::ILLEGAL }, 801 { NULL, UNMATCHABLE, Token::ILLEGAL },
787 { "let", KEYWORD_PREFIX, Token::FUTURE_RESERVED_WORD }, 802 { "let", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD },
788 { NULL, UNMATCHABLE, Token::ILLEGAL }, 803 { NULL, UNMATCHABLE, Token::ILLEGAL },
789 { NULL, N, Token::ILLEGAL }, 804 { NULL, N, Token::ILLEGAL },
790 { NULL, UNMATCHABLE, Token::ILLEGAL }, 805 { NULL, UNMATCHABLE, Token::ILLEGAL },
791 { NULL, P, Token::ILLEGAL }, 806 { NULL, P, Token::ILLEGAL },
792 { NULL, UNMATCHABLE, Token::ILLEGAL }, 807 { NULL, UNMATCHABLE, Token::ILLEGAL },
793 { "return", KEYWORD_PREFIX, Token::RETURN }, 808 { "return", KEYWORD_PREFIX, Token::RETURN },
794 { NULL, S, Token::ILLEGAL }, 809 { NULL, S, Token::ILLEGAL },
795 { NULL, T, Token::ILLEGAL }, 810 { NULL, T, Token::ILLEGAL },
796 { NULL, UNMATCHABLE, Token::ILLEGAL }, 811 { NULL, UNMATCHABLE, Token::ILLEGAL },
797 { NULL, V, Token::ILLEGAL }, 812 { NULL, V, Token::ILLEGAL },
798 { NULL, W, Token::ILLEGAL }, 813 { NULL, W, Token::ILLEGAL },
799 { NULL, UNMATCHABLE, Token::ILLEGAL }, 814 { NULL, UNMATCHABLE, Token::ILLEGAL },
800 { "yield", KEYWORD_PREFIX, Token::FUTURE_RESERVED_WORD } 815 { "yield", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD }
801 }; 816 };
802 817
803 818
804 void KeywordMatcher::Step(unibrow::uchar input) { 819 void KeywordMatcher::Step(unibrow::uchar input) {
805 switch (state_) { 820 switch (state_) {
806 case INITIAL: { 821 case INITIAL: {
807 // matching the first character is the only state with significant fanout. 822 // matching the first character is the only state with significant fanout.
808 // Match only lower-case letters in range 'b'..'y'. 823 // Match only lower-case letters in range 'b'..'y'.
809 unsigned int offset = input - kFirstCharRangeMin; 824 unsigned int offset = input - kFirstCharRangeMin;
810 if (offset < kFirstCharRangeLength) { 825 if (offset < kFirstCharRangeLength) {
(...skipping 16 matching lines...) Expand all
827 } 842 }
828 return; 843 return;
829 } 844 }
830 break; 845 break;
831 case KEYWORD_MATCHED: 846 case KEYWORD_MATCHED:
832 token_ = Token::IDENTIFIER; 847 token_ = Token::IDENTIFIER;
833 break; 848 break;
834 case C: 849 case C:
835 if (MatchState(input, 'a', CA)) return; 850 if (MatchState(input, 'a', CA)) return;
836 if (MatchKeywordStart(input, "class", 1, 851 if (MatchKeywordStart(input, "class", 1,
837 Token::FUTURE_RESERVED_WORD)) return; 852 Token::FUTURE_RESERVED_WORD)) return;
838 if (MatchState(input, 'o', CO)) return; 853 if (MatchState(input, 'o', CO)) return;
839 break; 854 break;
840 case CA: 855 case CA:
841 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; 856 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
842 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; 857 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
843 break; 858 break;
844 case CO: 859 case CO:
845 if (MatchState(input, 'n', CON)) return; 860 if (MatchState(input, 'n', CON)) return;
846 break; 861 break;
847 case CON: 862 case CON:
848 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; 863 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
849 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; 864 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
850 break; 865 break;
851 case D: 866 case D:
852 if (MatchState(input, 'e', DE)) return; 867 if (MatchState(input, 'e', DE)) return;
853 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; 868 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
854 break; 869 break;
855 case DE: 870 case DE:
856 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; 871 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
857 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; 872 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
858 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; 873 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
859 break; 874 break;
860 case E: 875 case E:
861 if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return; 876 if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return;
862 if (MatchKeywordStart(input, "enum", 1, 877 if (MatchKeywordStart(input, "enum", 1,
863 Token::FUTURE_RESERVED_WORD)) return; 878 Token::FUTURE_RESERVED_WORD)) return;
864 if (MatchState(input, 'x', EX)) return; 879 if (MatchState(input, 'x', EX)) return;
865 break; 880 break;
866 case EX: 881 case EX:
867 if (MatchKeywordStart(input, "export", 2, 882 if (MatchKeywordStart(input, "export", 2,
868 Token::FUTURE_RESERVED_WORD)) return; 883 Token::FUTURE_RESERVED_WORD)) return;
869 if (MatchKeywordStart(input, "extends", 2, 884 if (MatchKeywordStart(input, "extends", 2,
870 Token::FUTURE_RESERVED_WORD)) return; 885 Token::FUTURE_RESERVED_WORD)) return;
871 break; 886 break;
872 case F: 887 case F:
873 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; 888 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
874 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; 889 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
875 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; 890 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
876 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; 891 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
877 break; 892 break;
878 case I: 893 case I:
879 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; 894 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
880 if (MatchState(input, 'm', IM)) return; 895 if (MatchState(input, 'm', IM)) return;
881 if (MatchKeyword(input, 'n', IN, Token::IN)) return; 896 if (MatchKeyword(input, 'n', IN, Token::IN)) return;
882 break; 897 break;
883 case IM: 898 case IM:
884 if (MatchState(input, 'p', IMP)) return; 899 if (MatchState(input, 'p', IMP)) return;
885 break; 900 break;
886 case IMP: 901 case IMP:
887 if (MatchKeywordStart(input, "implements", 3, 902 if (MatchKeywordStart(input, "implements", 3,
888 Token::FUTURE_RESERVED_WORD )) return; 903 Token::FUTURE_STRICT_RESERVED_WORD )) return;
889 if (MatchKeywordStart(input, "import", 3, 904 if (MatchKeywordStart(input, "import", 3,
890 Token::FUTURE_RESERVED_WORD)) return; 905 Token::FUTURE_RESERVED_WORD)) return;
891 break; 906 break;
892 case IN: 907 case IN:
893 token_ = Token::IDENTIFIER; 908 token_ = Token::IDENTIFIER;
894 if (MatchKeywordStart(input, "interface", 2, 909 if (MatchKeywordStart(input, "interface", 2,
895 Token::FUTURE_RESERVED_WORD)) return; 910 Token::FUTURE_STRICT_RESERVED_WORD)) return;
896 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return; 911 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return;
897 break; 912 break;
898 case N: 913 case N:
899 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
900 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; 914 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
901 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; 915 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
902 break; 916 break;
903 case P: 917 case P:
904 if (MatchKeywordStart(input, "package", 1, 918 if (MatchKeywordStart(input, "package", 1,
905 Token::FUTURE_RESERVED_WORD)) return; 919 Token::FUTURE_STRICT_RESERVED_WORD)) return;
906 if (MatchState(input, 'r', PR)) return; 920 if (MatchState(input, 'r', PR)) return;
907 if (MatchKeywordStart(input, "public", 1, 921 if (MatchKeywordStart(input, "public", 1,
908 Token::FUTURE_RESERVED_WORD)) return; 922 Token::FUTURE_STRICT_RESERVED_WORD)) return;
909 break; 923 break;
910 case PR: 924 case PR:
911 if (MatchKeywordStart(input, "private", 2, 925 if (MatchKeywordStart(input, "private", 2,
912 Token::FUTURE_RESERVED_WORD)) return; 926 Token::FUTURE_STRICT_RESERVED_WORD)) return;
913 if (MatchKeywordStart(input, "protected", 2, 927 if (MatchKeywordStart(input, "protected", 2,
914 Token::FUTURE_RESERVED_WORD)) return; 928 Token::FUTURE_STRICT_RESERVED_WORD)) return;
915 break; 929 break;
916 case S: 930 case S:
917 if (MatchKeywordStart(input, "static", 1, 931 if (MatchKeywordStart(input, "static", 1,
918 Token::FUTURE_RESERVED_WORD)) return; 932 Token::FUTURE_STRICT_RESERVED_WORD)) return;
919 if (MatchKeywordStart(input, "super", 1, 933 if (MatchKeywordStart(input, "super", 1,
920 Token::FUTURE_RESERVED_WORD)) return; 934 Token::FUTURE_RESERVED_WORD)) return;
921 if (MatchKeywordStart(input, "switch", 1, 935 if (MatchKeywordStart(input, "switch", 1,
922 Token::SWITCH)) return; 936 Token::SWITCH)) return;
923 break; 937 break;
924 case T: 938 case T:
925 if (MatchState(input, 'h', TH)) return; 939 if (MatchState(input, 'h', TH)) return;
926 if (MatchState(input, 'r', TR)) return; 940 if (MatchState(input, 'r', TR)) return;
927 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; 941 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
928 break; 942 break;
929 case TH: 943 case TH:
930 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; 944 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
931 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; 945 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
932 break; 946 break;
(...skipping 10 matching lines...) Expand all
943 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; 957 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
944 break; 958 break;
945 case UNMATCHABLE: 959 case UNMATCHABLE:
946 break; 960 break;
947 } 961 }
948 // On fallthrough, it's a failure. 962 // On fallthrough, it's a failure.
949 state_ = UNMATCHABLE; 963 state_ = UNMATCHABLE;
950 } 964 }
951 965
952 } } // namespace v8::internal 966 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner-base.h ('k') | src/scopeinfo.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698