Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(144)

Side by Side Diff: src/scanner.cc

Issue 360048: Changed keyword token recognition to be done inline in the identifier scanner. (Closed)
Patch Set: Addressed review comments. Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner.h ('k') | test/cctest/SConscript » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
187 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); 187 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
188 } 188 }
189 189
190 190
191 void TwoByteStringUTF16Buffer::SeekForward(int pos) { 191 void TwoByteStringUTF16Buffer::SeekForward(int pos) {
192 pos_ = pos; 192 pos_ = pos;
193 } 193 }
194 194
195 195
196 // ---------------------------------------------------------------------------- 196 // ----------------------------------------------------------------------------
197 // Keyword Matcher
198 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
199 { "break", KEYWORD_PREFIX, Token::BREAK },
200 { NULL, C, Token::ILLEGAL },
201 { NULL, D, Token::ILLEGAL },
202 { "else", KEYWORD_PREFIX, Token::ELSE },
203 { NULL, F, Token::ILLEGAL },
204 { NULL, UNMATCHABLE, Token::ILLEGAL },
205 { NULL, UNMATCHABLE, Token::ILLEGAL },
206 { NULL, I, Token::ILLEGAL },
207 { NULL, UNMATCHABLE, Token::ILLEGAL },
208 { NULL, UNMATCHABLE, Token::ILLEGAL },
209 { NULL, UNMATCHABLE, Token::ILLEGAL },
210 { NULL, UNMATCHABLE, Token::ILLEGAL },
211 { NULL, N, Token::ILLEGAL },
212 { NULL, UNMATCHABLE, Token::ILLEGAL },
213 { NULL, UNMATCHABLE, Token::ILLEGAL },
214 { NULL, UNMATCHABLE, Token::ILLEGAL },
215 { "return", KEYWORD_PREFIX, Token::RETURN },
216 { "switch", KEYWORD_PREFIX, Token::SWITCH },
217 { NULL, T, Token::ILLEGAL },
218 { NULL, UNMATCHABLE, Token::ILLEGAL },
219 { NULL, V, Token::ILLEGAL },
220 { NULL, W, Token::ILLEGAL }
221 };
222
223
224 void KeywordMatcher::Step(uc32 input) {
225 switch (state_) {
226 case INITIAL: {
227 // Matching the first character is the only state with significant fanout.
228 // Match only lower-case letters in range 'b'..'w'.
229 unsigned int offset = input - kFirstCharRangeMin;
230 if (offset < kFirstCharRangeLength) {
231 state_ = first_states_[offset].state;
232 if (state_ == KEYWORD_PREFIX) {
233 keyword_ = first_states_[offset].keyword;
234 counter_ = 1;
235 keyword_token_ = first_states_[offset].token;
236 }
237 return;
238 }
239 break;
240 }
241 case KEYWORD_PREFIX:
242 if (keyword_[counter_] == input) {
243 ASSERT_NE(input, '\0');
244 counter_++;
245 if (keyword_[counter_] == '\0') {
246 state_ = KEYWORD_MATCHED;
247 token_ = keyword_token_;
248 }
249 return;
250 }
251 break;
252 case KEYWORD_MATCHED:
253 token_ = Token::IDENTIFIER;
254 break;
255 case C:
256 if (MatchState(input, 'a', CA)) return;
257 if (MatchState(input, 'o', CO)) return;
258 break;
259 case CA:
260 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
261 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
262 break;
263 case CO:
264 if (MatchState(input, 'n', CON)) return;
265 break;
266 case CON:
267 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
268 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
269 break;
270 case D:
271 if (MatchState(input, 'e', DE)) return;
272 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
273 break;
274 case DE:
275 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
276 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
277 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
278 break;
279 case F:
280 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
281 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
282 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
283 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
284 break;
285 case I:
286 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
287 if (MatchKeyword(input, 'n', IN, Token::IN)) return;
288 break;
289 case IN:
290 token_ = Token::IDENTIFIER;
291 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {
292 return;
293 }
294 break;
295 case N:
296 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
297 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
298 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
299 break;
300 case T:
301 if (MatchState(input, 'h', TH)) return;
302 if (MatchState(input, 'r', TR)) return;
303 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
304 break;
305 case TH:
306 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
307 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
308 break;
309 case TR:
310 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;
311 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;
312 break;
313 case V:
314 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;
315 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;
316 break;
317 case W:
318 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;
319 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
320 break;
321 default:
322 UNREACHABLE();
323 }
324 // Any case that breaks out of the switch failed to match: no keyword is possible.
325 state_ = UNMATCHABLE;
326 }
327
328
329 // ----------------------------------------------------------------------------
197 // Scanner 330 // Scanner
198 331
199 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { 332 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {
200 Token::Initialize(); 333 Token::Initialize();
201 } 334 }
202 335
203 336
204 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, 337 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
205 int position) { 338 int position) {
206 // Initialize the source buffer. 339 // Initialize the source buffer.
(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after
848 uc32 c = ScanHexEscape('u', 4); 981 uc32 c = ScanHexEscape('u', 4);
849 // We do not allow a unicode escape sequence to start another 982 // We do not allow a unicode escape sequence to start another
850 // unicode escape sequence. 983 // unicode escape sequence.
851 if (c == '\\') return unibrow::Utf8::kBadChar; 984 if (c == '\\') return unibrow::Utf8::kBadChar;
852 return c; 985 return c;
853 } 986 }
854 987
855 988
856 Token::Value Scanner::ScanIdentifier() { 989 Token::Value Scanner::ScanIdentifier() {
857 ASSERT(kIsIdentifierStart.get(c0_)); 990 ASSERT(kIsIdentifierStart.get(c0_));
858 bool has_escapes = false;
859 991
860 StartLiteral(); 992 StartLiteral();
993 KeywordMatcher keyword_match;
994
861 // Scan identifier start character. 995 // Scan identifier start character.
862 if (c0_ == '\\') { 996 if (c0_ == '\\') {
863 has_escapes = true;
864 uc32 c = ScanIdentifierUnicodeEscape(); 997 uc32 c = ScanIdentifierUnicodeEscape();
865 // Only allow legal identifier start characters. 998 // Only allow legal identifier start characters.
866 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; 999 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
867 AddChar(c); 1000 AddChar(c);
1001 keyword_match.Fail();
868 } else { 1002 } else {
869 AddChar(c0_); 1003 AddChar(c0_);
1004 keyword_match.AddChar(c0_);
870 Advance(); 1005 Advance();
871 } 1006 }
872 1007
873 // Scan the rest of the identifier characters. 1008 // Scan the rest of the identifier characters.
874 while (kIsIdentifierPart.get(c0_)) { 1009 while (kIsIdentifierPart.get(c0_)) {
875 if (c0_ == '\\') { 1010 if (c0_ == '\\') {
876 has_escapes = true;
877 uc32 c = ScanIdentifierUnicodeEscape(); 1011 uc32 c = ScanIdentifierUnicodeEscape();
878 // Only allow legal identifier part characters. 1012 // Only allow legal identifier part characters.
879 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; 1013 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
880 AddChar(c); 1014 AddChar(c);
1015 keyword_match.Fail();
881 } else { 1016 } else {
882 AddChar(c0_); 1017 AddChar(c0_);
1018 keyword_match.AddChar(c0_);
883 Advance(); 1019 Advance();
884 } 1020 }
885 } 1021 }
886 TerminateLiteral(); 1022 TerminateLiteral();
887 1023
888 // We don't have any 1-letter keywords (this is probably a common case). 1024 return keyword_match.token();
889 if ((next_.literal_end - next_.literal_pos) == 1) {
890 return Token::IDENTIFIER;
891 }
892
893 // If the identifier contains unicode escapes, it must not be
894 // resolved to a keyword.
895 if (has_escapes) {
896 return Token::IDENTIFIER;
897 }
898
899 return Token::Lookup(&literals_.data()[next_.literal_pos]);
900 } 1025 }
901 1026
902 1027
903 1028
904 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { 1029 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
905 // Checks whether the buffer contains an identifier (no escape). 1030 // Checks whether the buffer contains an identifier (no escape).
906 if (!buffer->has_more()) return false; 1031 if (!buffer->has_more()) return false;
907 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; 1032 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
908 while (buffer->has_more()) { 1033 while (buffer->has_more()) {
909 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; 1034 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
966 } 1091 }
967 AddCharAdvance(); 1092 AddCharAdvance();
968 } 1093 }
969 TerminateLiteral(); 1094 TerminateLiteral();
970 1095
971 next_.location.end_pos = source_pos() - 1; 1096 next_.location.end_pos = source_pos() - 1;
972 return true; 1097 return true;
973 } 1098 }
974 1099
975 } } // namespace v8::internal 1100 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/scanner.h ('k') | test/cctest/SConscript » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698