src/scanner.cc - Issue 360048: Changed keyword token recognition to be done inline in the identifier scanner.

Side by Side Diff: src/scanner.cc

Issue 360048: Changed keyword token recognition to be done inline in the identifier scanner. (Closed)

Patch Set: Addressed review comments. Created 11 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 176 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
187 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);	187 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);

188 }	188 }

189	189

190	190

191 void TwoByteStringUTF16Buffer::SeekForward(int pos) {	191 void TwoByteStringUTF16Buffer::SeekForward(int pos) {

192 pos_ = pos;	192 pos_ = pos;

193 }	193 }

194	194

195	195

196 // ----------------------------------------------------------------------------	196 // ----------------------------------------------------------------------------

	197 // Keyword Matcher

	198 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {

	199 { "break", KEYWORD_PREFIX, Token::BREAK },

	200 { NULL, C, Token::ILLEGAL },

	201 { NULL, D, Token::ILLEGAL },

	202 { "else", KEYWORD_PREFIX, Token::ELSE },

	203 { NULL, F, Token::ILLEGAL },

	204 { NULL, UNMATCHABLE, Token::ILLEGAL },

	205 { NULL, UNMATCHABLE, Token::ILLEGAL },

	206 { NULL, I, Token::ILLEGAL },

	207 { NULL, UNMATCHABLE, Token::ILLEGAL },

	208 { NULL, UNMATCHABLE, Token::ILLEGAL },

	209 { NULL, UNMATCHABLE, Token::ILLEGAL },

	210 { NULL, UNMATCHABLE, Token::ILLEGAL },

	211 { NULL, N, Token::ILLEGAL },

	212 { NULL, UNMATCHABLE, Token::ILLEGAL },

	213 { NULL, UNMATCHABLE, Token::ILLEGAL },

	214 { NULL, UNMATCHABLE, Token::ILLEGAL },

	215 { "return", KEYWORD_PREFIX, Token::RETURN },

	216 { "switch", KEYWORD_PREFIX, Token::SWITCH },

	217 { NULL, T, Token::ILLEGAL },

	218 { NULL, UNMATCHABLE, Token::ILLEGAL },

	219 { NULL, V, Token::ILLEGAL },

	220 { NULL, W, Token::ILLEGAL }

	221 };

	222

	223

	224 void KeywordMatcher::Step(uc32 input) {

	225 switch (state_) {

	226 case INITIAL: {

	227 // Matching the first character is the only state with significant fanout.

	228 // Match only lower-case letters in range 'b'..'w'.

	229 unsigned int offset = input - kFirstCharRangeMin;

	230 if (offset < kFirstCharRangeLength) {

	231 state_ = first_states_[offset].state;

	232 if (state_ == KEYWORD_PREFIX) {

	233 keyword_ = first_states_[offset].keyword;

	234 counter_ = 1;

	235 keyword_token_ = first_states_[offset].token;

	236 }

	237 return;

	238 }

	239 break;

	240 }

	241 case KEYWORD_PREFIX:

	242 if (keyword_[counter_] == input) {

	243 ASSERT_NE(input, '\0');

	244 counter_++;

	245 if (keyword_[counter_] == '\0') {

	246 state_ = KEYWORD_MATCHED;

	247 token_ = keyword_token_;

	248 }

	249 return;

	250 }

	251 break;

	252 case KEYWORD_MATCHED:

	253 token_ = Token::IDENTIFIER;

	254 break;

	255 case C:

	256 if (MatchState(input, 'a', CA)) return;

	257 if (MatchState(input, 'o', CO)) return;

	258 break;

	259 case CA:

	260 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;

	261 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;

	262 break;

	263 case CO:

	264 if (MatchState(input, 'n', CON)) return;

	265 break;

	266 case CON:

	267 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;

	268 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;

	269 break;

	270 case D:

	271 if (MatchState(input, 'e', DE)) return;

	272 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;

	273 break;

	274 case DE:

	275 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;

	276 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;

	277 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;

	278 break;

	279 case F:

	280 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;

	281 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;

	282 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;

	283 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;

	284 break;

	285 case I:

	286 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;

	287 if (MatchKeyword(input, 'n', IN, Token::IN)) return;

	288 break;

	289 case IN:

	290 token_ = Token::IDENTIFIER;

	291 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {

	292 return;

	293 }

	294 break;

	295 case N:

	296 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;

	297 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;

	298 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;

	299 break;

	300 case T:

	301 if (MatchState(input, 'h', TH)) return;

	302 if (MatchState(input, 'r', TR)) return;

	303 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;

	304 break;

	305 case TH:

	306 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;

	307 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;

	308 break;

	309 case TR:

	310 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;

	311 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;

	312 break;

	313 case V:

	314 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;

	315 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;

	316 break;

	317 case W:

	318 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;

	319 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;

	320 break;

	321 default:

	322 UNREACHABLE();

	323 }

	324 // Breaking out of the switch means the input cannot match any keyword.

	325 state_ = UNMATCHABLE;

	326 }

	327

	328

	329 // ----------------------------------------------------------------------------

197 // Scanner	330 // Scanner

198	331

199 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {	332 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {

200 Token::Initialize();	333 Token::Initialize();

201 }	334 }

202	335

203	336

204 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,	337 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,

205 int position) {	338 int position) {

206 // Initialize the source buffer.	339 // Initialize the source buffer.

(...skipping 641 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
848 uc32 c = ScanHexEscape('u', 4);	981 uc32 c = ScanHexEscape('u', 4);

849 // We do not allow a unicode escape sequence to start another	982 // We do not allow a unicode escape sequence to start another

850 // unicode escape sequence.	983 // unicode escape sequence.

851 if (c == '\\') return unibrow::Utf8::kBadChar;	984 if (c == '\\') return unibrow::Utf8::kBadChar;

852 return c;	985 return c;

853 }	986 }

854	987

855	988

856 Token::Value Scanner::ScanIdentifier() {	989 Token::Value Scanner::ScanIdentifier() {

857 ASSERT(kIsIdentifierStart.get(c0_));	990 ASSERT(kIsIdentifierStart.get(c0_));

858 bool has_escapes = false;

859	991

860 StartLiteral();	992 StartLiteral();

	993 KeywordMatcher keyword_match;

	994

861 // Scan identifier start character.	995 // Scan identifier start character.

862 if (c0_ == '\\') {	996 if (c0_ == '\\') {

863 has_escapes = true;

864 uc32 c = ScanIdentifierUnicodeEscape();	997 uc32 c = ScanIdentifierUnicodeEscape();

865 // Only allow legal identifier start characters.	998 // Only allow legal identifier start characters.

866 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;	999 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;

867 AddChar(c);	1000 AddChar(c);

	1001 keyword_match.Fail();

868 } else {	1002 } else {

869 AddChar(c0_);	1003 AddChar(c0_);

	1004 keyword_match.AddChar(c0_);

870 Advance();	1005 Advance();

871 }	1006 }

872	1007

873 // Scan the rest of the identifier characters.	1008 // Scan the rest of the identifier characters.

874 while (kIsIdentifierPart.get(c0_)) {	1009 while (kIsIdentifierPart.get(c0_)) {

875 if (c0_ == '\\') {	1010 if (c0_ == '\\') {

876 has_escapes = true;

877 uc32 c = ScanIdentifierUnicodeEscape();	1011 uc32 c = ScanIdentifierUnicodeEscape();

878 // Only allow legal identifier part characters.	1012 // Only allow legal identifier part characters.

879 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;	1013 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;

880 AddChar(c);	1014 AddChar(c);

	1015 keyword_match.Fail();

881 } else {	1016 } else {

882 AddChar(c0_);	1017 AddChar(c0_);

	1018 keyword_match.AddChar(c0_);

883 Advance();	1019 Advance();

884 }	1020 }

885 }	1021 }

886 TerminateLiteral();	1022 TerminateLiteral();

887	1023

888 // We don't have any 1-letter keywords (this is probably a common case).	1024 return keyword_match.token();

889 if ((next_.literal_end - next_.literal_pos) == 1) {

890 return Token::IDENTIFIER;

891 }

892

893 // If the identifier contains unicode escapes, it must not be

894 // resolved to a keyword.

895 if (has_escapes) {

896 return Token::IDENTIFIER;

897 }

898

899 return Token::Lookup(&literals_.data()[next_.literal_pos]);

900 }	1025 }

901	1026

902	1027

903	1028

904 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {	1029 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {

905 // Checks whether the buffer contains an identifier (no escape).	1030 // Checks whether the buffer contains an identifier (no escape).

906 if (!buffer->has_more()) return false;	1031 if (!buffer->has_more()) return false;

907 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;	1032 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;

908 while (buffer->has_more()) {	1033 while (buffer->has_more()) {

909 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;	1034 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;

(...skipping 56 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
966 }	1091 }

967 AddCharAdvance();	1092 AddCharAdvance();

968 }	1093 }

969 TerminateLiteral();	1094 TerminateLiteral();

970	1095

971 next_.location.end_pos = source_pos() - 1;	1096 next_.location.end_pos = source_pos() - 1;

972 return true;	1097 return true;

973 }	1098 }

974	1099

975 } } // namespace v8::internal	1100 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | test/cctest/SConscript » ('j') | no next file with comments »