OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
187 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | 187 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
188 } | 188 } |
189 | 189 |
190 | 190 |
// Repositions the buffer by overwriting pos_ with the requested position.
// No range check is performed in this function; the row is identical in the
// OLD and NEW columns.
191 void TwoByteStringUTF16Buffer::SeekForward(int pos) { | 191 void TwoByteStringUTF16Buffer::SeekForward(int pos) { |
192 pos_ = pos; | 192 pos_ = pos; |
193 } | 193 } |
194 | 194 |
195 | 195 |
196 // ---------------------------------------------------------------------------- | 196 // ---------------------------------------------------------------------------- |
| 197 // Keyword Matcher |
// Dispatch table for the first character of a potential keyword.
// KeywordMatcher::Step indexes it with (input - kFirstCharRangeMin); per the
// comment in Step the covered range is 'b'..'w', which matches the 22 rows
// below (one per letter, in alphabetical order).
// Each row is { keyword, state, token }:
//   - a keyword string with KEYWORD_PREFIX: this letter starts exactly one
//     keyword, so the rest is matched character by character against it;
//   - NULL with a letter state (C, D, F, I, N, T, V, W): several keywords
//     share this first letter and Step disambiguates in that state's case;
//   - NULL with UNMATCHABLE: no keyword handled here starts with this letter.
// Step reads the keyword and token fields only when state is KEYWORD_PREFIX.
| 198 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
| 199 { "break", KEYWORD_PREFIX, Token::BREAK },
| 200 { NULL, C, Token::ILLEGAL },
| 201 { NULL, D, Token::ILLEGAL },
| 202 { "else", KEYWORD_PREFIX, Token::ELSE },
| 203 { NULL, F, Token::ILLEGAL },
| 204 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 205 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 206 { NULL, I, Token::ILLEGAL },
| 207 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 208 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 209 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 210 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 211 { NULL, N, Token::ILLEGAL },
| 212 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 213 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 214 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 215 { "return", KEYWORD_PREFIX, Token::RETURN },
| 216 { "switch", KEYWORD_PREFIX, Token::SWITCH },
| 217 { NULL, T, Token::ILLEGAL },
| 218 { NULL, UNMATCHABLE, Token::ILLEGAL },
| 219 { NULL, V, Token::ILLEGAL },
| 220 { NULL, W, Token::ILLEGAL }
| 221 };
| 222 |
| 223 |
// Advances the keyword-recognition state machine by one input character.
// Every case either returns early after a successful transition or breaks
// out of the switch; breaking out parks the matcher in UNMATCHABLE (see the
// assignment after the switch).
// There is no case for UNMATCHABLE itself, so reaching Step in that state
// would hit UNREACHABLE(); callers presumably stop calling Step once matching
// has failed -- TODO confirm against the caller in the header.
// MatchState / MatchKeyword / MatchKeywordStart are defined outside this
// view; from their use here they appear to return true when `input` was
// accepted and the state was updated -- verify against their definitions.
| 224 void KeywordMatcher::Step(uc32 input) {
| 225 switch (state_) {
| 226 case INITIAL: {
| 227 // matching the first character is the only state with significant fanout.
| 228 // Match only lower-case letters in range 'b'..'w'.
// Storing the difference in an unsigned lets the single comparison below
// reject characters on either side of the table's range.
| 229 unsigned int offset = input - kFirstCharRangeMin;
| 230 if (offset < kFirstCharRangeLength) {
| 231 state_ = first_states_[offset].state;
| 232 if (state_ == KEYWORD_PREFIX) {
| 233 keyword_ = first_states_[offset].keyword;
| 234 counter_ = 1;
| 235 keyword_token_ = first_states_[offset].token;
| 236 }
| 237 return;
| 238 }
| 239 break;
| 240 }
// keyword_[counter_] is the next character still expected. If, after the
// increment, it is the terminating '\0', the whole keyword has been seen.
| 241 case KEYWORD_PREFIX:
| 242 if (keyword_[counter_] == input) {
| 243 ASSERT_NE(input, '\0');
| 244 counter_++;
| 245 if (keyword_[counter_] == '\0') {
| 246 state_ = KEYWORD_MATCHED;
| 247 token_ = keyword_token_;
| 248 }
| 249 return;
| 250 }
| 251 break;
// A further character after a complete keyword: the word is longer than the
// keyword, so report IDENTIFIER and give up (state becomes UNMATCHABLE).
| 252 case KEYWORD_MATCHED:
| 253 token_ = Token::IDENTIFIER;
| 254 break;
| 255 case C:
| 256 if (MatchState(input, 'a', CA)) return;
| 257 if (MatchState(input, 'o', CO)) return;
| 258 break;
| 259 case CA:
| 260 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
| 261 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
| 262 break;
| 263 case CO:
| 264 if (MatchState(input, 'n', CON)) return;
| 265 break;
| 266 case CON:
| 267 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
| 268 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
| 269 break;
| 270 case D:
| 271 if (MatchState(input, 'e', DE)) return;
| 272 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
| 273 break;
| 274 case DE:
| 275 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
| 276 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
| 277 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
| 278 break;
| 279 case F:
| 280 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
| 281 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
| 282 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
| 283 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
| 284 break;
| 285 case I:
| 286 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
| 287 if (MatchKeyword(input, 'n', IN, Token::IN)) return;
| 288 break;
// State IN is entered from state I via MatchKeyword(..., IN, Token::IN),
// which presumably already recorded Token::IN -- verify. Any further
// character means the word is no longer "in", so the token is reset first;
// "instanceof" is the only continuation tried here.
| 289 case IN:
| 290 token_ = Token::IDENTIFIER;
| 291 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {
| 292 return;
| 293 }
| 294 break;
| 295 case N:
| 296 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
| 297 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
| 298 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
| 299 break;
| 300 case T:
| 301 if (MatchState(input, 'h', TH)) return;
| 302 if (MatchState(input, 'r', TR)) return;
| 303 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
| 304 break;
| 305 case TH:
| 306 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
| 307 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
| 308 break;
| 309 case TR:
| 310 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;
| 311 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;
| 312 break;
| 313 case V:
| 314 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;
| 315 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;
| 316 break;
| 317 case W:
| 318 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;
| 319 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
| 320 break;
| 321 default:
| 322 UNREACHABLE();
| 323 }
| 324 // On fallthrough, it's a failure.
| 325 state_ = UNMATCHABLE;
| 326 }
| 327 |
| 328 |
| 329 // ---------------------------------------------------------------------------- |
197 // Scanner | 330 // Scanner |
198 | 331 |
// Constructs a scanner with stack_overflow_ cleared and is_pre_parsing_ set
// from `pre`, then calls Token::Initialize(). Identical in OLD and NEW.
199 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { | 332 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { |
200 Token::Initialize(); | 333 Token::Initialize(); |
201 } | 334 } |
202 | 335 |
203 | 336 |
204 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, | 337 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, |
205 int position) { | 338 int position) { |
206 // Initialize the source buffer. | 339 // Initialize the source buffer. |
(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
848 uc32 c = ScanHexEscape('u', 4); | 981 uc32 c = ScanHexEscape('u', 4); |
849 // We do not allow a unicode escape sequence to start another | 982 // We do not allow a unicode escape sequence to start another |
850 // unicode escape sequence. | 983 // unicode escape sequence. |
851 if (c == '\\') return unibrow::Utf8::kBadChar; | 984 if (c == '\\') return unibrow::Utf8::kBadChar; |
852 return c; | 985 return c; |
853 } | 986 } |
854 | 987 |
855 | 988 |
// Scans one identifier starting at c0_, appending its characters to the
// current literal, and returns the token it denotes.
// Returns Token::ILLEGAL if a unicode escape decodes to a character that is
// not a legal identifier start (first character) or identifier part (rest).
//
// OLD column: tracks has_escapes, returns Token::IDENTIFIER for one-character
// literals and for identifiers containing escapes, and otherwise calls
// Token::Lookup on the finished literal.
// NEW column: feeds every non-escaped character to a KeywordMatcher while it
// is being added to the literal, and calls keyword_match.Fail() whenever an
// escape is seen; the result is whatever keyword_match.token() reports.
// Fail() presumably forces that result to IDENTIFIER, which would preserve
// the OLD rule that escaped identifiers never resolve to keywords -- verify
// against the KeywordMatcher declaration.
856 Token::Value Scanner::ScanIdentifier() { | 989 Token::Value Scanner::ScanIdentifier() { |
857 ASSERT(kIsIdentifierStart.get(c0_)); | 990 ASSERT(kIsIdentifierStart.get(c0_)); |
858 bool has_escapes = false; | |
859 | 991 |
860 StartLiteral(); | 992 StartLiteral(); |
| 993 KeywordMatcher keyword_match; |
| 994 |
861 // Scan identifier start character. | 995 // Scan identifier start character. |
862 if (c0_ == '\\') { | 996 if (c0_ == '\\') { |
863 has_escapes = true; | |
864 uc32 c = ScanIdentifierUnicodeEscape(); | 997 uc32 c = ScanIdentifierUnicodeEscape(); |
865 // Only allow legal identifier start characters. | 998 // Only allow legal identifier start characters. |
866 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 999 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
867 AddChar(c); | 1000 AddChar(c); |
| 1001 keyword_match.Fail(); |
868 } else { | 1002 } else { |
869 AddChar(c0_); | 1003 AddChar(c0_); |
| 1004 keyword_match.AddChar(c0_); |
870 Advance(); | 1005 Advance(); |
871 } | 1006 } |
872 | 1007 |
873 // Scan the rest of the identifier characters. | 1008 // Scan the rest of the identifier characters. |
874 while (kIsIdentifierPart.get(c0_)) { | 1009 while (kIsIdentifierPart.get(c0_)) { |
875 if (c0_ == '\\') { | 1010 if (c0_ == '\\') { |
876 has_escapes = true; | |
877 uc32 c = ScanIdentifierUnicodeEscape(); | 1011 uc32 c = ScanIdentifierUnicodeEscape(); |
878 // Only allow legal identifier part characters. | 1012 // Only allow legal identifier part characters. |
879 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 1013 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
880 AddChar(c); | 1014 AddChar(c); |
| 1015 keyword_match.Fail(); |
881 } else { | 1016 } else { |
882 AddChar(c0_); | 1017 AddChar(c0_); |
| 1018 keyword_match.AddChar(c0_); |
883 Advance(); | 1019 Advance(); |
884 } | 1020 } |
885 } | 1021 } |
886 TerminateLiteral(); | 1022 TerminateLiteral(); |
887 | 1023 |
888 // We don't have any 1-letter keywords (this is probably a common case). | 1024 return keyword_match.token(); |
889 if ((next_.literal_end - next_.literal_pos) == 1) { | |
890 return Token::IDENTIFIER; | |
891 } | |
892 | |
893 // If the identifier contains unicode escapes, it must not be | |
894 // resolved to a keyword. | |
895 if (has_escapes) { | |
896 return Token::IDENTIFIER; | |
897 } | |
898 | |
899 return Token::Lookup(&literals_.data()[next_.literal_pos]); | |
900 } | 1025 } |
901 | 1026 |
902 | 1027 |
903 | 1028 |
904 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | 1029 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { |
905 // Checks whether the buffer contains an identifier (no escape). | 1030 // Checks whether the buffer contains an identifier (no escape). |
906 if (!buffer->has_more()) return false; | 1031 if (!buffer->has_more()) return false; |
907 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | 1032 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; |
908 while (buffer->has_more()) { | 1033 while (buffer->has_more()) { |
909 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; | 1034 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
966 } | 1091 } |
967 AddCharAdvance(); | 1092 AddCharAdvance(); |
968 } | 1093 } |
969 TerminateLiteral(); | 1094 TerminateLiteral(); |
970 | 1095 |
971 next_.location.end_pos = source_pos() - 1; | 1096 next_.location.end_pos = source_pos() - 1; |
972 return true; | 1097 return true; |
973 } | 1098 } |
974 | 1099 |
975 } } // namespace v8::internal | 1100 } } // namespace v8::internal |
OLD | NEW |