Index: src/scanner.cc |
diff --git a/src/scanner.cc b/src/scanner.cc |
index 3dae414f9d6c60f59cc3bcfa7f3cc5c3318600fc..ec2b2c3fcf4b5df8bfbb2d4464a727009aad29a8 100644 |
--- a/src/scanner.cc |
+++ b/src/scanner.cc |
@@ -194,6 +194,139 @@ void TwoByteStringUTF16Buffer::SeekForward(int pos) { |
// ---------------------------------------------------------------------------- |
+// Keyword Matcher |
+KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { /* One entry per possible first character; Step() indexes it with (input - kFirstCharRangeMin) and reads .keyword, .state and .token. */ |
+ { "break", KEYWORD_PREFIX, Token::BREAK }, /* 'b': the only candidate is "break", so match it as a plain prefix */ |
+ { NULL, C, Token::ILLEGAL }, /* 'c': more than one candidate, resolved in state C */ |
+ { NULL, D, Token::ILLEGAL }, /* 'd' */ |
+ { "else", KEYWORD_PREFIX, Token::ELSE }, /* 'e' */ |
+ { NULL, F, Token::ILLEGAL }, /* 'f' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'g' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'h' */ |
+ { NULL, I, Token::ILLEGAL }, /* 'i' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'j' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'k' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'l' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'm' */ |
+ { NULL, N, Token::ILLEGAL }, /* 'n' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'o' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'p' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'q' */ |
+ { "return", KEYWORD_PREFIX, Token::RETURN }, /* 'r' */ |
+ { "switch", KEYWORD_PREFIX, Token::SWITCH }, /* 's' */ |
+ { NULL, T, Token::ILLEGAL }, /* 't' */ |
+ { NULL, UNMATCHABLE, Token::ILLEGAL }, /* 'u' */ |
+ { NULL, V, Token::ILLEGAL }, /* 'v' */ |
+ { NULL, W, Token::ILLEGAL } /* 'w': last entry, so the table has 22 rows for 'b'..'w' */ |
+}; |
+ |
+ |
+void KeywordMatcher::Step(uc32 input) { /* Advances the matcher by one input character, updating state_ and, once a keyword is complete, token_. */ |
+ switch (state_) { |
+ case INITIAL: { |
+ // Matching the first character is the only state with significant fanout, |
+ // so it is a table lookup. Only lower-case letters in range 'b'..'w' match. |
+ unsigned int offset = input - kFirstCharRangeMin; /* Stored unsigned so a character below the range becomes a large offset and fails the single bounds check. */ |
+ if (offset < kFirstCharRangeLength) { |
+ state_ = first_states_[offset].state; |
+ if (state_ == KEYWORD_PREFIX) { /* Exactly one candidate keyword for this letter: remember it and its token. */ |
+ keyword_ = first_states_[offset].keyword; |
+ counter_ = 1; /* Index of the next character to compare; the first one has just been matched. */ |
+ keyword_token_ = first_states_[offset].token; |
+ } |
+ return; |
+ } |
+ break; |
+ } |
+ case KEYWORD_PREFIX: /* Comparing against the single candidate keyword_, one character per call. */ |
+ if (keyword_[counter_] == input) { |
+ ASSERT_NE(input, '\0'); |
+ counter_++; |
+ if (keyword_[counter_] == '\0') { /* Reached the terminator: the whole keyword has been seen. */ |
+ state_ = KEYWORD_MATCHED; |
+ token_ = keyword_token_; |
+ } |
+ return; |
+ } |
+ break; |
+ case KEYWORD_MATCHED: /* A further character after a complete keyword turns it back into an identifier. */ |
+ token_ = Token::IDENTIFIER; |
+ break; |
+ case C: |
+ if (MatchState(input, 'a', CA)) return; /* MatchState, MatchKeyword and MatchKeywordStart are defined outside this hunk; a true result ends the step here, presumably after they updated state_ - confirm. */ |
+ if (MatchState(input, 'o', CO)) return; |
+ break; |
+ case CA: |
+ if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; /* The numeric argument looks like the index of the character being matched now (2, after "ca") - confirm against the helper. */ |
+ if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; |
+ break; |
+ case CO: |
+ if (MatchState(input, 'n', CON)) return; |
+ break; |
+ case CON: |
+ if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; |
+ if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; |
+ break; |
+ case D: |
+ if (MatchState(input, 'e', DE)) return; |
+ if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; /* "do" is complete after this character, hence the jump straight to KEYWORD_MATCHED. */ |
+ break; |
+ case DE: |
+ if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; |
+ if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; |
+ if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; |
+ break; |
+ case F: |
+ if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; |
+ if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; |
+ if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; |
+ if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; |
+ break; |
+ case I: |
+ if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; |
+ if (MatchKeyword(input, 'n', IN, Token::IN)) return; /* "in" gets its own state IN rather than KEYWORD_MATCHED because "instanceof" may still follow. */ |
+ break; |
+ case IN: |
+ token_ = Token::IDENTIFIER; /* Another character follows "in", so the result is reset before trying "instanceof". */ |
+ if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) { |
+ return; |
+ } |
+ break; |
+ case N: |
+ if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return; |
+ if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; |
+ if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; |
+ break; |
+ case T: |
+ if (MatchState(input, 'h', TH)) return; |
+ if (MatchState(input, 'r', TR)) return; |
+ if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; |
+ break; |
+ case TH: |
+ if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; |
+ if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; |
+ break; |
+ case TR: |
+ if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; |
+ if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; |
+ break; |
+ case V: |
+ if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; |
+ if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; |
+ break; |
+ case W: |
+ if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; |
+ if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
+ break; |
+ default: /* No case above handles UNMATCHABLE, so stepping in that state lands here. */ |
+ UNREACHABLE(); |
+ } |
+ // Every break above lands here: the input is marked unmatchable. |
+ state_ = UNMATCHABLE; |
+} |
+ |
+ |
+// ---------------------------------------------------------------------------- |
// Scanner |
Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { |
@@ -855,48 +988,40 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() { |
Token::Value Scanner::ScanIdentifier() { |
ASSERT(kIsIdentifierStart.get(c0_)); |
- bool has_escapes = false; |
StartLiteral(); |
+ KeywordMatcher keyword_match; /* Fed every character while scanning and asked for the result at the end, in place of the removed post-scan Token::Lookup; presumably driven by KeywordMatcher::Step - confirm. */ |
+ |
// Scan identifier start character. |
if (c0_ == '\\') { |
- has_escapes = true; |
uc32 c = ScanIdentifierUnicodeEscape(); |
// Only allow legal identifier start characters. |
if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
AddChar(c); |
+ keyword_match.Fail(); /* Takes the place of the removed has_escapes flag, which forced Token::IDENTIFIER for escaped identifiers; Fail() is defined outside this hunk - confirm it has the same effect. */ |
} else { |
AddChar(c0_); |
+ keyword_match.AddChar(c0_); /* An unescaped character goes to the matcher as well as to the literal. */ |
Advance(); |
} |
// Scan the rest of the identifier characters. |
while (kIsIdentifierPart.get(c0_)) { |
if (c0_ == '\\') { |
- has_escapes = true; |
uc32 c = ScanIdentifierUnicodeEscape(); |
// Only allow legal identifier part characters. |
if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
AddChar(c); |
+ keyword_match.Fail(); /* Same treatment as an escape in the start character. */ |
} else { |
AddChar(c0_); |
+ keyword_match.AddChar(c0_); /* Each unescaped identifier-part character is shown to the matcher. */ |
Advance(); |
} |
} |
TerminateLiteral(); |
- // We don't have any 1-letter keywords (this is probably a common case). |
- if ((next_.literal_end - next_.literal_pos) == 1) { |
- return Token::IDENTIFIER; |
- } |
- |
- // If the identifier contains unicode escapes, it must not be |
- // resolved to a keyword. |
- if (has_escapes) { |
- return Token::IDENTIFIER; |
- } |
- |
- return Token::Lookup(&literals_.data()[next_.literal_pos]); |
+ return keyword_match.token(); /* The token now comes from the incremental matcher rather than a lookup over the finished literal. */ |
} |