| Index: src/scanner-base.cc
|
| ===================================================================
|
| --- src/scanner-base.cc (revision 9006)
|
| +++ src/scanner-base.cc (working copy)
|
| @@ -41,12 +41,12 @@
|
| : unicode_cache_(unicode_cache) { }
|
|
|
|
|
| -uc32 Scanner::ScanHexEscape(uc32 c, int length) {
|
| - ASSERT(length <= 4); // prevent overflow
|
| +uc32 Scanner::ScanHexNumber(int expected_length) {
|
| + ASSERT(expected_length <= 4); // prevent overflow
|
|
|
| - uc32 digits[4];
|
| + uc32 digits[4] = { 0, 0, 0, 0 };
|
| uc32 x = 0;
|
| - for (int i = 0; i < length; i++) {
|
| + for (int i = 0; i < expected_length; i++) {
|
| digits[i] = c0_;
|
| int d = HexValue(c0_);
|
| if (d < 0) {
|
| @@ -54,12 +54,11 @@
|
| // should be illegal, but other JS VMs just return the
|
| // non-escaped version of the original character.
|
|
|
| - // Push back digits read, except the last one (in c0_).
|
| + // Push back digits that we have advanced past.
|
| for (int j = i-1; j >= 0; j--) {
|
| PushBack(digits[j]);
|
| }
|
| - // Notice: No handling of error - treat it as "\u"->"u".
|
| - return c;
|
| + return -1;
|
| }
|
| x = x * 16 + d;
|
| Advance();
|
| @@ -74,7 +73,9 @@
|
| // JavaScriptScanner
|
|
|
| JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)
|
| - : Scanner(scanner_contants), octal_pos_(Location::invalid()) { }
|
| + : Scanner(scanner_contants),
|
| + octal_pos_(Location::invalid()),
|
| + harmony_block_scoping_(false) { }
|
|
|
|
|
| void JavaScriptScanner::Initialize(UC16CharacterStream* source) {
|
| @@ -94,7 +95,7 @@
|
| STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
|
|
|
| // Table of one-character tokens, by character (0x00..0x7f only).
|
| -static byte one_char_tokens[] = {
|
| +static const byte one_char_tokens[] = {
|
| Token::ILLEGAL,
|
| Token::ILLEGAL,
|
| Token::ILLEGAL,
|
| @@ -638,9 +639,17 @@
|
| case 'n' : c = '\n'; break;
|
| case 'r' : c = '\r'; break;
|
| case 't' : c = '\t'; break;
|
| - case 'u' : c = ScanHexEscape(c, 4); break;
|
| + case 'u' : {
|
| + c = ScanHexNumber(4);
|
| + if (c < 0) c = 'u';
|
| + break;
|
| + }
|
| case 'v' : c = '\v'; break;
|
| - case 'x' : c = ScanHexEscape(c, 2); break;
|
| + case 'x' : {
|
| + c = ScanHexNumber(2);
|
| + if (c < 0) c = 'x';
|
| + break;
|
| + }
|
| case '0' : // fall through
|
| case '1' : // fall through
|
| case '2' : // fall through
|
| @@ -800,25 +809,133 @@
|
|
|
| uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
|
| Advance();
|
| - if (c0_ != 'u') return unibrow::Utf8::kBadChar;
|
| + if (c0_ != 'u') return -1;
|
| Advance();
|
| - uc32 c = ScanHexEscape('u', 4);
|
| - // We do not allow a unicode escape sequence to start another
|
| - // unicode escape sequence.
|
| - if (c == '\\') return unibrow::Utf8::kBadChar;
|
| - return c;
|
| + uc32 result = ScanHexNumber(4);
|
| + if (result < 0) PushBack('u');
|
| + return result;
|
| }
|
|
|
|
|
| +// ----------------------------------------------------------------------------
|
| +// Keyword Matcher
|
| +
|
| +#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
|
| + KEYWORD_GROUP('b') \
|
| + KEYWORD("break", Token::BREAK) \
|
| + KEYWORD_GROUP('c') \
|
| + KEYWORD("case", Token::CASE) \
|
| + KEYWORD("catch", Token::CATCH) \
|
| + KEYWORD("class", Token::FUTURE_RESERVED_WORD) \
|
| + KEYWORD("const", Token::CONST) \
|
| + KEYWORD("continue", Token::CONTINUE) \
|
| + KEYWORD_GROUP('d') \
|
| + KEYWORD("debugger", Token::DEBUGGER) \
|
| + KEYWORD("default", Token::DEFAULT) \
|
| + KEYWORD("delete", Token::DELETE) \
|
| + KEYWORD("do", Token::DO) \
|
| + KEYWORD_GROUP('e') \
|
| + KEYWORD("else", Token::ELSE) \
|
| + KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \
|
| + KEYWORD("export", Token::FUTURE_RESERVED_WORD) \
|
| + KEYWORD("extends", Token::FUTURE_RESERVED_WORD) \
|
| + KEYWORD_GROUP('f') \
|
| + KEYWORD("false", Token::FALSE_LITERAL) \
|
| + KEYWORD("finally", Token::FINALLY) \
|
| + KEYWORD("for", Token::FOR) \
|
| + KEYWORD("function", Token::FUNCTION) \
|
| + KEYWORD_GROUP('i') \
|
| + KEYWORD("if", Token::IF) \
|
| + KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
|
| + KEYWORD("import", Token::FUTURE_RESERVED_WORD) \
|
| + KEYWORD("in", Token::IN) \
|
| + KEYWORD("instanceof", Token::INSTANCEOF) \
|
| + KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
|
| + KEYWORD_GROUP('l') \
|
| + KEYWORD("let", harmony_block_scoping \
|
| + ? Token::LET : Token::FUTURE_STRICT_RESERVED_WORD) \
|
| + KEYWORD_GROUP('n') \
|
| + KEYWORD("new", Token::NEW) \
|
| + KEYWORD("null", Token::NULL_LITERAL) \
|
| + KEYWORD_GROUP('p') \
|
| + KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
|
| + KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
|
| + KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
|
| + KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
|
| + KEYWORD_GROUP('r') \
|
| + KEYWORD("return", Token::RETURN) \
|
| + KEYWORD_GROUP('s') \
|
| + KEYWORD("static", Token::FUTURE_STRICT_RESERVED_WORD) \
|
| + KEYWORD("super", Token::FUTURE_RESERVED_WORD) \
|
| + KEYWORD("switch", Token::SWITCH) \
|
| + KEYWORD_GROUP('t') \
|
| + KEYWORD("this", Token::THIS) \
|
| + KEYWORD("throw", Token::THROW) \
|
| + KEYWORD("true", Token::TRUE_LITERAL) \
|
| + KEYWORD("try", Token::TRY) \
|
| + KEYWORD("typeof", Token::TYPEOF) \
|
| + KEYWORD_GROUP('v') \
|
| + KEYWORD("var", Token::VAR) \
|
| + KEYWORD("void", Token::VOID) \
|
| + KEYWORD_GROUP('w') \
|
| + KEYWORD("while", Token::WHILE) \
|
| + KEYWORD("with", Token::WITH) \
|
| + KEYWORD_GROUP('y') \
|
| + KEYWORD("yield", Token::FUTURE_STRICT_RESERVED_WORD)
|
| +
|
| +
|
| +static Token::Value KeywordOrIdentifierToken(const char* input,
|
| + int input_length,
|
| + bool harmony_block_scoping) {
|
| + ASSERT(input_length >= 1);
|
| + const int kMinLength = 2;
|
| + const int kMaxLength = 10;
|
| + if (input_length < kMinLength || input_length > kMaxLength) {
|
| + return Token::IDENTIFIER;
|
| + }
|
| + switch (input[0]) {
|
| + default:
|
| +#define KEYWORD_GROUP_CASE(ch) \
|
| + break; \
|
| + case ch:
|
| +#define KEYWORD(keyword, token) \
|
| + { \
|
| + /* 'keyword' is a char array, so sizeof(keyword) is */ \
|
| + /* strlen(keyword) plus 1 for the NUL char. */ \
|
| + const int keyword_length = sizeof(keyword) - 1; \
|
| + STATIC_ASSERT(keyword_length >= kMinLength); \
|
| + STATIC_ASSERT(keyword_length <= kMaxLength); \
|
| + if (input_length == keyword_length && \
|
| + input[1] == keyword[1] && \
|
| + (keyword_length <= 2 || input[2] == keyword[2]) && \
|
| + (keyword_length <= 3 || input[3] == keyword[3]) && \
|
| + (keyword_length <= 4 || input[4] == keyword[4]) && \
|
| + (keyword_length <= 5 || input[5] == keyword[5]) && \
|
| + (keyword_length <= 6 || input[6] == keyword[6]) && \
|
| + (keyword_length <= 7 || input[7] == keyword[7]) && \
|
| + (keyword_length <= 8 || input[8] == keyword[8]) && \
|
| + (keyword_length <= 9 || input[9] == keyword[9])) { \
|
| + return token; \
|
| + } \
|
| + }
|
| + KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
|
| + }
|
| + return Token::IDENTIFIER;
|
| +}
|
| +
|
| +
|
| Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
|
| ASSERT(unicode_cache_->IsIdentifierStart(c0_));
|
| LiteralScope literal(this);
|
| - KeywordMatcher keyword_match;
|
| // Scan identifier start character.
|
| if (c0_ == '\\') {
|
| uc32 c = ScanIdentifierUnicodeEscape();
|
| // Only allow legal identifier start characters.
|
| - if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL;
|
| + if (c < 0 ||
|
| + c == '\\' || // No recursive escapes.
|
| + !unicode_cache_->IsIdentifierStart(c)) {
|
| + return Token::ILLEGAL;
|
| + }
|
| AddLiteralChar(c);
|
| return ScanIdentifierSuffix(&literal);
|
| }
|
| @@ -826,9 +943,6 @@
|
| uc32 first_char = c0_;
|
| Advance();
|
| AddLiteralChar(first_char);
|
| - if (!keyword_match.AddChar(first_char)) {
|
| - return ScanIdentifierSuffix(&literal);
|
| - }
|
|
|
| // Scan the rest of the identifier characters.
|
| while (unicode_cache_->IsIdentifierPart(c0_)) {
|
| @@ -836,14 +950,22 @@
|
| uc32 next_char = c0_;
|
| Advance();
|
| AddLiteralChar(next_char);
|
| - if (keyword_match.AddChar(next_char)) continue;
|
| + continue;
|
| }
|
| - // Fallthrough if no loner able to complete keyword.
|
| + // Fallthrough if no longer able to complete keyword.
|
| return ScanIdentifierSuffix(&literal);
|
| }
|
| +
|
| literal.Complete();
|
|
|
| - return keyword_match.token();
|
| + if (next_.literal_chars->is_ascii()) {
|
| + Vector<const char> chars = next_.literal_chars->ascii_literal();
|
| + return KeywordOrIdentifierToken(chars.start(),
|
| + chars.length(),
|
| + harmony_block_scoping_);
|
| + }
|
| +
|
| + return Token::IDENTIFIER;
|
| }
|
|
|
|
|
| @@ -853,7 +975,11 @@
|
| if (c0_ == '\\') {
|
| uc32 c = ScanIdentifierUnicodeEscape();
|
| // Only allow legal identifier part characters.
|
| - if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL;
|
| + if (c < 0 ||
|
| + c == '\\' ||
|
| + !unicode_cache_->IsIdentifierPart(c)) {
|
| + return Token::ILLEGAL;
|
| + }
|
| AddLiteralChar(c);
|
| } else {
|
| AddLiteralChar(c0_);
|
| @@ -879,8 +1005,9 @@
|
| // the scanner should pass uninterpreted bodies to the RegExp
|
| // constructor.
|
| LiteralScope literal(this);
|
| - if (seen_equal)
|
| + if (seen_equal) {
|
| AddLiteralChar('=');
|
| + }
|
|
|
| while (c0_ != '/' || in_character_class) {
|
| if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false;
|
| @@ -912,20 +1039,47 @@
|
| }
|
|
|
|
|
| +bool JavaScriptScanner::ScanLiteralUnicodeEscape() {
|
| + ASSERT(c0_ == '\\');
|
| + uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};
|
| + Advance();
|
| + int i = 1;
|
| + if (c0_ == 'u') {
|
| + i++;
|
| + while (i < 6) {
|
| + Advance();
|
| + if (!IsHexDigit(c0_)) break;
|
| + chars_read[i] = c0_;
|
| + i++;
|
| + }
|
| + }
|
| + if (i < 6) {
|
| + // Incomplete escape. Undo all advances and return false.
|
| + while (i > 0) {
|
| + i--;
|
| + PushBack(chars_read[i]);
|
| + }
|
| + return false;
|
| + }
|
| + // Complete escape. Add all chars to current literal buffer.
|
| + for (int i = 0; i < 6; i++) {
|
| + AddLiteralChar(chars_read[i]);
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +
|
| bool JavaScriptScanner::ScanRegExpFlags() {
|
| // Scan regular expression flags.
|
| LiteralScope literal(this);
|
| while (unicode_cache_->IsIdentifierPart(c0_)) {
|
| - if (c0_ == '\\') {
|
| - uc32 c = ScanIdentifierUnicodeEscape();
|
| - if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
|
| - // We allow any escaped character, unlike the restriction on
|
| - // IdentifierPart when it is used to build an IdentifierName.
|
| - AddLiteralChar(c);
|
| - continue;
|
| + if (c0_ != '\\') {
|
| + AddLiteralCharAdvance();
|
| + } else {
|
| + if (!ScanLiteralUnicodeEscape()) {
|
| + break;
|
| }
|
| }
|
| - AddLiteralCharAdvance();
|
| }
|
| literal.Complete();
|
|
|
| @@ -933,182 +1087,4 @@
|
| return true;
|
| }
|
|
|
| -// ----------------------------------------------------------------------------
|
| -// Keyword Matcher
|
| -
|
| -const KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
|
| - { "break", KEYWORD_PREFIX, Token::BREAK },
|
| - { NULL, C, Token::ILLEGAL },
|
| - { NULL, D, Token::ILLEGAL },
|
| - { NULL, E, Token::ILLEGAL },
|
| - { NULL, F, Token::ILLEGAL },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { NULL, I, Token::ILLEGAL },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { "let", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { NULL, N, Token::ILLEGAL },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { NULL, P, Token::ILLEGAL },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { "return", KEYWORD_PREFIX, Token::RETURN },
|
| - { NULL, S, Token::ILLEGAL },
|
| - { NULL, T, Token::ILLEGAL },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { NULL, V, Token::ILLEGAL },
|
| - { NULL, W, Token::ILLEGAL },
|
| - { NULL, UNMATCHABLE, Token::ILLEGAL },
|
| - { "yield", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD }
|
| -};
|
| -
|
| -
|
| -void KeywordMatcher::Step(unibrow::uchar input) {
|
| - switch (state_) {
|
| - case INITIAL: {
|
| - // matching the first character is the only state with significant fanout.
|
| - // Match only lower-case letters in range 'b'..'y'.
|
| - unsigned int offset = input - kFirstCharRangeMin;
|
| - if (offset < kFirstCharRangeLength) {
|
| - state_ = first_states_[offset].state;
|
| - if (state_ == KEYWORD_PREFIX) {
|
| - keyword_ = first_states_[offset].keyword;
|
| - counter_ = 1;
|
| - keyword_token_ = first_states_[offset].token;
|
| - }
|
| - return;
|
| - }
|
| - break;
|
| - }
|
| - case KEYWORD_PREFIX:
|
| - if (static_cast<unibrow::uchar>(keyword_[counter_]) == input) {
|
| - counter_++;
|
| - if (keyword_[counter_] == '\0') {
|
| - state_ = KEYWORD_MATCHED;
|
| - token_ = keyword_token_;
|
| - }
|
| - return;
|
| - }
|
| - break;
|
| - case KEYWORD_MATCHED:
|
| - token_ = Token::IDENTIFIER;
|
| - break;
|
| - case C:
|
| - if (MatchState(input, 'a', CA)) return;
|
| - if (MatchKeywordStart(input, "class", 1,
|
| - Token::FUTURE_RESERVED_WORD)) return;
|
| - if (MatchState(input, 'o', CO)) return;
|
| - break;
|
| - case CA:
|
| - if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
|
| - if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
|
| - break;
|
| - case CO:
|
| - if (MatchState(input, 'n', CON)) return;
|
| - break;
|
| - case CON:
|
| - if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
|
| - if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
|
| - break;
|
| - case D:
|
| - if (MatchState(input, 'e', DE)) return;
|
| - if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
|
| - break;
|
| - case DE:
|
| - if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
|
| - if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
|
| - if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
|
| - break;
|
| - case E:
|
| - if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return;
|
| - if (MatchKeywordStart(input, "enum", 1,
|
| - Token::FUTURE_RESERVED_WORD)) return;
|
| - if (MatchState(input, 'x', EX)) return;
|
| - break;
|
| - case EX:
|
| - if (MatchKeywordStart(input, "export", 2,
|
| - Token::FUTURE_RESERVED_WORD)) return;
|
| - if (MatchKeywordStart(input, "extends", 2,
|
| - Token::FUTURE_RESERVED_WORD)) return;
|
| - break;
|
| - case F:
|
| - if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
|
| - if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
|
| - if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
|
| - if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
|
| - break;
|
| - case I:
|
| - if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
|
| - if (MatchState(input, 'm', IM)) return;
|
| - if (MatchKeyword(input, 'n', IN, Token::IN)) return;
|
| - break;
|
| - case IM:
|
| - if (MatchState(input, 'p', IMP)) return;
|
| - break;
|
| - case IMP:
|
| - if (MatchKeywordStart(input, "implements", 3,
|
| - Token::FUTURE_STRICT_RESERVED_WORD )) return;
|
| - if (MatchKeywordStart(input, "import", 3,
|
| - Token::FUTURE_RESERVED_WORD)) return;
|
| - break;
|
| - case IN:
|
| - token_ = Token::IDENTIFIER;
|
| - if (MatchKeywordStart(input, "interface", 2,
|
| - Token::FUTURE_STRICT_RESERVED_WORD)) return;
|
| - if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return;
|
| - break;
|
| - case N:
|
| - if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
|
| - if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
|
| - break;
|
| - case P:
|
| - if (MatchKeywordStart(input, "package", 1,
|
| - Token::FUTURE_STRICT_RESERVED_WORD)) return;
|
| - if (MatchState(input, 'r', PR)) return;
|
| - if (MatchKeywordStart(input, "public", 1,
|
| - Token::FUTURE_STRICT_RESERVED_WORD)) return;
|
| - break;
|
| - case PR:
|
| - if (MatchKeywordStart(input, "private", 2,
|
| - Token::FUTURE_STRICT_RESERVED_WORD)) return;
|
| - if (MatchKeywordStart(input, "protected", 2,
|
| - Token::FUTURE_STRICT_RESERVED_WORD)) return;
|
| - break;
|
| - case S:
|
| - if (MatchKeywordStart(input, "static", 1,
|
| - Token::FUTURE_STRICT_RESERVED_WORD)) return;
|
| - if (MatchKeywordStart(input, "super", 1,
|
| - Token::FUTURE_RESERVED_WORD)) return;
|
| - if (MatchKeywordStart(input, "switch", 1,
|
| - Token::SWITCH)) return;
|
| - break;
|
| - case T:
|
| - if (MatchState(input, 'h', TH)) return;
|
| - if (MatchState(input, 'r', TR)) return;
|
| - if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
|
| - break;
|
| - case TH:
|
| - if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
|
| - if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
|
| - break;
|
| - case TR:
|
| - if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;
|
| - if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;
|
| - break;
|
| - case V:
|
| - if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;
|
| - if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;
|
| - break;
|
| - case W:
|
| - if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;
|
| - if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
|
| - break;
|
| - case UNMATCHABLE:
|
| - break;
|
| - }
|
| - // On fallthrough, it's a failure.
|
| - state_ = UNMATCHABLE;
|
| -}
|
| -
|
| } } // namespace v8::internal
|
|
|