Chromium Code Reviews| Index: src/scanner.cc |
| diff --git a/src/scanner.cc b/src/scanner.cc |
| index 9e90868131cc7469b6d88d79e2d409e561915dce..6b9b8134455359228a388f5828869a35a414c8ae 100644 |
| --- a/src/scanner.cc |
| +++ b/src/scanner.cc |
| @@ -52,7 +52,8 @@ void Scanner::Initialize(Utf16CharacterStream* source) { |
| // after a newline and scan first token. |
| has_line_terminator_before_next_ = true; |
| SkipWhiteSpace(); |
| - Scan(); |
| + peek_count_ = 1; |
| + Scan(&next_[0]); |
| } |
| @@ -228,21 +229,26 @@ static const byte one_char_tokens[] = { |
| Token::Value Scanner::Next() { |
| - current_ = next_; |
| + current_ = next_[0]; |
| + if (peek_count_ > 1) { |
| + std::memmove(&next_[0], &next_[1], (peek_count_ - 1) * sizeof(TokenDesc)); |
| + next_[--peek_count_].token = Token::ILLEGAL; |
| + return current_.token; |
| + } |
| has_line_terminator_before_next_ = false; |
| has_multiline_comment_before_next_ = false; |
| if (static_cast<unsigned>(c0_) <= 0x7f) { |
| Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); |
| if (token != Token::ILLEGAL) { |
| int pos = source_pos(); |
| - next_.token = token; |
| - next_.location.beg_pos = pos; |
| - next_.location.end_pos = pos + 1; |
| + next_[0].token = token; |
| + next_[0].location.beg_pos = pos; |
| + next_[0].location.end_pos = pos + 1; |
| Advance(); |
| return current_.token; |
| } |
| } |
| - Scan(); |
| + Scan(&next_[0]); |
| return current_.token; |
| } |
| @@ -420,13 +426,35 @@ Token::Value Scanner::ScanHtmlComment() { |
| } |
| -void Scanner::Scan() { |
| - next_.literal_chars = NULL; |
| - next_.raw_literal_chars = NULL; |
| +Token::Value Scanner::peek(int n) { |
| + if (n == 0) return peek(); |
| + PeekScan(n); |
| + return next_[n].token; |
| +} |
| + |
| + |
| +Scanner::Location Scanner::peek_location(int n) { |
| + if (n == 0) return peek_location(); |
| + PeekScan(n); |
| + return next_[n].location; |
| +} |
| + |
| + |
| +void Scanner::PeekScan(int count) { |
| + DCHECK(count > 0 && count < kMaxLookahead); |
| + while (peek_count_ <= count) { |
| + Scan(&next_[peek_count_++]); |
| + } |
| +} |
| + |
| + |
| +void Scanner::Scan(TokenDesc* next) { |
| + next->literal_chars = NULL; |
| + next->raw_literal_chars = NULL; |
| Token::Value token; |
| do { |
| // Remember the position of the next token |
| - next_.location.beg_pos = source_pos(); |
| + next->location.beg_pos = source_pos(); |
| switch (c0_) { |
| case ' ': |
| @@ -680,17 +708,21 @@ void Scanner::Scan() { |
| // whitespace. |
| } while (token == Token::WHITESPACE); |
| - next_.location.end_pos = source_pos(); |
| - next_.token = token; |
| + next->location.end_pos = source_pos(); |
| + next->token = token; |
| } |
| void Scanner::SeekForward(int pos) { |
| + // Not supported when there are multiple lookahead tokens |
|
marja
2015/03/10 09:11:19
... what prevents this from happening?
caitp (gmail)
2015/03/10 14:47:51
it just seemed complicated to make it work with mu… [reply truncated in this excerpt]
|
| + DCHECK(peek_count_ < 2); |
| + |
| // After this call, we will have the token at the given position as |
| // the "next" token. The "current" token will be invalid. |
| - if (pos == next_.location.beg_pos) return; |
| + TokenDesc* next = &next_[0]; |
| + if (pos == next->location.beg_pos) return; |
| int current_pos = source_pos(); |
| - DCHECK_EQ(next_.location.end_pos, current_pos); |
| + DCHECK_EQ(next->location.end_pos, current_pos); |
| // Positions inside the lookahead token aren't supported. |
| DCHECK(pos >= current_pos); |
| if (pos != current_pos) { |
| @@ -702,12 +734,12 @@ void Scanner::SeekForward(int pos) { |
| has_line_terminator_before_next_ = false; |
| has_multiline_comment_before_next_ = false; |
| } |
| - Scan(); |
| + Scan(next); |
| } |
| template <bool capture_raw, bool in_template_literal> |
| -bool Scanner::ScanEscape() { |
| +bool Scanner::ScanEscape(TokenDesc* next) { |
| uc32 c = c0_; |
| Advance<capture_raw>(); |
| @@ -757,7 +789,7 @@ bool Scanner::ScanEscape() { |
| // According to ECMA-262, section 7.8.4, characters not covered by the |
| // above cases should be illegal, but they are commonly handled as |
| // non-escaped characters by JS VMs. |
| - AddLiteralChar(c); |
| + AddLiteralChar(next, c); |
| return true; |
| } |
| @@ -796,6 +828,7 @@ Token::Value Scanner::ScanString() { |
| Advance<false, false>(); // consume quote |
| LiteralScope literal(this); |
| + TokenDesc* next = literal.next_; |
| while (true) { |
| if (c0_ > kMaxAscii) { |
| HandleLeadSurrogate(); |
| @@ -810,7 +843,7 @@ Token::Value Scanner::ScanString() { |
| uc32 c = c0_; |
| if (c == '\\') break; |
| Advance<false, false>(); |
| - AddLiteralChar(c); |
| + AddLiteralChar(next, c); |
| } |
| while (c0_ != quote && c0_ >= 0 |
| @@ -818,9 +851,9 @@ Token::Value Scanner::ScanString() { |
| uc32 c = c0_; |
| Advance(); |
| if (c == '\\') { |
| - if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; |
| + if (c0_ < 0 || !ScanEscape<false, false>(next)) return Token::ILLEGAL; |
| } else { |
| - AddLiteralChar(c); |
| + AddLiteralChar(next, c); |
| } |
| } |
| if (c0_ != quote) return Token::ILLEGAL; |
| @@ -844,9 +877,9 @@ Token::Value Scanner::ScanTemplateSpan() { |
| // A TEMPLATE_SPAN should always be followed by an Expression, while a |
| // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be |
| // followed by an Expression. |
| - |
| Token::Value result = Token::TEMPLATE_SPAN; |
| LiteralScope literal(this); |
| + TokenDesc* next = literal.next_; |
| StartRawLiteral(); |
| const bool capture_raw = true; |
| const bool in_template_literal = true; |
| @@ -856,11 +889,11 @@ Token::Value Scanner::ScanTemplateSpan() { |
| Advance<capture_raw>(); |
| if (c == '`') { |
| result = Token::TEMPLATE_TAIL; |
| - ReduceRawLiteralLength(1); |
| + ReduceRawLiteralLength(next, 1); |
| break; |
| } else if (c == '$' && c0_ == '{') { |
| Advance<capture_raw>(); // Consume '{' |
| - ReduceRawLiteralLength(2); |
| + ReduceRawLiteralLength(next, 2); |
| break; |
| } else if (c == '\\') { |
| if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) { |
| @@ -869,14 +902,14 @@ Token::Value Scanner::ScanTemplateSpan() { |
| uc32 lastChar = c0_; |
| Advance<capture_raw>(); |
| if (lastChar == '\r') { |
| - ReduceRawLiteralLength(1); // Remove \r |
| + ReduceRawLiteralLength(next, 1); // Remove \r |
| if (c0_ == '\n') { |
| Advance<capture_raw>(); // Adds \n |
| } else { |
| - AddRawLiteralChar('\n'); |
| + AddRawLiteralChar(next, '\n'); |
| } |
| } |
| - } else if (!ScanEscape<capture_raw, in_template_literal>()) { |
| + } else if (!ScanEscape<capture_raw, in_template_literal>(next)) { |
| return Token::ILLEGAL; |
| } |
| } else if (c < 0) { |
| @@ -888,42 +921,43 @@ Token::Value Scanner::ScanTemplateSpan() { |
| // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence |
| // consisting of the CV 0x000A. |
| if (c == '\r') { |
| - ReduceRawLiteralLength(1); // Remove \r |
| + ReduceRawLiteralLength(next, 1); // Remove \r |
| if (c0_ == '\n') { |
| Advance<capture_raw>(); // Adds \n |
| } else { |
| - AddRawLiteralChar('\n'); |
| + AddRawLiteralChar(next, '\n'); |
| } |
| c = '\n'; |
| } |
| - AddLiteralChar(c); |
| + AddLiteralChar(next, c); |
| } |
| } |
| literal.Complete(); |
| - next_.location.end_pos = source_pos(); |
| - next_.token = result; |
| + next->location.end_pos = source_pos(); |
| + next->token = result; |
| return result; |
| } |
| Token::Value Scanner::ScanTemplateStart() { |
| DCHECK(c0_ == '`'); |
| - next_.location.beg_pos = source_pos(); |
| + TokenDesc* next = PeekTokenDesc(); |
| + next->location.beg_pos = source_pos(); |
| Advance(); // Consume ` |
| return ScanTemplateSpan(); |
| } |
| Token::Value Scanner::ScanTemplateContinuation() { |
| - DCHECK_EQ(next_.token, Token::RBRACE); |
| - next_.location.beg_pos = source_pos() - 1; // We already consumed } |
| + TokenDesc* next = PeekTokenDesc(); |
| + DCHECK_EQ(next->token, Token::RBRACE); |
| + next->location.beg_pos = source_pos() - 1; // We already consumed } |
| return ScanTemplateSpan(); |
| } |
| -void Scanner::ScanDecimalDigits() { |
| - while (IsDecimalDigit(c0_)) |
| - AddLiteralCharAdvance(); |
| +void Scanner::ScanDecimalDigits(TokenDesc* next) { |
| + while (IsDecimalDigit(c0_)) AddLiteralCharAdvance(next); |
| } |
| @@ -933,50 +967,51 @@ Token::Value Scanner::ScanNumber(bool seen_period) { |
| enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; |
| LiteralScope literal(this); |
| + TokenDesc* next = literal.next_; |
| bool at_start = !seen_period; |
| if (seen_period) { |
| // we have already seen a decimal point of the float |
| - AddLiteralChar('.'); |
| - ScanDecimalDigits(); // we know we have at least one digit |
| + AddLiteralChar(next, '.'); |
| + ScanDecimalDigits(next); // we know we have at least one digit |
| } else { |
| // if the first character is '0' we must check for octals and hex |
| if (c0_ == '0') { |
| int start_pos = source_pos(); // For reporting octal positions. |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or |
| // an octal number. |
| if (c0_ == 'x' || c0_ == 'X') { |
| // hex number |
| kind = HEX; |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| if (!IsHexDigit(c0_)) { |
| // we must have at least one hex digit after 'x'/'X' |
| return Token::ILLEGAL; |
| } |
| while (IsHexDigit(c0_)) { |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| } |
| } else if (harmony_numeric_literals_ && (c0_ == 'o' || c0_ == 'O')) { |
| kind = OCTAL; |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| if (!IsOctalDigit(c0_)) { |
| // we must have at least one octal digit after 'o'/'O' |
| return Token::ILLEGAL; |
| } |
| while (IsOctalDigit(c0_)) { |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| } |
| } else if (harmony_numeric_literals_ && (c0_ == 'b' || c0_ == 'B')) { |
| kind = BINARY; |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| if (!IsBinaryDigit(c0_)) { |
| // we must have at least one binary digit after 'b'/'B' |
| return Token::ILLEGAL; |
| } |
| while (IsBinaryDigit(c0_)) { |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| } |
| } else if ('0' <= c0_ && c0_ <= '7') { |
| // (possible) octal number |
| @@ -992,7 +1027,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) { |
| octal_pos_ = Location(start_pos, source_pos()); |
| break; |
| } |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| } |
| } |
| } |
| @@ -1006,10 +1041,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) { |
| uc32 first_char = c0_; |
| Advance<false, false>(); |
| - AddLiteralChar(first_char); |
| + AddLiteralChar(next, first_char); |
| } |
| - if (next_.literal_chars->one_byte_literal().length() < 10 && |
| + if (next->literal_chars->one_byte_literal().length() < 10 && |
| c0_ != '.' && c0_ != 'e' && c0_ != 'E') { |
| smi_value_ = value; |
| literal.Complete(); |
| @@ -1020,10 +1055,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) { |
| HandleLeadSurrogate(); |
| } |
| - ScanDecimalDigits(); // optional |
| + ScanDecimalDigits(next); // optional |
| if (c0_ == '.') { |
| - AddLiteralCharAdvance(); |
| - ScanDecimalDigits(); // optional |
| + AddLiteralCharAdvance(next); |
| + ScanDecimalDigits(next); // optional |
| } |
| } |
| } |
| @@ -1033,14 +1068,13 @@ Token::Value Scanner::ScanNumber(bool seen_period) { |
| DCHECK(kind != HEX); // 'e'/'E' must be scanned as part of the hex number |
| if (kind != DECIMAL) return Token::ILLEGAL; |
| // scan exponent |
| - AddLiteralCharAdvance(); |
| - if (c0_ == '+' || c0_ == '-') |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| + if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance(next); |
| if (!IsDecimalDigit(c0_)) { |
| // we must have at least one decimal digit after 'e'/'E' |
| return Token::ILLEGAL; |
| } |
| - ScanDecimalDigits(); |
| + ScanDecimalDigits(next); |
| } |
| // The source character immediately following a numeric literal must |
| @@ -1218,12 +1252,13 @@ bool Scanner::IdentifierIsFutureStrictReserved( |
| Token::Value Scanner::ScanIdentifierOrKeyword() { |
| DCHECK(unicode_cache_->IsIdentifierStart(c0_)); |
| + TokenDesc* next = PeekTokenDesc(); |
| LiteralScope literal(this); |
| if (IsInRange(c0_, 'a', 'z')) { |
| do { |
| uc32 first_char = c0_; |
| Advance<false, false>(); |
| - AddLiteralChar(first_char); |
| + AddLiteralChar(next, first_char); |
| } while (IsInRange(c0_, 'a', 'z')); |
| if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' || |
| @@ -1231,11 +1266,11 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
| // Identifier starting with lowercase. |
| uc32 first_char = c0_; |
| Advance<false, false>(); |
| - AddLiteralChar(first_char); |
| + AddLiteralChar(next, first_char); |
| while (IsAsciiIdentifier(c0_)) { |
| uc32 first_char = c0_; |
| Advance<false, false>(); |
| - AddLiteralChar(first_char); |
| + AddLiteralChar(next, first_char); |
| } |
| if (c0_ <= kMaxAscii && c0_ != '\\') { |
| literal.Complete(); |
| @@ -1244,7 +1279,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
| } else if (c0_ <= kMaxAscii && c0_ != '\\') { |
| // Only a-z+: could be a keyword or identifier. |
| literal.Complete(); |
| - Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
| + Vector<const uint8_t> chars = next->literal_chars->one_byte_literal(); |
| return KeywordOrIdentifierToken(chars.start(), chars.length(), |
| harmony_scoping_, harmony_modules_, |
| harmony_classes_); |
| @@ -1255,7 +1290,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
| do { |
| uc32 first_char = c0_; |
| Advance<false, false>(); |
| - AddLiteralChar(first_char); |
| + AddLiteralChar(next, first_char); |
| } while (IsAsciiIdentifier(c0_)); |
| if (c0_ <= kMaxAscii && c0_ != '\\') { |
| @@ -1273,12 +1308,12 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
| !unicode_cache_->IsIdentifierStart(c)) { |
| return Token::ILLEGAL; |
| } |
| - AddLiteralChar(c); |
| + AddLiteralChar(next, c); |
| return ScanIdentifierSuffix(&literal); |
| } else { |
| uc32 first_char = c0_; |
| Advance(); |
| - AddLiteralChar(first_char); |
| + AddLiteralChar(next, first_char); |
| } |
| // Scan the rest of the identifier characters. |
| @@ -1286,7 +1321,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
| if (c0_ != '\\') { |
| uc32 next_char = c0_; |
| Advance(); |
| - AddLiteralChar(next_char); |
| + AddLiteralChar(next, next_char); |
| continue; |
| } |
| // Fallthrough if no longer able to complete keyword. |
| @@ -1295,8 +1330,8 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
| literal.Complete(); |
| - if (next_.literal_chars->is_one_byte()) { |
| - Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
| + if (next->literal_chars->is_one_byte()) { |
| + Vector<const uint8_t> chars = next->literal_chars->one_byte_literal(); |
| return KeywordOrIdentifierToken(chars.start(), |
| chars.length(), |
| harmony_scoping_, |
| @@ -1309,6 +1344,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
| Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| // Scan the rest of the identifier characters. |
| + TokenDesc* next = literal->next_; |
| while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
| if (c0_ == '\\') { |
| uc32 c = ScanIdentifierUnicodeEscape(); |
| @@ -1318,9 +1354,9 @@ Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| !unicode_cache_->IsIdentifierPart(c)) { |
| return Token::ILLEGAL; |
| } |
| - AddLiteralChar(c); |
| + AddLiteralChar(next, c); |
| } else { |
| - AddLiteralChar(c0_); |
| + AddLiteralChar(next, c0_); |
| Advance(); |
| } |
| } |
| @@ -1333,26 +1369,27 @@ Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| bool Scanner::ScanRegExpPattern(bool seen_equal) { |
| // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| bool in_character_class = false; |
| + TokenDesc* next = PeekTokenDesc(); |
| // Previous token is either '/' or '/=', in the second case, the |
| // pattern starts at =. |
| - next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| - next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| + next->location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| + next->location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| // the scanner should pass uninterpreted bodies to the RegExp |
| // constructor. |
| LiteralScope literal(this); |
| if (seen_equal) { |
| - AddLiteralChar('='); |
| + AddLiteralChar(next, '='); |
| } |
| while (c0_ != '/' || in_character_class) { |
| if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; |
| if (c0_ == '\\') { // Escape sequence. |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
| // only "safe" characters are allowed (letters, digits, underscore), |
| // otherwise the escape isn't valid and the invalid character has |
| @@ -1366,7 +1403,7 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) { |
| } else { // Unescaped character. |
| if (c0_ == '[') in_character_class = true; |
| if (c0_ == ']') in_character_class = false; |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| } |
| } |
| Advance(); // consume '/' |
| @@ -1379,17 +1416,18 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) { |
| bool Scanner::ScanRegExpFlags() { |
| // Scan regular expression flags. |
| + TokenDesc* next = PeekTokenDesc(); |
| LiteralScope literal(this); |
| while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
| if (c0_ != '\\') { |
| - AddLiteralCharAdvance(); |
| + AddLiteralCharAdvance(next); |
| } else { |
| return false; |
| } |
| } |
| literal.Complete(); |
| - next_.location.end_pos = source_pos() - 1; |
| + next->location.end_pos = source_pos() - 1; |
| return true; |
| } |