src/scanner.cc - Issue 987083003: [es6] support rest parameters in arrow functions

Unified Diff: src/scanner.cc

Issue 987083003: [es6] support rest parameters in arrow functions (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Fix preparser bug Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/scanner.cc

diff --git a/src/scanner.cc b/src/scanner.cc

index 9e90868131cc7469b6d88d79e2d409e561915dce..6b9b8134455359228a388f5828869a35a414c8ae 100644

--- a/src/scanner.cc

+++ b/src/scanner.cc

@@ -52,7 +52,8 @@ void Scanner::Initialize(Utf16CharacterStream* source) {

// after a newline and scan first token.

has_line_terminator_before_next_ = true;

SkipWhiteSpace();

- Scan();

+ peek_count_ = 1;

+ Scan(&next_[0]);

}

@@ -228,21 +229,26 @@ static const byte one_char_tokens[] = {

Token::Value Scanner::Next() {

- current_ = next_;

+ current_ = next_[0];

+ if (peek_count_ > 1) {

+ std::memmove(&next_[0], &next_[1], (peek_count_ - 1) * sizeof(TokenDesc));

+ next_[--peek_count_].token = Token::ILLEGAL;

+ return current_.token;

+ }

has_line_terminator_before_next_ = false;

has_multiline_comment_before_next_ = false;

if (static_cast<unsigned>(c0_) <= 0x7f) {

Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);

if (token != Token::ILLEGAL) {

int pos = source_pos();

- next_.token = token;

- next_.location.beg_pos = pos;

- next_.location.end_pos = pos + 1;

+ next_[0].token = token;

+ next_[0].location.beg_pos = pos;

+ next_[0].location.end_pos = pos + 1;

Advance();

return current_.token;

}

- Scan();

+ Scan(&next_[0]);

return current_.token;

}

@@ -420,13 +426,35 @@ Token::Value Scanner::ScanHtmlComment() {

}

-void Scanner::Scan() {

- next_.literal_chars = NULL;

- next_.raw_literal_chars = NULL;

+Token::Value Scanner::peek(int n) {

+ if (n == 0) return peek();

+ PeekScan(n);

+ return next_[n].token;

+Scanner::Location Scanner::peek_location(int n) {

+ if (n == 0) return peek_location();

+ PeekScan(n);

+ return next_[n].location;

+void Scanner::PeekScan(int count) {

+ DCHECK(count > 0 && count < kMaxLookahead);

+ while (peek_count_ <= count) {

+ Scan(&next_[peek_count_++]);

+ }

+void Scanner::Scan(TokenDesc* next) {

+ next->literal_chars = NULL;

+ next->raw_literal_chars = NULL;

Token::Value token;

do {

// Remember the position of the next token

- next_.location.beg_pos = source_pos();

+ next->location.beg_pos = source_pos();

switch (c0_) {

case ' ':

@@ -680,17 +708,21 @@ void Scanner::Scan() {

// whitespace.

} while (token == Token::WHITESPACE);

- next_.location.end_pos = source_pos();

- next_.token = token;

+ next->location.end_pos = source_pos();

+ next->token = token;

}

void Scanner::SeekForward(int pos) {

+ // Not supported when there are multiple lookahead tokens.

marja 2015/03/10 09:11:19 ... what prevents this from happening?

caitp (gmail) 2015/03/10 14:47:51 it just seemed complicated to make it work with mu…

+ DCHECK(peek_count_ < 2);

// After this call, we will have the token at the given position as

// the "next" token. The "current" token will be invalid.

- if (pos == next_.location.beg_pos) return;

+ TokenDesc* next = &next_[0];

+ if (pos == next->location.beg_pos) return;

int current_pos = source_pos();

- DCHECK_EQ(next_.location.end_pos, current_pos);

+ DCHECK_EQ(next->location.end_pos, current_pos);

// Positions inside the lookahead token aren't supported.

DCHECK(pos >= current_pos);

if (pos != current_pos) {

@@ -702,12 +734,12 @@ void Scanner::SeekForward(int pos) {

has_line_terminator_before_next_ = false;

has_multiline_comment_before_next_ = false;

}

- Scan();

+ Scan(next);

}

template <bool capture_raw, bool in_template_literal>

-bool Scanner::ScanEscape() {

+bool Scanner::ScanEscape(TokenDesc* next) {

uc32 c = c0_;

Advance<capture_raw>();

@@ -757,7 +789,7 @@ bool Scanner::ScanEscape() {

// According to ECMA-262, section 7.8.4, characters not covered by the

// above cases should be illegal, but they are commonly handled as

// non-escaped characters by JS VMs.

- AddLiteralChar(c);

+ AddLiteralChar(next, c);

return true;

}

@@ -796,6 +828,7 @@ Token::Value Scanner::ScanString() {

Advance<false, false>(); // consume quote

LiteralScope literal(this);

+ TokenDesc* next = literal.next_;

while (true) {

if (c0_ > kMaxAscii) {

HandleLeadSurrogate();

@@ -810,7 +843,7 @@ Token::Value Scanner::ScanString() {

uc32 c = c0_;

if (c == '\\') break;

Advance<false, false>();

- AddLiteralChar(c);

+ AddLiteralChar(next, c);

}

while (c0_ != quote && c0_ >= 0

@@ -818,9 +851,9 @@ Token::Value Scanner::ScanString() {

uc32 c = c0_;

Advance();

if (c == '\\') {

- if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL;

+ if (c0_ < 0 || !ScanEscape<false, false>(next)) return Token::ILLEGAL;

} else {

- AddLiteralChar(c);

+ AddLiteralChar(next, c);

}

if (c0_ != quote) return Token::ILLEGAL;

@@ -844,9 +877,9 @@ Token::Value Scanner::ScanTemplateSpan() {

// A TEMPLATE_SPAN should always be followed by an Expression, while a

// TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be

// followed by an Expression.

Token::Value result = Token::TEMPLATE_SPAN;

LiteralScope literal(this);

+ TokenDesc* next = literal.next_;

StartRawLiteral();

const bool capture_raw = true;

const bool in_template_literal = true;

@@ -856,11 +889,11 @@ Token::Value Scanner::ScanTemplateSpan() {

Advance<capture_raw>();

if (c == '`') {

result = Token::TEMPLATE_TAIL;

- ReduceRawLiteralLength(1);

+ ReduceRawLiteralLength(next, 1);

break;

} else if (c == '$' && c0_ == '{') {

Advance<capture_raw>(); // Consume '{'

- ReduceRawLiteralLength(2);

+ ReduceRawLiteralLength(next, 2);

break;

} else if (c == '\\') {

if (c0_ > 0 && unicode_cache_->IsLineTerminator(c0_)) {

@@ -869,14 +902,14 @@ Token::Value Scanner::ScanTemplateSpan() {

uc32 lastChar = c0_;

Advance<capture_raw>();

if (lastChar == '\r') {

- ReduceRawLiteralLength(1); // Remove \r

+ ReduceRawLiteralLength(next, 1); // Remove \r

if (c0_ == '\n') {

Advance<capture_raw>(); // Adds \n

} else {

- AddRawLiteralChar('\n');

+ AddRawLiteralChar(next, '\n');

}

- } else if (!ScanEscape<capture_raw, in_template_literal>()) {

+ } else if (!ScanEscape<capture_raw, in_template_literal>(next)) {

return Token::ILLEGAL;

}

} else if (c < 0) {

@@ -888,42 +921,43 @@ Token::Value Scanner::ScanTemplateSpan() {

// The TRV of LineTerminatorSequence :: <CR><LF> is the sequence

// consisting of the CV 0x000A.

if (c == '\r') {

- ReduceRawLiteralLength(1); // Remove \r

+ ReduceRawLiteralLength(next, 1); // Remove \r

if (c0_ == '\n') {

Advance<capture_raw>(); // Adds \n

} else {

- AddRawLiteralChar('\n');

+ AddRawLiteralChar(next, '\n');

}

c = '\n';

}

- AddLiteralChar(c);

+ AddLiteralChar(next, c);

}

literal.Complete();

- next_.location.end_pos = source_pos();

- next_.token = result;

+ next->location.end_pos = source_pos();

+ next->token = result;

return result;

}

Token::Value Scanner::ScanTemplateStart() {

DCHECK(c0_ == '`');

- next_.location.beg_pos = source_pos();

+ TokenDesc* next = PeekTokenDesc();

+ next->location.beg_pos = source_pos();

Advance(); // Consume `

return ScanTemplateSpan();

}

Token::Value Scanner::ScanTemplateContinuation() {

- DCHECK_EQ(next_.token, Token::RBRACE);

- next_.location.beg_pos = source_pos() - 1; // We already consumed }

+ TokenDesc* next = PeekTokenDesc();

+ DCHECK_EQ(next->token, Token::RBRACE);

+ next->location.beg_pos = source_pos() - 1; // We already consumed }

return ScanTemplateSpan();

}

-void Scanner::ScanDecimalDigits() {

- while (IsDecimalDigit(c0_))

- AddLiteralCharAdvance();

+void Scanner::ScanDecimalDigits(TokenDesc* next) {

+ while (IsDecimalDigit(c0_)) AddLiteralCharAdvance(next);

}

@@ -933,50 +967,51 @@ Token::Value Scanner::ScanNumber(bool seen_period) {

enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;

LiteralScope literal(this);

+ TokenDesc* next = literal.next_;

bool at_start = !seen_period;

if (seen_period) {

// we have already seen a decimal point of the float

- AddLiteralChar('.');

- ScanDecimalDigits(); // we know we have at least one digit

+ AddLiteralChar(next, '.');

+ ScanDecimalDigits(next); // we know we have at least one digit

} else {

// if the first character is '0' we must check for octals and hex

if (c0_ == '0') {

int start_pos = source_pos(); // For reporting octal positions.

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

// either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or

// an octal number.

if (c0_ == 'x' || c0_ == 'X') {

// hex number

kind = HEX;

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

if (!IsHexDigit(c0_)) {

// we must have at least one hex digit after 'x'/'X'

return Token::ILLEGAL;

}

while (IsHexDigit(c0_)) {

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

}

} else if (harmony_numeric_literals_ && (c0_ == 'o' || c0_ == 'O')) {

kind = OCTAL;

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

if (!IsOctalDigit(c0_)) {

// we must have at least one octal digit after 'o'/'O'

return Token::ILLEGAL;

}

while (IsOctalDigit(c0_)) {

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

}

} else if (harmony_numeric_literals_ && (c0_ == 'b' || c0_ == 'B')) {

kind = BINARY;

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

if (!IsBinaryDigit(c0_)) {

// we must have at least one binary digit after 'b'/'B'

return Token::ILLEGAL;

}

while (IsBinaryDigit(c0_)) {

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

}

} else if ('0' <= c0_ && c0_ <= '7') {

// (possible) octal number

@@ -992,7 +1027,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {

octal_pos_ = Location(start_pos, source_pos());

break;

}

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

}

@@ -1006,10 +1041,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {

uc32 first_char = c0_;

Advance<false, false>();

- AddLiteralChar(first_char);

+ AddLiteralChar(next, first_char);

}

- if (next_.literal_chars->one_byte_literal().length() < 10 &&

+ if (next->literal_chars->one_byte_literal().length() < 10 &&

c0_ != '.' && c0_ != 'e' && c0_ != 'E') {

smi_value_ = value;

literal.Complete();

@@ -1020,10 +1055,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {

HandleLeadSurrogate();

}

- ScanDecimalDigits(); // optional

+ ScanDecimalDigits(next); // optional

if (c0_ == '.') {

- AddLiteralCharAdvance();

- ScanDecimalDigits(); // optional

+ AddLiteralCharAdvance(next);

+ ScanDecimalDigits(next); // optional

}

@@ -1033,14 +1068,13 @@ Token::Value Scanner::ScanNumber(bool seen_period) {

DCHECK(kind != HEX); // 'e'/'E' must be scanned as part of the hex number

if (kind != DECIMAL) return Token::ILLEGAL;

// scan exponent

- AddLiteralCharAdvance();

- if (c0_ == '+' || c0_ == '-')

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

+ if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance(next);

if (!IsDecimalDigit(c0_)) {

// we must have at least one decimal digit after 'e'/'E'

return Token::ILLEGAL;

}

- ScanDecimalDigits();

+ ScanDecimalDigits(next);

}

// The source character immediately following a numeric literal must

@@ -1218,12 +1252,13 @@ bool Scanner::IdentifierIsFutureStrictReserved(

Token::Value Scanner::ScanIdentifierOrKeyword() {

DCHECK(unicode_cache_->IsIdentifierStart(c0_));

+ TokenDesc* next = PeekTokenDesc();

LiteralScope literal(this);

if (IsInRange(c0_, 'a', 'z')) {

do {

uc32 first_char = c0_;

Advance<false, false>();

- AddLiteralChar(first_char);

+ AddLiteralChar(next, first_char);

} while (IsInRange(c0_, 'a', 'z'));

if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' ||

@@ -1231,11 +1266,11 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {

// Identifier starting with lowercase.

uc32 first_char = c0_;

Advance<false, false>();

- AddLiteralChar(first_char);

+ AddLiteralChar(next, first_char);

while (IsAsciiIdentifier(c0_)) {

uc32 first_char = c0_;

Advance<false, false>();

- AddLiteralChar(first_char);

+ AddLiteralChar(next, first_char);

}

if (c0_ <= kMaxAscii && c0_ != '\\') {

literal.Complete();

@@ -1244,7 +1279,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {

} else if (c0_ <= kMaxAscii && c0_ != '\\') {

// Only a-z+: could be a keyword or identifier.

literal.Complete();

- Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

+ Vector<const uint8_t> chars = next->literal_chars->one_byte_literal();

return KeywordOrIdentifierToken(chars.start(), chars.length(),

harmony_scoping_, harmony_modules_,

harmony_classes_);

@@ -1255,7 +1290,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {

do {

uc32 first_char = c0_;

Advance<false, false>();

- AddLiteralChar(first_char);

+ AddLiteralChar(next, first_char);

} while (IsAsciiIdentifier(c0_));

if (c0_ <= kMaxAscii && c0_ != '\\') {

@@ -1273,12 +1308,12 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {

!unicode_cache_->IsIdentifierStart(c)) {

return Token::ILLEGAL;

}

- AddLiteralChar(c);

+ AddLiteralChar(next, c);

return ScanIdentifierSuffix(&literal);

} else {

uc32 first_char = c0_;

Advance();

- AddLiteralChar(first_char);

+ AddLiteralChar(next, first_char);

}

// Scan the rest of the identifier characters.

@@ -1286,7 +1321,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {

if (c0_ != '\\') {

uc32 next_char = c0_;

Advance();

- AddLiteralChar(next_char);

+ AddLiteralChar(next, next_char);

continue;

}

// Fallthrough if no longer able to complete keyword.

@@ -1295,8 +1330,8 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {

literal.Complete();

- if (next_.literal_chars->is_one_byte()) {

- Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

+ if (next->literal_chars->is_one_byte()) {

+ Vector<const uint8_t> chars = next->literal_chars->one_byte_literal();

return KeywordOrIdentifierToken(chars.start(),

chars.length(),

harmony_scoping_,

@@ -1309,6 +1344,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {

Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {

// Scan the rest of the identifier characters.

+ TokenDesc* next = literal->next_;

while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

if (c0_ == '\\') {

uc32 c = ScanIdentifierUnicodeEscape();

@@ -1318,9 +1354,9 @@ Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {

!unicode_cache_->IsIdentifierPart(c)) {

return Token::ILLEGAL;

}

- AddLiteralChar(c);

+ AddLiteralChar(next, c);

} else {

- AddLiteralChar(c0_);

+ AddLiteralChar(next, c0_);

Advance();

}

@@ -1333,26 +1369,27 @@ Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {

bool Scanner::ScanRegExpPattern(bool seen_equal) {

// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

bool in_character_class = false;

+ TokenDesc* next = PeekTokenDesc();

// Previous token is either '/' or '/=', in the second case, the

// pattern starts at =.

- next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

- next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

+ next->location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

+ next->location.end_pos = source_pos() - (seen_equal ? 1 : 0);

// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

// the scanner should pass uninterpreted bodies to the RegExp

// constructor.

LiteralScope literal(this);

if (seen_equal) {

- AddLiteralChar('=');

+ AddLiteralChar(next, '=');

}

while (c0_ != '/' || in_character_class) {

if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;

if (c0_ == '\\') { // Escape sequence.

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

// If the escape allows more characters, i.e., \x??, \u????, or \c?,

// only "safe" characters are allowed (letters, digits, underscore),

// otherwise the escape isn't valid and the invalid character has

@@ -1366,7 +1403,7 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {

} else { // Unescaped character.

if (c0_ == '[') in_character_class = true;

if (c0_ == ']') in_character_class = false;

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

}

Advance(); // consume '/'

@@ -1379,17 +1416,18 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {

bool Scanner::ScanRegExpFlags() {

// Scan regular expression flags.

+ TokenDesc* next = PeekTokenDesc();

LiteralScope literal(this);

while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

if (c0_ != '\\') {

- AddLiteralCharAdvance();

+ AddLiteralCharAdvance(next);

} else {

return false;

}

literal.Complete();

- next_.location.end_pos = source_pos() - 1;

+ next->location.end_pos = source_pos() - 1;

return true;

}

« src/preparser.h ('K') | « src/scanner.h ('k') | src/typing.cc » ('j') | no next file with comments »