src/parsing/scanner.cc - Issue 1841543003: [esnext] implement frontend changes for async/await proposal

Side by Side Diff: src/parsing/scanner.cc

Issue 1841543003: [esnext] implement frontend changes for async/await proposal (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: A bunch more tests, some fixes, ExpressionClassifier gets fatter :( Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
49 }	49 }

50	50

51	51

52 void Scanner::Initialize(Utf16CharacterStream* source) {	52 void Scanner::Initialize(Utf16CharacterStream* source) {

53 source_ = source;	53 source_ = source;

54 // Need to capture identifiers in order to recognize "get" and "set"	54 // Need to capture identifiers in order to recognize "get" and "set"

55 // in object literals.	55 // in object literals.

56 Init();	56 Init();

57 // Skip initial whitespace allowing HTML comment ends just like	57 // Skip initial whitespace allowing HTML comment ends just like

58 // after a newline and scan first token.	58 // after a newline and scan first token.

59 has_line_terminator_before_next_ = true;	59 has_preceding_line_terminator_ = true;

	60 did_see_multiline_comment_ = false;

60 SkipWhiteSpace();	61 SkipWhiteSpace();

61 Scan();	62 Scan();

62 }	63 }

63	64

64 template <bool capture_raw, bool unicode>	65 template <bool capture_raw, bool unicode>

65 uc32 Scanner::ScanHexNumber(int expected_length) {	66 uc32 Scanner::ScanHexNumber(int expected_length) {

66 DCHECK(expected_length <= 4); // prevent overflow	67 DCHECK(expected_length <= 4); // prevent overflow

67	68

68 int begin = source_pos() - 2;	69 int begin = source_pos() - 2;

69 uc32 x = 0;	70 uc32 x = 0;

(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
242	243

243 Token::Value Scanner::Next() {	244 Token::Value Scanner::Next() {

244 if (next_.token == Token::EOS) {	245 if (next_.token == Token::EOS) {

245 next_.location.beg_pos = current_.location.beg_pos;	246 next_.location.beg_pos = current_.location.beg_pos;

246 next_.location.end_pos = current_.location.end_pos;	247 next_.location.end_pos = current_.location.end_pos;

247 }	248 }

248 current_ = next_;	249 current_ = next_;

249 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {	250 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {

250 next_ = next_next_;	251 next_ = next_next_;

251 next_next_.token = Token::UNINITIALIZED;	252 next_next_.token = Token::UNINITIALIZED;

	253 has_preceding_line_terminator_ = next_has_preceding_line_terminator_;

252 return current_.token;	254 return current_.token;

253 }	255 }

254 has_line_terminator_before_next_ = false;	256 has_preceding_line_terminator_ = false;

255 has_multiline_comment_before_next_ = false;

256 if (static_cast<unsigned>(c0_) <= 0x7f) {	257 if (static_cast<unsigned>(c0_) <= 0x7f) {

257 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);	258 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);

258 if (token != Token::ILLEGAL) {	259 if (token != Token::ILLEGAL) {

259 int pos = source_pos();	260 int pos = source_pos();

260 next_.token = token;	261 next_.token = token;

261 next_.location.beg_pos = pos;	262 next_.location.beg_pos = pos;

262 next_.location.end_pos = pos + 1;	263 next_.location.end_pos = pos + 1;

263 Advance();	264 Advance();

264 return current_.token;	265 return current_.token;

265 }	266 }

266 }	267 }

267 Scan();	268 Scan();

268 return current_.token;	269 return current_.token;

269 }	270 }

270	271

271	272

272 Token::Value Scanner::PeekAhead() {	273 Token::Value Scanner::PeekAhead() {

273 if (next_next_.token != Token::UNINITIALIZED) {	274 if (next_next_.token != Token::UNINITIALIZED) {

274 return next_next_.token;	275 return next_next_.token;

275 }	276 }

276 TokenDesc prev = current_;	277 TokenDesc prev = current_;

	278 bool has_preceding_line_terminator = has_preceding_line_terminator_;

277 Next();	279 Next();

	280 next_has_preceding_line_terminator_ = has_preceding_line_terminator_;

	281 has_preceding_line_terminator_ = has_preceding_line_terminator;

278 Token::Value ret = next_.token;	282 Token::Value ret = next_.token;

279 next_next_ = next_;	283 next_next_ = next_;

280 next_ = current_;	284 next_ = current_;

281 current_ = prev;	285 current_ = prev;

282 return ret;	286 return ret;

283 }	287 }

284	288

285	289

286 // TODO(yangguo): check whether this is actually necessary.	290 // TODO(yangguo): check whether this is actually necessary.

287 static inline bool IsLittleEndianByteOrderMark(uc32 c) {	291 static inline bool IsLittleEndianByteOrderMark(uc32 c) {

288 // The Unicode value U+FFFE is guaranteed never to be assigned as a	292 // The Unicode value U+FFFE is guaranteed never to be assigned as a

289 // Unicode character; this implies that in a Unicode context the	293 // Unicode character; this implies that in a Unicode context the

290 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	294 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

291 // character expressed in little-endian byte order (since it could	295 // character expressed in little-endian byte order (since it could

292 // not be a U+FFFE character expressed in big-endian byte	296 // not be a U+FFFE character expressed in big-endian byte

293 // order). Nevertheless, we check for it to be compatible with	297 // order). Nevertheless, we check for it to be compatible with

294 // Spidermonkey.	298 // Spidermonkey.

295 return c == 0xFFFE;	299 return c == 0xFFFE;

296 }	300 }

297	301

298	302

299 bool Scanner::SkipWhiteSpace() {	303 bool Scanner::SkipWhiteSpace() {

300 int start_position = source_pos();	304 int start_position = source_pos();

301

302 while (true) {	305 while (true) {

303 while (true) {	306 while (true) {

304 // The unicode cache accepts unsigned inputs.	307 // The unicode cache accepts unsigned inputs.

305 if (c0_ < 0) break;	308 if (c0_ < 0) break;

306 // Advance as long as character is a WhiteSpace or LineTerminator.	309 // Advance as long as character is a WhiteSpace or LineTerminator.

307 // Remember if the latter is the case.	310 // Remember if the latter is the case.

308 if (unicode_cache_->IsLineTerminator(c0_)) {	311 if (unicode_cache_->IsLineTerminator(c0_)) {

309 has_line_terminator_before_next_ = true;	312 has_preceding_line_terminator_ = true;

	313 did_see_multiline_comment_ = false;

310 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&	314 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&

311 !IsLittleEndianByteOrderMark(c0_)) {	315 !IsLittleEndianByteOrderMark(c0_)) {

312 break;	316 break;

313 }	317 }

314 Advance();	318 Advance();

315 }	319 }

316	320

317 // If there is an HTML comment end '-->' at the beginning of a	321 // If there is an HTML comment end '-->' at the beginning of a

318 // line (with only whitespace in front of it), we treat the rest	322 // line (with only whitespace in front of it), we treat the rest

319 // of the line as a comment. This is in line with the way	323 // of the line as a comment. This is in line with the way

320 // SpiderMonkey handles it.	324 // SpiderMonkey handles it.

321 if (c0_ == '-' && has_line_terminator_before_next_) {	325 if (c0_ == '-' && has_preceding_line_terminator_ &&

	326 !did_see_multiline_comment_) {

322 Advance();	327 Advance();

323 if (c0_ == '-') {	328 if (c0_ == '-') {

324 Advance();	329 Advance();

325 if (c0_ == '>') {	330 if (c0_ == '>') {

326 // Treat the rest of the line as a comment.	331 // Treat the rest of the line as a comment.

327 SkipSingleLineComment();	332 SkipSingleLineComment();

328 // Continue skipping white space after the comment.	333 // Continue skipping white space after the comment.

329 continue;	334 continue;

330 }	335 }

331 PushBack('-'); // undo Advance()	336 PushBack('-'); // undo Advance()

(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
418 Token::Value Scanner::SkipMultiLineComment() {	423 Token::Value Scanner::SkipMultiLineComment() {

419 DCHECK(c0_ == '*');	424 DCHECK(c0_ == '*');

420 Advance();	425 Advance();

421	426

422 while (c0_ >= 0) {	427 while (c0_ >= 0) {

423 uc32 ch = c0_;	428 uc32 ch = c0_;

424 Advance();	429 Advance();

425 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {	430 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {

426 // Following ECMA-262, section 7.4, a comment containing	431 // Following ECMA-262, section 7.4, a comment containing

427 // a newline will make the comment count as a line-terminator.	432 // a newline will make the comment count as a line-terminator.

428 has_multiline_comment_before_next_ = true;	433 has_preceding_line_terminator_ = true;

	434 did_see_multiline_comment_ = true;

429 }	435 }

430 // If we have reached the end of the multi-line comment, we	436 // If we have reached the end of the multi-line comment, we

431 // consume the '/' and insert a whitespace. This way all	437 // consume the '/' and insert a whitespace. This way all

432 // multi-line comments are treated as whitespace.	438 // multi-line comments are treated as whitespace.

433 if (ch == '*' && c0_ == '/') {	439 if (ch == '*' && c0_ == '/') {

434 c0_ = ' ';	440 c0_ = ' ';

435 return Token::WHITESPACE;	441 return Token::WHITESPACE;

436 }	442 }

437 }	443 }

438	444

(...skipping 17 matching lines...) Expand all Loading...
456 PushBack('!'); // undo Advance()	462 PushBack('!'); // undo Advance()

457 DCHECK(c0_ == '!');	463 DCHECK(c0_ == '!');

458 return Token::LT;	464 return Token::LT;

459 }	465 }

460	466

461	467

462 void Scanner::Scan() {	468 void Scanner::Scan() {

463 next_.literal_chars = NULL;	469 next_.literal_chars = NULL;

464 next_.raw_literal_chars = NULL;	470 next_.raw_literal_chars = NULL;

465 Token::Value token;	471 Token::Value token;

	472

466 do {	473 do {

467 // Remember the position of the next token	474 // Remember the position of the next token

468 next_.location.beg_pos = source_pos();	475 next_.location.beg_pos = source_pos();

469	476

470 switch (c0_) {	477 switch (c0_) {

471 case ' ':	478 case ' ':

472 case '\t':	479 case '\t':

473 Advance();	480 Advance();

474 token = Token::WHITESPACE;	481 token = Token::WHITESPACE;

475 break;	482 break;

476	483

477 case '\n':	484 case '\n':

478 Advance();	485 Advance();

479 has_line_terminator_before_next_ = true;	486 has_preceding_line_terminator_ = true;

	487 did_see_multiline_comment_ = false;

480 token = Token::WHITESPACE;	488 token = Token::WHITESPACE;

481 break;	489 break;

482	490

483 case '"': case '\'':	491 case '"': case '\'':

484 token = ScanString();	492 token = ScanString();

485 break;	493 break;

486	494

487 case '<':	495 case '<':

488 // < <= << <<= <!--	496 // < <= << <<= <!--

489 Advance();	497 Advance();

(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
550 } else {	558 } else {

551 token = Token::ADD;	559 token = Token::ADD;

552 }	560 }

553 break;	561 break;

554	562

555 case '-':	563 case '-':

556 // - -- --> -=	564 // - -- --> -=

557 Advance();	565 Advance();

558 if (c0_ == '-') {	566 if (c0_ == '-') {

559 Advance();	567 Advance();

560 if (c0_ == '>' && has_line_terminator_before_next_) {	568 if (c0_ == '>' && has_preceding_line_terminator_ &&

	569 !did_see_multiline_comment_) {

561 // For compatibility with SpiderMonkey, we skip lines that	570 // For compatibility with SpiderMonkey, we skip lines that

562 // start with an HTML comment end '-->'.	571 // start with an HTML comment end '-->'.

563 token = SkipSingleLineComment();	572 token = SkipSingleLineComment();

564 } else {	573 } else {

565 token = Token::DEC;	574 token = Token::DEC;

566 }	575 }

567 } else if (c0_ == '=') {	576 } else if (c0_ == '=') {

568 token = Select(Token::ASSIGN_SUB);	577 token = Select(Token::ASSIGN_SUB);

569 } else {	578 } else {

570 token = Token::SUB;	579 token = Token::SUB;

(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
736 int current_pos = source_pos();	745 int current_pos = source_pos();

737 DCHECK_EQ(next_.location.end_pos, current_pos);	746 DCHECK_EQ(next_.location.end_pos, current_pos);

738 // Positions inside the lookahead token aren't supported.	747 // Positions inside the lookahead token aren't supported.

739 DCHECK(pos >= current_pos);	748 DCHECK(pos >= current_pos);

740 if (pos != current_pos) {	749 if (pos != current_pos) {

741 source_->SeekForward(pos - source_->pos());	750 source_->SeekForward(pos - source_->pos());

742 Advance();	751 Advance();

743 // This function is only called to seek to the location	752 // This function is only called to seek to the location

744 // of the end of a function (at the "}" token). It doesn't matter	753 // of the end of a function (at the "}" token). It doesn't matter

745 // whether there was a line terminator in the part we skip.	754 // whether there was a line terminator in the part we skip.

746 has_line_terminator_before_next_ = false;	755 has_preceding_line_terminator_ = false;

747 has_multiline_comment_before_next_ = false;

748 }	756 }

749 Scan();	757 Scan();

750 }	758 }

751	759

752	760

753 template <bool capture_raw, bool in_template_literal>	761 template <bool capture_raw, bool in_template_literal>

754 bool Scanner::ScanEscape() {	762 bool Scanner::ScanEscape() {

755 uc32 c = c0_;	763 uc32 c = c0_;

756 Advance<capture_raw>();	764 Advance<capture_raw>();

757	765

(...skipping 704 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1462 }	1470 }

1463	1471

1464	1472

1465 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {	1473 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

1466 if (is_next_literal_one_byte()) {	1474 if (is_next_literal_one_byte()) {

1467 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());	1475 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

1468 }	1476 }

1469 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());	1477 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());

1470 }	1478 }

1471	1479

	1480 const AstRawString* Scanner::NextNextSymbol(

	1481 AstValueFactory* ast_value_factory) {

	1482 DCHECK(next_next_.token != Token::UNINITIALIZED);

	1483 LiteralBuffer* literal = next_next_.literal_chars;

	1484 if (literal->is_one_byte()) {

	1485 return ast_value_factory->GetOneByteString(literal->one_byte_literal());

	1486 }

	1487 return ast_value_factory->GetTwoByteString(literal->two_byte_literal());

	1488 }

1472	1489

1473 const AstRawString* Scanner::CurrentRawSymbol(	1490 const AstRawString* Scanner::CurrentRawSymbol(

1474 AstValueFactory* ast_value_factory) {	1491 AstValueFactory* ast_value_factory) {

1475 if (is_raw_literal_one_byte()) {	1492 if (is_raw_literal_one_byte()) {

1476 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());	1493 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());

1477 }	1494 }

1478 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());	1495 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());

1479 }	1496 }

1480	1497

1481	1498

(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1687 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));	1704 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1688 }	1705 }

1689 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1706 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1690	1707

1691 backing_store_.AddBlock(bytes);	1708 backing_store_.AddBlock(bytes);

1692 return backing_store_.EndSequence().start();	1709 return backing_store_.EndSequence().start();

1693 }	1710 }

1694	1711

1695 } // namespace internal	1712 } // namespace internal

1696 } // namespace v8	1713 } // namespace v8

OLD	NEW

« src/parsing/preparser.cc ('K') | « src/parsing/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | test/cctest/test-parsing.cc » ('J')