src/parsing/scanner.cc - Issue 1841543003: [esnext] implement frontend changes for async/await proposal

Side by Side Diff: src/parsing/scanner.cc

Issue 1841543003: [esnext] implement frontend changes for async/await proposal (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Fix more problems Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
49 }	49 }

50	50

51	51

52 void Scanner::Initialize(Utf16CharacterStream* source) {	52 void Scanner::Initialize(Utf16CharacterStream* source) {

53 source_ = source;	53 source_ = source;

54 // Need to capture identifiers in order to recognize "get" and "set"	54 // Need to capture identifiers in order to recognize "get" and "set"

55 // in object literals.	55 // in object literals.

56 Init();	56 Init();

57 // Skip initial whitespace allowing HTML comment ends just like	57 // Skip initial whitespace allowing HTML comment ends just like

58 // after a newline and scan first token.	58 // after a newline and scan first token.

59 has_line_terminator_before_next_ = true;	59 has_preceding_line_terminator_ = true;

	60 has_preceding_multiline_comment_ = false;

60 SkipWhiteSpace();	61 SkipWhiteSpace();

61 Scan();	62 Scan();

62 }	63 }

63	64

64 template <bool capture_raw, bool unicode>	65 template <bool capture_raw, bool unicode>

65 uc32 Scanner::ScanHexNumber(int expected_length) {	66 uc32 Scanner::ScanHexNumber(int expected_length) {

66 DCHECK(expected_length <= 4); // prevent overflow	67 DCHECK(expected_length <= 4); // prevent overflow

67	68

68 int begin = source_pos() - 2;	69 int begin = source_pos() - 2;

69 uc32 x = 0;	70 uc32 x = 0;

(...skipping 172 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
242	243

243 Token::Value Scanner::Next() {	244 Token::Value Scanner::Next() {

244 if (next_.token == Token::EOS) {	245 if (next_.token == Token::EOS) {

245 next_.location.beg_pos = current_.location.beg_pos;	246 next_.location.beg_pos = current_.location.beg_pos;

246 next_.location.end_pos = current_.location.end_pos;	247 next_.location.end_pos = current_.location.end_pos;

247 }	248 }

248 current_ = next_;	249 current_ = next_;

249 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {	250 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {

250 next_ = next_next_;	251 next_ = next_next_;

251 next_next_.token = Token::UNINITIALIZED;	252 next_next_.token = Token::UNINITIALIZED;

	253 has_preceding_line_terminator_ = next_has_preceding_line_terminator_;

252 return current_.token;	254 return current_.token;

253 }	255 }

254 has_line_terminator_before_next_ = false;	256 has_preceding_line_terminator_ = false;

255 has_multiline_comment_before_next_ = false;	257 has_preceding_multiline_comment_ = false;

256 if (static_cast<unsigned>(c0_) <= 0x7f) {	258 if (static_cast<unsigned>(c0_) <= 0x7f) {

257 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);	259 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);

258 if (token != Token::ILLEGAL) {	260 if (token != Token::ILLEGAL) {

259 int pos = source_pos();	261 int pos = source_pos();

260 next_.token = token;	262 next_.token = token;

261 next_.location.beg_pos = pos;	263 next_.location.beg_pos = pos;

262 next_.location.end_pos = pos + 1;	264 next_.location.end_pos = pos + 1;

263 Advance();	265 Advance();

264 return current_.token;	266 return current_.token;

265 }	267 }

266 }	268 }

267 Scan();	269 Scan();

268 return current_.token;	270 return current_.token;

269 }	271 }

270	272

271	273

272 Token::Value Scanner::PeekAhead() {	274 Token::Value Scanner::PeekAhead() {

273 if (next_next_.token != Token::UNINITIALIZED) {	275 if (next_next_.token != Token::UNINITIALIZED) {

274 return next_next_.token;	276 return next_next_.token;

275 }	277 }

276 TokenDesc prev = current_;	278 TokenDesc prev = current_;

	279 bool has_preceding_line_terminator =

	280 has_preceding_line_terminator_ || has_preceding_multiline_comment_;

277 Next();	281 Next();

	282 next_has_preceding_line_terminator_ =

	283 has_preceding_line_terminator_ || has_preceding_multiline_comment_;

	284 has_preceding_line_terminator_ = has_preceding_line_terminator;

278 Token::Value ret = next_.token;	285 Token::Value ret = next_.token;

279 next_next_ = next_;	286 next_next_ = next_;

280 next_ = current_;	287 next_ = current_;

281 current_ = prev;	288 current_ = prev;

282 return ret;	289 return ret;

283 }	290 }

284	291

285	292

286 // TODO(yangguo): check whether this is actually necessary.	293 // TODO(yangguo): check whether this is actually necessary.

287 static inline bool IsLittleEndianByteOrderMark(uc32 c) {	294 static inline bool IsLittleEndianByteOrderMark(uc32 c) {

288 // The Unicode value U+FFFE is guaranteed never to be assigned as a	295 // The Unicode value U+FFFE is guaranteed never to be assigned as a

289 // Unicode character; this implies that in a Unicode context the	296 // Unicode character; this implies that in a Unicode context the

290 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	297 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

291 // character expressed in little-endian byte order (since it could	298 // character expressed in little-endian byte order (since it could

292 // not be a U+FFFE character expressed in big-endian byte	299 // not be a U+FFFE character expressed in big-endian byte

293 // order). Nevertheless, we check for it to be compatible with	300 // order). Nevertheless, we check for it to be compatible with

294 // Spidermonkey.	301 // Spidermonkey.

295 return c == 0xFFFE;	302 return c == 0xFFFE;

296 }	303 }

297	304

298	305

299 bool Scanner::SkipWhiteSpace() {	306 bool Scanner::SkipWhiteSpace() {

300 int start_position = source_pos();	307 int start_position = source_pos();

301

302 while (true) {	308 while (true) {

303 while (true) {	309 while (true) {

304 // The unicode cache accepts unsigned inputs.	310 // The unicode cache accepts unsigned inputs.

305 if (c0_ < 0) break;	311 if (c0_ < 0) break;

306 // Advance as long as character is a WhiteSpace or LineTerminator.	312 // Advance as long as character is a WhiteSpace or LineTerminator.

307 // Remember if the latter is the case.	313 // Remember if the latter is the case.

308 if (unicode_cache_->IsLineTerminator(c0_)) {	314 if (unicode_cache_->IsLineTerminator(c0_)) {

309 has_line_terminator_before_next_ = true;	315 has_preceding_line_terminator_ = true;

310 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&	316 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&

311 !IsLittleEndianByteOrderMark(c0_)) {	317 !IsLittleEndianByteOrderMark(c0_)) {

312 break;	318 break;

313 }	319 }

314 Advance();	320 Advance();

315 }	321 }

316	322

317 // If there is an HTML comment end '-->' at the beginning of a	323 // If there is an HTML comment end '-->' at the beginning of a

318 // line (with only whitespace in front of it), we treat the rest	324 // line (with only whitespace in front of it), we treat the rest

319 // of the line as a comment. This is in line with the way	325 // of the line as a comment. This is in line with the way

320 // SpiderMonkey handles it.	326 // SpiderMonkey handles it.

321 if (c0_ == '-' && has_line_terminator_before_next_) {	327 if (c0_ == '-' && has_preceding_line_terminator_) {

322 Advance();	328 Advance();

323 if (c0_ == '-') {	329 if (c0_ == '-') {

324 Advance();	330 Advance();

325 if (c0_ == '>') {	331 if (c0_ == '>') {

326 // Treat the rest of the line as a comment.	332 // Treat the rest of the line as a comment.

327 SkipSingleLineComment();	333 SkipSingleLineComment();

328 // Continue skipping white space after the comment.	334 // Continue skipping white space after the comment.

329 continue;	335 continue;

330 }	336 }

331 PushBack('-'); // undo Advance()	337 PushBack('-'); // undo Advance()

(...skipping 86 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
418 Token::Value Scanner::SkipMultiLineComment() {	424 Token::Value Scanner::SkipMultiLineComment() {

419 DCHECK(c0_ == '*');	425 DCHECK(c0_ == '*');

420 Advance();	426 Advance();

421	427

422 while (c0_ >= 0) {	428 while (c0_ >= 0) {

423 uc32 ch = c0_;	429 uc32 ch = c0_;

424 Advance();	430 Advance();

425 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {	431 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {

426 // Following ECMA-262, section 7.4, a comment containing	432 // Following ECMA-262, section 7.4, a comment containing

427 // a newline will make the comment count as a line-terminator.	433 // a newline will make the comment count as a line-terminator.

428 has_multiline_comment_before_next_ = true;	434 has_preceding_multiline_comment_ = true;

429 }	435 }

430 // If we have reached the end of the multi-line comment, we	436 // If we have reached the end of the multi-line comment, we

431 // consume the '/' and insert a whitespace. This way all	437 // consume the '/' and insert a whitespace. This way all

432 // multi-line comments are treated as whitespace.	438 // multi-line comments are treated as whitespace.

433 if (ch == '*' && c0_ == '/') {	439 if (ch == '*' && c0_ == '/') {

434 c0_ = ' ';	440 c0_ = ' ';

435 return Token::WHITESPACE;	441 return Token::WHITESPACE;

436 }	442 }

437 }	443 }

438	444

(...skipping 17 matching lines...) Expand all Loading...
456 PushBack('!'); // undo Advance()	462 PushBack('!'); // undo Advance()

457 DCHECK(c0_ == '!');	463 DCHECK(c0_ == '!');

458 return Token::LT;	464 return Token::LT;

459 }	465 }

460	466

461	467

462 void Scanner::Scan() {	468 void Scanner::Scan() {

463 next_.literal_chars = NULL;	469 next_.literal_chars = NULL;

464 next_.raw_literal_chars = NULL;	470 next_.raw_literal_chars = NULL;

465 Token::Value token;	471 Token::Value token;

	472

466 do {	473 do {

467 // Remember the position of the next token	474 // Remember the position of the next token

468 next_.location.beg_pos = source_pos();	475 next_.location.beg_pos = source_pos();

469	476

470 switch (c0_) {	477 switch (c0_) {

471 case ' ':	478 case ' ':

472 case '\t':	479 case '\t':

473 Advance();	480 Advance();

474 token = Token::WHITESPACE;	481 token = Token::WHITESPACE;

475 break;	482 break;

476	483

477 case '\n':	484 case '\n':

478 Advance();	485 Advance();

479 has_line_terminator_before_next_ = true;	486 has_preceding_line_terminator_ = true;

480 token = Token::WHITESPACE;	487 token = Token::WHITESPACE;

481 break;	488 break;

482	489

483 case '"': case '\'':	490 case '"': case '\'':

484 token = ScanString();	491 token = ScanString();

485 break;	492 break;

486	493

487 case '<':	494 case '<':

488 // < <= << <<= <!--	495 // < <= << <<= <!--

489 Advance();	496 Advance();

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
550 } else {	557 } else {

551 token = Token::ADD;	558 token = Token::ADD;

552 }	559 }

553 break;	560 break;

554	561

555 case '-':	562 case '-':

556 // - -- --> -=	563 // - -- --> -=

557 Advance();	564 Advance();

558 if (c0_ == '-') {	565 if (c0_ == '-') {

559 Advance();	566 Advance();

560 if (c0_ == '>' && has_line_terminator_before_next_) {	567 if (c0_ == '>' && has_preceding_line_terminator_) {

561 // For compatibility with SpiderMonkey, we skip lines that	568 // For compatibility with SpiderMonkey, we skip lines that

562 // start with an HTML comment end '-->'.	569 // start with an HTML comment end '-->'.

563 token = SkipSingleLineComment();	570 token = SkipSingleLineComment();

564 } else {	571 } else {

565 token = Token::DEC;	572 token = Token::DEC;

566 }	573 }

567 } else if (c0_ == '=') {	574 } else if (c0_ == '=') {

568 token = Select(Token::ASSIGN_SUB);	575 token = Select(Token::ASSIGN_SUB);

569 } else {	576 } else {

570 token = Token::SUB;	577 token = Token::SUB;

(...skipping 165 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
736 int current_pos = source_pos();	743 int current_pos = source_pos();

737 DCHECK_EQ(next_.location.end_pos, current_pos);	744 DCHECK_EQ(next_.location.end_pos, current_pos);

738 // Positions inside the lookahead token aren't supported.	745 // Positions inside the lookahead token aren't supported.

739 DCHECK(pos >= current_pos);	746 DCHECK(pos >= current_pos);

740 if (pos != current_pos) {	747 if (pos != current_pos) {

741 source_->SeekForward(pos - source_->pos());	748 source_->SeekForward(pos - source_->pos());

742 Advance();	749 Advance();

743 // This function is only called to seek to the location	750 // This function is only called to seek to the location

744 // of the end of a function (at the "}" token). It doesn't matter	751 // of the end of a function (at the "}" token). It doesn't matter

745 // whether there was a line terminator in the part we skip.	752 // whether there was a line terminator in the part we skip.

746 has_line_terminator_before_next_ = false;	753 has_preceding_line_terminator_ = false;

747 has_multiline_comment_before_next_ = false;	754 has_preceding_multiline_comment_ = false;

748 }	755 }

749 Scan();	756 Scan();

750 }	757 }

751	758

752	759

753 template <bool capture_raw, bool in_template_literal>	760 template <bool capture_raw, bool in_template_literal>

754 bool Scanner::ScanEscape() {	761 bool Scanner::ScanEscape() {

755 uc32 c = c0_;	762 uc32 c = c0_;

756 Advance<capture_raw>();	763 Advance<capture_raw>();

757	764

(...skipping 704 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1462 }	1469 }

1463	1470

1464	1471

1465 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {	1472 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

1466 if (is_next_literal_one_byte()) {	1473 if (is_next_literal_one_byte()) {

1467 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());	1474 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

1468 }	1475 }

1469 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());	1476 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());

1470 }	1477 }

1471	1478

	1479 const AstRawString* Scanner::NextNextSymbol(

	1480 AstValueFactory* ast_value_factory) {

	1481 DCHECK(next_next_.token != Token::UNINITIALIZED);

	1482 LiteralBuffer* literal = next_next_.literal_chars;

	1483 if (literal->is_one_byte()) {

	1484 return ast_value_factory->GetOneByteString(literal->one_byte_literal());

	1485 }

	1486 return ast_value_factory->GetTwoByteString(literal->two_byte_literal());

	1487 }

1472	1488

1473 const AstRawString* Scanner::CurrentRawSymbol(	1489 const AstRawString* Scanner::CurrentRawSymbol(

1474 AstValueFactory* ast_value_factory) {	1490 AstValueFactory* ast_value_factory) {

1475 if (is_raw_literal_one_byte()) {	1491 if (is_raw_literal_one_byte()) {

1476 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());	1492 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());

1477 }	1493 }

1478 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());	1494 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());

1479 }	1495 }

1480	1496

1481	1497

(...skipping 205 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1687 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));	1703 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1688 }	1704 }

1689 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1705 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1690	1706

1691 backing_store_.AddBlock(bytes);	1707 backing_store_.AddBlock(bytes);

1692 return backing_store_.EndSequence().start();	1708 return backing_store_.EndSequence().start();

1693 }	1709 }

1694	1710

1695 } // namespace internal	1711 } // namespace internal

1696 } // namespace v8	1712 } // namespace v8

OLD	NEW

« src/globals.h ('K') | « src/parsing/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »