src/parsing/scanner.cc - Issue 1841543003: [esnext] implement frontend changes for async/await proposal

Side by Side Diff: src/parsing/scanner.cc

Issue 1841543003: [esnext] implement frontend changes for async/await proposal (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Fix a pointless edit Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
49 }	49 }

50	50

51	51

52 void Scanner::Initialize(Utf16CharacterStream* source) {	52 void Scanner::Initialize(Utf16CharacterStream* source) {

53 source_ = source;	53 source_ = source;

54 // Need to capture identifiers in order to recognize "get" and "set"	54 // Need to capture identifiers in order to recognize "get" and "set"

55 // in object literals.	55 // in object literals.

56 Init();	56 Init();

57 // Skip initial whitespace allowing HTML comment ends just like	57 // Skip initial whitespace allowing HTML comment ends just like

58 // after a newline and scan first token.	58 // after a newline and scan first token.

59 has_line_terminator_before_next_ = true;	59 has_preceding_line_terminator_ = true;

	60 has_preceding_multiline_comment_ = false;

60 SkipWhiteSpace();	61 SkipWhiteSpace();

61 Scan();	62 Scan();

62 }	63 }

63	64

64 template <bool capture_raw, bool unicode>	65 template <bool capture_raw, bool unicode>

65 uc32 Scanner::ScanHexNumber(int expected_length) {	66 uc32 Scanner::ScanHexNumber(int expected_length) {

66 DCHECK(expected_length <= 4); // prevent overflow	67 DCHECK(expected_length <= 4); // prevent overflow

67	68

68 int begin = source_pos() - 2;	69 int begin = source_pos() - 2;

69 uc32 x = 0;	70 uc32 x = 0;

(...skipping 172 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
242	243

243 Token::Value Scanner::Next() {	244 Token::Value Scanner::Next() {

244 if (next_.token == Token::EOS) {	245 if (next_.token == Token::EOS) {

245 next_.location.beg_pos = current_.location.beg_pos;	246 next_.location.beg_pos = current_.location.beg_pos;

246 next_.location.end_pos = current_.location.end_pos;	247 next_.location.end_pos = current_.location.end_pos;

247 }	248 }

248 current_ = next_;	249 current_ = next_;

249 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {	250 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {

250 next_ = next_next_;	251 next_ = next_next_;

251 next_next_.token = Token::UNINITIALIZED;	252 next_next_.token = Token::UNINITIALIZED;

	253 has_preceding_line_terminator_ = next_has_preceding_line_terminator_;

252 return current_.token;	254 return current_.token;

253 }	255 }

254 has_line_terminator_before_next_ = false;	256 has_preceding_line_terminator_ = false;

255 has_multiline_comment_before_next_ = false;	257 has_preceding_multiline_comment_ = false;

256 if (static_cast<unsigned>(c0_) <= 0x7f) {	258 if (static_cast<unsigned>(c0_) <= 0x7f) {

257 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);	259 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);

258 if (token != Token::ILLEGAL) {	260 if (token != Token::ILLEGAL) {

259 int pos = source_pos();	261 int pos = source_pos();

260 next_.token = token;	262 next_.token = token;

261 next_.location.beg_pos = pos;	263 next_.location.beg_pos = pos;

262 next_.location.end_pos = pos + 1;	264 next_.location.end_pos = pos + 1;

263 Advance();	265 Advance();

264 return current_.token;	266 return current_.token;

265 }	267 }

266 }	268 }

267 Scan();	269 Scan();

268 return current_.token;	270 return current_.token;

269 }	271 }

270	272

271	273

272 Token::Value Scanner::PeekAhead() {	274 Token::Value Scanner::PeekAhead() {

273 if (next_next_.token != Token::UNINITIALIZED) {	275 if (next_next_.token != Token::UNINITIALIZED) {

274 return next_next_.token;	276 return next_next_.token;

275 }	277 }

276 TokenDesc prev = current_;	278 TokenDesc prev = current_;

	279 bool has_preceding_line_terminator =

	280 has_preceding_line_terminator_ || has_preceding_multiline_comment_;
	Dan Ehrenberg 2016/05/12 19:24:32 What does has_preceding_line_terminator actually mean after this change? It would be nice to document how it all works currently, in case something else comes along that wants to refer to it. caitp (gmail) 2016/05/12 19:56:05 On 2016/05/12 19:24:32, Dan Ehrenberg wrote: > What does has_preceding_line_terminator actually mean after this change? It > would be nice to document how it all works currently, in case something else > comes along that wants to refer to it. There's no real difference in how it currently works; it's just renamed to something that makes more sense to me. I can change it back (and restore the old comments that were modified).
277 Next();	281 Next();

	282 next_has_preceding_line_terminator_ =

	283 has_preceding_line_terminator_ || has_preceding_multiline_comment_;

	284 has_preceding_line_terminator_ = has_preceding_line_terminator;

278 Token::Value ret = next_.token;	285 Token::Value ret = next_.token;

279 next_next_ = next_;	286 next_next_ = next_;

280 next_ = current_;	287 next_ = current_;

281 current_ = prev;	288 current_ = prev;

282 return ret;	289 return ret;

283 }	290 }

284	291

285	292

286 // TODO(yangguo): check whether this is actually necessary.	293 // TODO(yangguo): check whether this is actually necessary.

287 static inline bool IsLittleEndianByteOrderMark(uc32 c) {	294 static inline bool IsLittleEndianByteOrderMark(uc32 c) {

288 // The Unicode value U+FFFE is guaranteed never to be assigned as a	295 // The Unicode value U+FFFE is guaranteed never to be assigned as a

289 // Unicode character; this implies that in a Unicode context the	296 // Unicode character; this implies that in a Unicode context the

290 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	297 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

291 // character expressed in little-endian byte order (since it could	298 // character expressed in little-endian byte order (since it could

292 // not be a U+FFFE character expressed in big-endian byte	299 // not be a U+FFFE character expressed in big-endian byte

293 // order). Nevertheless, we check for it to be compatible with	300 // order). Nevertheless, we check for it to be compatible with

294 // Spidermonkey.	301 // Spidermonkey.

295 return c == 0xFFFE;	302 return c == 0xFFFE;

296 }	303 }

297	304

298	305

299 bool Scanner::SkipWhiteSpace() {	306 bool Scanner::SkipWhiteSpace() {

300 int start_position = source_pos();	307 int start_position = source_pos();

301

302 while (true) {	308 while (true) {

303 while (true) {	309 while (true) {

304 // The unicode cache accepts unsigned inputs.	310 // The unicode cache accepts unsigned inputs.

305 if (c0_ < 0) break;	311 if (c0_ < 0) break;

306 // Advance as long as character is a WhiteSpace or LineTerminator.	312 // Advance as long as character is a WhiteSpace or LineTerminator.

307 // Remember if the latter is the case.	313 // Remember if the latter is the case.

308 if (unicode_cache_->IsLineTerminator(c0_)) {	314 if (unicode_cache_->IsLineTerminator(c0_)) {

309 has_line_terminator_before_next_ = true;	315 has_preceding_line_terminator_ = true;

310 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&	316 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&

311 !IsLittleEndianByteOrderMark(c0_)) {	317 !IsLittleEndianByteOrderMark(c0_)) {

312 break;	318 break;

313 }	319 }

314 Advance();	320 Advance();

315 }	321 }

316	322

317 // If there is an HTML comment end '-->' at the beginning of a	323 // If there is an HTML comment end '-->' at the beginning of a

318 // line (with only whitespace in front of it), we treat the rest	324 // line (with only whitespace in front of it), we treat the rest

319 // of the line as a comment. This is in line with the way	325 // of the line as a comment. This is in line with the way

320 // SpiderMonkey handles it.	326 // SpiderMonkey handles it.

321 if (c0_ == '-' && has_line_terminator_before_next_) {	327 if (c0_ == '-' && has_preceding_line_terminator_) {

322 Advance();	328 Advance();

323 if (c0_ == '-') {	329 if (c0_ == '-') {

324 Advance();	330 Advance();

325 if (c0_ == '>') {	331 if (c0_ == '>') {

326 // Treat the rest of the line as a comment.	332 // Treat the rest of the line as a comment.

327 SkipSingleLineComment();	333 SkipSingleLineComment();

328 // Continue skipping white space after the comment.	334 // Continue skipping white space after the comment.

329 continue;	335 continue;

330 }	336 }

331 PushBack('-'); // undo Advance()	337 PushBack('-'); // undo Advance()

(...skipping 86 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
418 Token::Value Scanner::SkipMultiLineComment() {	424 Token::Value Scanner::SkipMultiLineComment() {

419 DCHECK(c0_ == '*');	425 DCHECK(c0_ == '*');

420 Advance();	426 Advance();

421	427

422 while (c0_ >= 0) {	428 while (c0_ >= 0) {

423 uc32 ch = c0_;	429 uc32 ch = c0_;

424 Advance();	430 Advance();

425 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {	431 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {

426 // Following ECMA-262, section 7.4, a comment containing	432 // Following ECMA-262, section 7.4, a comment containing

427 // a newline will make the comment count as a line-terminator.	433 // a newline will make the comment count as a line-terminator.

428 has_multiline_comment_before_next_ = true;	434 has_preceding_multiline_comment_ = true;

429 }	435 }

430 // If we have reached the end of the multi-line comment, we	436 // If we have reached the end of the multi-line comment, we

431 // consume the '/' and insert a whitespace. This way all	437 // consume the '/' and insert a whitespace. This way all

432 // multi-line comments are treated as whitespace.	438 // multi-line comments are treated as whitespace.

433 if (ch == '*' && c0_ == '/') {	439 if (ch == '*' && c0_ == '/') {

434 c0_ = ' ';	440 c0_ = ' ';

435 return Token::WHITESPACE;	441 return Token::WHITESPACE;

436 }	442 }

437 }	443 }

438	444

(...skipping 17 matching lines...) Expand all Loading...
456 PushBack('!'); // undo Advance()	462 PushBack('!'); // undo Advance()

457 DCHECK(c0_ == '!');	463 DCHECK(c0_ == '!');

458 return Token::LT;	464 return Token::LT;

459 }	465 }

460	466

461	467

462 void Scanner::Scan() {	468 void Scanner::Scan() {

463 next_.literal_chars = NULL;	469 next_.literal_chars = NULL;

464 next_.raw_literal_chars = NULL;	470 next_.raw_literal_chars = NULL;

465 Token::Value token;	471 Token::Value token;

	472

466 do {	473 do {

467 // Remember the position of the next token	474 // Remember the position of the next token

468 next_.location.beg_pos = source_pos();	475 next_.location.beg_pos = source_pos();

469	476

470 switch (c0_) {	477 switch (c0_) {

471 case ' ':	478 case ' ':

472 case '\t':	479 case '\t':

473 Advance();	480 Advance();

474 token = Token::WHITESPACE;	481 token = Token::WHITESPACE;

475 break;	482 break;

476	483

477 case '\n':	484 case '\n':

478 Advance();	485 Advance();

479 has_line_terminator_before_next_ = true;	486 has_preceding_line_terminator_ = true;

480 token = Token::WHITESPACE;	487 token = Token::WHITESPACE;

481 break;	488 break;

482	489

483 case '"': case '\'':	490 case '"': case '\'':

484 token = ScanString();	491 token = ScanString();

485 break;	492 break;

486	493

487 case '<':	494 case '<':

488 // < <= << <<= <!--	495 // < <= << <<= <!--

489 Advance();	496 Advance();

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
550 } else {	557 } else {

551 token = Token::ADD;	558 token = Token::ADD;

552 }	559 }

553 break;	560 break;

554	561

555 case '-':	562 case '-':

556 // - -- --> -=	563 // - -- --> -=

557 Advance();	564 Advance();

558 if (c0_ == '-') {	565 if (c0_ == '-') {

559 Advance();	566 Advance();

560 if (c0_ == '>' && has_line_terminator_before_next_) {	567 if (c0_ == '>' && has_preceding_line_terminator_) {

561 // For compatibility with SpiderMonkey, we skip lines that	568 // For compatibility with SpiderMonkey, we skip lines that

562 // start with an HTML comment end '-->'.	569 // start with an HTML comment end '-->'.

563 token = SkipSingleLineComment();	570 token = SkipSingleLineComment();

564 } else {	571 } else {

565 token = Token::DEC;	572 token = Token::DEC;

566 }	573 }

567 } else if (c0_ == '=') {	574 } else if (c0_ == '=') {

568 token = Select(Token::ASSIGN_SUB);	575 token = Select(Token::ASSIGN_SUB);

569 } else {	576 } else {

570 token = Token::SUB;	577 token = Token::SUB;

(...skipping 165 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
736 int current_pos = source_pos();	743 int current_pos = source_pos();

737 DCHECK_EQ(next_.location.end_pos, current_pos);	744 DCHECK_EQ(next_.location.end_pos, current_pos);

738 // Positions inside the lookahead token aren't supported.	745 // Positions inside the lookahead token aren't supported.

739 DCHECK(pos >= current_pos);	746 DCHECK(pos >= current_pos);

740 if (pos != current_pos) {	747 if (pos != current_pos) {

741 source_->SeekForward(pos - source_->pos());	748 source_->SeekForward(pos - source_->pos());

742 Advance();	749 Advance();

743 // This function is only called to seek to the location	750 // This function is only called to seek to the location

744 // of the end of a function (at the "}" token). It doesn't matter	751 // of the end of a function (at the "}" token). It doesn't matter

745 // whether there was a line terminator in the part we skip.	752 // whether there was a line terminator in the part we skip.

746 has_line_terminator_before_next_ = false;	753 has_preceding_line_terminator_ = false;

747 has_multiline_comment_before_next_ = false;	754 has_preceding_multiline_comment_ = false;

748 }	755 }

749 Scan();	756 Scan();

750 }	757 }

751	758

752	759

753 template <bool capture_raw, bool in_template_literal>	760 template <bool capture_raw, bool in_template_literal>

754 bool Scanner::ScanEscape() {	761 bool Scanner::ScanEscape() {

755 uc32 c = c0_;	762 uc32 c = c0_;

756 Advance<capture_raw>();	763 Advance<capture_raw>();

757	764

(...skipping 371 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1129 const bool unicode = true;	1136 const bool unicode = true;

1130 return ScanHexNumber<capture_raw, unicode>(4);	1137 return ScanHexNumber<capture_raw, unicode>(4);

1131 }	1138 }

1132	1139

1133	1140

1134 // ----------------------------------------------------------------------------	1141 // ----------------------------------------------------------------------------

1135 // Keyword Matcher	1142 // Keyword Matcher

1136	1143

1137 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \	1144 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \

1138 KEYWORD_GROUP('a') \	1145 KEYWORD_GROUP('a') \

	1146 KEYWORD("async", Token::ASYNC) \

1139 KEYWORD("await", Token::AWAIT) \	1147 KEYWORD("await", Token::AWAIT) \

1140 KEYWORD_GROUP('b') \	1148 KEYWORD_GROUP('b') \

1141 KEYWORD("break", Token::BREAK) \	1149 KEYWORD("break", Token::BREAK) \

1142 KEYWORD_GROUP('c') \	1150 KEYWORD_GROUP('c') \

1143 KEYWORD("case", Token::CASE) \	1151 KEYWORD("case", Token::CASE) \

1144 KEYWORD("catch", Token::CATCH) \	1152 KEYWORD("catch", Token::CATCH) \

1145 KEYWORD("class", Token::CLASS) \	1153 KEYWORD("class", Token::CLASS) \

1146 KEYWORD("const", Token::CONST) \	1154 KEYWORD("const", Token::CONST) \

1147 KEYWORD("continue", Token::CONTINUE) \	1155 KEYWORD("continue", Token::CONTINUE) \

1148 KEYWORD_GROUP('d') \	1156 KEYWORD_GROUP('d') \

(...skipping 313 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1462 }	1470 }

1463	1471

1464	1472

1465 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {	1473 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

1466 if (is_next_literal_one_byte()) {	1474 if (is_next_literal_one_byte()) {

1467 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());	1475 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

1468 }	1476 }

1469 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());	1477 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());

1470 }	1478 }

1471	1479

	1480 const AstRawString* Scanner::NextNextSymbol(

	1481 AstValueFactory* ast_value_factory) {

	1482 DCHECK(next_next_.token != Token::UNINITIALIZED);

	1483 LiteralBuffer* literal = next_next_.literal_chars;

	1484 if (literal->is_one_byte()) {

	1485 return ast_value_factory->GetOneByteString(literal->one_byte_literal());

	1486 }

	1487 return ast_value_factory->GetTwoByteString(literal->two_byte_literal());

	1488 }

1472	1489

1473 const AstRawString* Scanner::CurrentRawSymbol(	1490 const AstRawString* Scanner::CurrentRawSymbol(

1474 AstValueFactory* ast_value_factory) {	1491 AstValueFactory* ast_value_factory) {

1475 if (is_raw_literal_one_byte()) {	1492 if (is_raw_literal_one_byte()) {

1476 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());	1493 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());

1477 }	1494 }

1478 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());	1495 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());

1479 }	1496 }

1480	1497

1481	1498

(...skipping 205 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1687 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));	1704 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1688 }	1705 }

1689 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1706 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1690	1707

1691 backing_store_.AddBlock(bytes);	1708 backing_store_.AddBlock(bytes);

1692 return backing_store_.EndSequence().start();	1709 return backing_store_.EndSequence().start();

1693 }	1710 }

1694	1711

1695 } // namespace internal	1712 } // namespace internal

1696 } // namespace v8	1713 } // namespace v8

OLD	NEW

« src/parsing/parser-base.h ('K') | « src/parsing/scanner.h ('k') | src/parsing/token.h » ('j') | no next file with comments »