Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(555)

Side by Side Diff: src/parsing/scanner.cc

Issue 1841543003: [esnext] implement frontend changes for async/await proposal (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Fix a pointless edit Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/parsing/scanner.h" 7 #include "src/parsing/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 } 49 }
50 50
51 51
52 void Scanner::Initialize(Utf16CharacterStream* source) { 52 void Scanner::Initialize(Utf16CharacterStream* source) {
53 source_ = source; 53 source_ = source;
54 // Need to capture identifiers in order to recognize "get" and "set" 54 // Need to capture identifiers in order to recognize "get" and "set"
55 // in object literals. 55 // in object literals.
56 Init(); 56 Init();
57 // Skip initial whitespace allowing HTML comment ends just like 57 // Skip initial whitespace allowing HTML comment ends just like
58 // after a newline and scan first token. 58 // after a newline and scan first token.
59 has_line_terminator_before_next_ = true; 59 has_preceding_line_terminator_ = true;
60 has_preceding_multiline_comment_ = false;
60 SkipWhiteSpace(); 61 SkipWhiteSpace();
61 Scan(); 62 Scan();
62 } 63 }
63 64
64 template <bool capture_raw, bool unicode> 65 template <bool capture_raw, bool unicode>
65 uc32 Scanner::ScanHexNumber(int expected_length) { 66 uc32 Scanner::ScanHexNumber(int expected_length) {
66 DCHECK(expected_length <= 4); // prevent overflow 67 DCHECK(expected_length <= 4); // prevent overflow
67 68
68 int begin = source_pos() - 2; 69 int begin = source_pos() - 2;
69 uc32 x = 0; 70 uc32 x = 0;
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after
242 243
243 Token::Value Scanner::Next() { 244 Token::Value Scanner::Next() {
244 if (next_.token == Token::EOS) { 245 if (next_.token == Token::EOS) {
245 next_.location.beg_pos = current_.location.beg_pos; 246 next_.location.beg_pos = current_.location.beg_pos;
246 next_.location.end_pos = current_.location.end_pos; 247 next_.location.end_pos = current_.location.end_pos;
247 } 248 }
248 current_ = next_; 249 current_ = next_;
249 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { 250 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {
250 next_ = next_next_; 251 next_ = next_next_;
251 next_next_.token = Token::UNINITIALIZED; 252 next_next_.token = Token::UNINITIALIZED;
253 has_preceding_line_terminator_ = next_has_preceding_line_terminator_;
252 return current_.token; 254 return current_.token;
253 } 255 }
254 has_line_terminator_before_next_ = false; 256 has_preceding_line_terminator_ = false;
255 has_multiline_comment_before_next_ = false; 257 has_preceding_multiline_comment_ = false;
256 if (static_cast<unsigned>(c0_) <= 0x7f) { 258 if (static_cast<unsigned>(c0_) <= 0x7f) {
257 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); 259 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
258 if (token != Token::ILLEGAL) { 260 if (token != Token::ILLEGAL) {
259 int pos = source_pos(); 261 int pos = source_pos();
260 next_.token = token; 262 next_.token = token;
261 next_.location.beg_pos = pos; 263 next_.location.beg_pos = pos;
262 next_.location.end_pos = pos + 1; 264 next_.location.end_pos = pos + 1;
263 Advance(); 265 Advance();
264 return current_.token; 266 return current_.token;
265 } 267 }
266 } 268 }
267 Scan(); 269 Scan();
268 return current_.token; 270 return current_.token;
269 } 271 }
270 272
271 273
272 Token::Value Scanner::PeekAhead() { 274 Token::Value Scanner::PeekAhead() {
273 if (next_next_.token != Token::UNINITIALIZED) { 275 if (next_next_.token != Token::UNINITIALIZED) {
274 return next_next_.token; 276 return next_next_.token;
275 } 277 }
276 TokenDesc prev = current_; 278 TokenDesc prev = current_;
279 bool has_preceding_line_terminator =
280 has_preceding_line_terminator_ || has_preceding_multiline_comment_;
Dan Ehrenberg 2016/05/12 19:24:32 What does has_preceding_line_terminator actually m…
caitp (gmail) 2016/05/12 19:56:05 There's no real difference in how it currently wor…
277 Next(); 281 Next();
282 next_has_preceding_line_terminator_ =
283 has_preceding_line_terminator_ || has_preceding_multiline_comment_;
284 has_preceding_line_terminator_ = has_preceding_line_terminator;
278 Token::Value ret = next_.token; 285 Token::Value ret = next_.token;
279 next_next_ = next_; 286 next_next_ = next_;
280 next_ = current_; 287 next_ = current_;
281 current_ = prev; 288 current_ = prev;
282 return ret; 289 return ret;
283 } 290 }
284 291
285 292
286 // TODO(yangguo): check whether this is actually necessary. 293 // TODO(yangguo): check whether this is actually necessary.
287 static inline bool IsLittleEndianByteOrderMark(uc32 c) { 294 static inline bool IsLittleEndianByteOrderMark(uc32 c) {
288 // The Unicode value U+FFFE is guaranteed never to be assigned as a 295 // The Unicode value U+FFFE is guaranteed never to be assigned as a
289 // Unicode character; this implies that in a Unicode context the 296 // Unicode character; this implies that in a Unicode context the
290 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 297 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
291 // character expressed in little-endian byte order (since it could 298 // character expressed in little-endian byte order (since it could
292 // not be a U+FFFE character expressed in big-endian byte 299 // not be a U+FFFE character expressed in big-endian byte
293 // order). Nevertheless, we check for it to be compatible with 300 // order). Nevertheless, we check for it to be compatible with
294 // Spidermonkey. 301 // Spidermonkey.
295 return c == 0xFFFE; 302 return c == 0xFFFE;
296 } 303 }
297 304
298 305
299 bool Scanner::SkipWhiteSpace() { 306 bool Scanner::SkipWhiteSpace() {
300 int start_position = source_pos(); 307 int start_position = source_pos();
301
302 while (true) { 308 while (true) {
303 while (true) { 309 while (true) {
304 // The unicode cache accepts unsigned inputs. 310 // The unicode cache accepts unsigned inputs.
305 if (c0_ < 0) break; 311 if (c0_ < 0) break;
306 // Advance as long as character is a WhiteSpace or LineTerminator. 312 // Advance as long as character is a WhiteSpace or LineTerminator.
307 // Remember if the latter is the case. 313 // Remember if the latter is the case.
308 if (unicode_cache_->IsLineTerminator(c0_)) { 314 if (unicode_cache_->IsLineTerminator(c0_)) {
309 has_line_terminator_before_next_ = true; 315 has_preceding_line_terminator_ = true;
310 } else if (!unicode_cache_->IsWhiteSpace(c0_) && 316 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
311 !IsLittleEndianByteOrderMark(c0_)) { 317 !IsLittleEndianByteOrderMark(c0_)) {
312 break; 318 break;
313 } 319 }
314 Advance(); 320 Advance();
315 } 321 }
316 322
317 // If there is an HTML comment end '-->' at the beginning of a 323 // If there is an HTML comment end '-->' at the beginning of a
318 // line (with only whitespace in front of it), we treat the rest 324 // line (with only whitespace in front of it), we treat the rest
319 // of the line as a comment. This is in line with the way 325 // of the line as a comment. This is in line with the way
320 // SpiderMonkey handles it. 326 // SpiderMonkey handles it.
321 if (c0_ == '-' && has_line_terminator_before_next_) { 327 if (c0_ == '-' && has_preceding_line_terminator_) {
322 Advance(); 328 Advance();
323 if (c0_ == '-') { 329 if (c0_ == '-') {
324 Advance(); 330 Advance();
325 if (c0_ == '>') { 331 if (c0_ == '>') {
326 // Treat the rest of the line as a comment. 332 // Treat the rest of the line as a comment.
327 SkipSingleLineComment(); 333 SkipSingleLineComment();
328 // Continue skipping white space after the comment. 334 // Continue skipping white space after the comment.
329 continue; 335 continue;
330 } 336 }
331 PushBack('-'); // undo Advance() 337 PushBack('-'); // undo Advance()
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
418 Token::Value Scanner::SkipMultiLineComment() { 424 Token::Value Scanner::SkipMultiLineComment() {
419 DCHECK(c0_ == '*'); 425 DCHECK(c0_ == '*');
420 Advance(); 426 Advance();
421 427
422 while (c0_ >= 0) { 428 while (c0_ >= 0) {
423 uc32 ch = c0_; 429 uc32 ch = c0_;
424 Advance(); 430 Advance();
425 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { 431 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {
426 // Following ECMA-262, section 7.4, a comment containing 432 // Following ECMA-262, section 7.4, a comment containing
427 // a newline will make the comment count as a line-terminator. 433 // a newline will make the comment count as a line-terminator.
428 has_multiline_comment_before_next_ = true; 434 has_preceding_multiline_comment_ = true;
429 } 435 }
430 // If we have reached the end of the multi-line comment, we 436 // If we have reached the end of the multi-line comment, we
431 // consume the '/' and insert a whitespace. This way all 437 // consume the '/' and insert a whitespace. This way all
432 // multi-line comments are treated as whitespace. 438 // multi-line comments are treated as whitespace.
433 if (ch == '*' && c0_ == '/') { 439 if (ch == '*' && c0_ == '/') {
434 c0_ = ' '; 440 c0_ = ' ';
435 return Token::WHITESPACE; 441 return Token::WHITESPACE;
436 } 442 }
437 } 443 }
438 444
(...skipping 17 matching lines...) Expand all
456 PushBack('!'); // undo Advance() 462 PushBack('!'); // undo Advance()
457 DCHECK(c0_ == '!'); 463 DCHECK(c0_ == '!');
458 return Token::LT; 464 return Token::LT;
459 } 465 }
460 466
461 467
462 void Scanner::Scan() { 468 void Scanner::Scan() {
463 next_.literal_chars = NULL; 469 next_.literal_chars = NULL;
464 next_.raw_literal_chars = NULL; 470 next_.raw_literal_chars = NULL;
465 Token::Value token; 471 Token::Value token;
472
466 do { 473 do {
467 // Remember the position of the next token 474 // Remember the position of the next token
468 next_.location.beg_pos = source_pos(); 475 next_.location.beg_pos = source_pos();
469 476
470 switch (c0_) { 477 switch (c0_) {
471 case ' ': 478 case ' ':
472 case '\t': 479 case '\t':
473 Advance(); 480 Advance();
474 token = Token::WHITESPACE; 481 token = Token::WHITESPACE;
475 break; 482 break;
476 483
477 case '\n': 484 case '\n':
478 Advance(); 485 Advance();
479 has_line_terminator_before_next_ = true; 486 has_preceding_line_terminator_ = true;
480 token = Token::WHITESPACE; 487 token = Token::WHITESPACE;
481 break; 488 break;
482 489
483 case '"': case '\'': 490 case '"': case '\'':
484 token = ScanString(); 491 token = ScanString();
485 break; 492 break;
486 493
487 case '<': 494 case '<':
488 // < <= << <<= <!-- 495 // < <= << <<= <!--
489 Advance(); 496 Advance();
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
550 } else { 557 } else {
551 token = Token::ADD; 558 token = Token::ADD;
552 } 559 }
553 break; 560 break;
554 561
555 case '-': 562 case '-':
556 // - -- --> -= 563 // - -- --> -=
557 Advance(); 564 Advance();
558 if (c0_ == '-') { 565 if (c0_ == '-') {
559 Advance(); 566 Advance();
560 if (c0_ == '>' && has_line_terminator_before_next_) { 567 if (c0_ == '>' && has_preceding_line_terminator_) {
561 // For compatibility with SpiderMonkey, we skip lines that 568 // For compatibility with SpiderMonkey, we skip lines that
562 // start with an HTML comment end '-->'. 569 // start with an HTML comment end '-->'.
563 token = SkipSingleLineComment(); 570 token = SkipSingleLineComment();
564 } else { 571 } else {
565 token = Token::DEC; 572 token = Token::DEC;
566 } 573 }
567 } else if (c0_ == '=') { 574 } else if (c0_ == '=') {
568 token = Select(Token::ASSIGN_SUB); 575 token = Select(Token::ASSIGN_SUB);
569 } else { 576 } else {
570 token = Token::SUB; 577 token = Token::SUB;
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
736 int current_pos = source_pos(); 743 int current_pos = source_pos();
737 DCHECK_EQ(next_.location.end_pos, current_pos); 744 DCHECK_EQ(next_.location.end_pos, current_pos);
738 // Positions inside the lookahead token aren't supported. 745 // Positions inside the lookahead token aren't supported.
739 DCHECK(pos >= current_pos); 746 DCHECK(pos >= current_pos);
740 if (pos != current_pos) { 747 if (pos != current_pos) {
741 source_->SeekForward(pos - source_->pos()); 748 source_->SeekForward(pos - source_->pos());
742 Advance(); 749 Advance();
743 // This function is only called to seek to the location 750 // This function is only called to seek to the location
744 // of the end of a function (at the "}" token). It doesn't matter 751 // of the end of a function (at the "}" token). It doesn't matter
745 // whether there was a line terminator in the part we skip. 752 // whether there was a line terminator in the part we skip.
746 has_line_terminator_before_next_ = false; 753 has_preceding_line_terminator_ = false;
747 has_multiline_comment_before_next_ = false; 754 has_preceding_multiline_comment_ = false;
748 } 755 }
749 Scan(); 756 Scan();
750 } 757 }
751 758
752 759
753 template <bool capture_raw, bool in_template_literal> 760 template <bool capture_raw, bool in_template_literal>
754 bool Scanner::ScanEscape() { 761 bool Scanner::ScanEscape() {
755 uc32 c = c0_; 762 uc32 c = c0_;
756 Advance<capture_raw>(); 763 Advance<capture_raw>();
757 764
(...skipping 371 matching lines...) Expand 10 before | Expand all | Expand 10 after
1129 const bool unicode = true; 1136 const bool unicode = true;
1130 return ScanHexNumber<capture_raw, unicode>(4); 1137 return ScanHexNumber<capture_raw, unicode>(4);
1131 } 1138 }
1132 1139
1133 1140
1134 // ---------------------------------------------------------------------------- 1141 // ----------------------------------------------------------------------------
1135 // Keyword Matcher 1142 // Keyword Matcher
1136 1143
1137 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ 1144 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
1138 KEYWORD_GROUP('a') \ 1145 KEYWORD_GROUP('a') \
1146 KEYWORD("async", Token::ASYNC) \
1139 KEYWORD("await", Token::AWAIT) \ 1147 KEYWORD("await", Token::AWAIT) \
1140 KEYWORD_GROUP('b') \ 1148 KEYWORD_GROUP('b') \
1141 KEYWORD("break", Token::BREAK) \ 1149 KEYWORD("break", Token::BREAK) \
1142 KEYWORD_GROUP('c') \ 1150 KEYWORD_GROUP('c') \
1143 KEYWORD("case", Token::CASE) \ 1151 KEYWORD("case", Token::CASE) \
1144 KEYWORD("catch", Token::CATCH) \ 1152 KEYWORD("catch", Token::CATCH) \
1145 KEYWORD("class", Token::CLASS) \ 1153 KEYWORD("class", Token::CLASS) \
1146 KEYWORD("const", Token::CONST) \ 1154 KEYWORD("const", Token::CONST) \
1147 KEYWORD("continue", Token::CONTINUE) \ 1155 KEYWORD("continue", Token::CONTINUE) \
1148 KEYWORD_GROUP('d') \ 1156 KEYWORD_GROUP('d') \
(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after
1462 } 1470 }
1463 1471
1464 1472
1465 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { 1473 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
1466 if (is_next_literal_one_byte()) { 1474 if (is_next_literal_one_byte()) {
1467 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); 1475 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
1468 } 1476 }
1469 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); 1477 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
1470 } 1478 }
1471 1479
1480 const AstRawString* Scanner::NextNextSymbol(
1481 AstValueFactory* ast_value_factory) {
1482 DCHECK(next_next_.token != Token::UNINITIALIZED);
1483 LiteralBuffer* literal = next_next_.literal_chars;
1484 if (literal->is_one_byte()) {
1485 return ast_value_factory->GetOneByteString(literal->one_byte_literal());
1486 }
1487 return ast_value_factory->GetTwoByteString(literal->two_byte_literal());
1488 }
1472 1489
1473 const AstRawString* Scanner::CurrentRawSymbol( 1490 const AstRawString* Scanner::CurrentRawSymbol(
1474 AstValueFactory* ast_value_factory) { 1491 AstValueFactory* ast_value_factory) {
1475 if (is_raw_literal_one_byte()) { 1492 if (is_raw_literal_one_byte()) {
1476 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string()); 1493 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
1477 } 1494 }
1478 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string()); 1495 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
1479 } 1496 }
1480 1497
1481 1498
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
1687 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1704 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1688 } 1705 }
1689 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1706 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1690 1707
1691 backing_store_.AddBlock(bytes); 1708 backing_store_.AddBlock(bytes);
1692 return backing_store_.EndSequence().start(); 1709 return backing_store_.EndSequence().start();
1693 } 1710 }
1694 1711
1695 } // namespace internal 1712 } // namespace internal
1696 } // namespace v8 1713 } // namespace v8
OLD | NEW
« src/parsing/parser-base.h ('K') | « src/parsing/scanner.h ('k') | src/parsing/token.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698