src/scanner.cc - Issue 663683006: Implement ES6 Template Literals

Side by Side Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Tiny fixups Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <cmath>	9 #include <cmath>

10	10

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
47 // in object literals.	47 // in object literals.

48 Init();	48 Init();

49 // Skip initial whitespace allowing HTML comment ends just like	49 // Skip initial whitespace allowing HTML comment ends just like

50 // after a newline and scan first token.	50 // after a newline and scan first token.

51 has_line_terminator_before_next_ = true;	51 has_line_terminator_before_next_ = true;

52 SkipWhiteSpace();	52 SkipWhiteSpace();

53 Scan();	53 Scan();

54 }	54 }

55	55

56	56

57 uc32 Scanner::ScanHexNumber(int expected_length) {	57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {

58 DCHECK(expected_length <= 4); // prevent overflow	58 DCHECK(expected_length <= 4); // prevent overflow

59	59

60 uc32 x = 0;	60 uc32 x = 0;

61 for (int i = 0; i < expected_length; i++) {	61 for (int i = 0; i < expected_length; i++) {

62 int d = HexValue(c0_);	62 int d = HexValue(c0_);

63 if (d < 0) {	63 if (d < 0) {

64 return -1;	64 return -1;

65 }	65 }

66 x = x * 16 + d;	66 x = x * 16 + d;

	67 if (recordRaw) {

	68 AddRawLiteralChar(c0_);

	69 }

67 Advance();	70 Advance();

68 }	71 }

69	72

70 return x;	73 return x;

71 }	74 }

72	75

73	76

74 // Ensure that tokens can be stored in a byte.	77 // Ensure that tokens can be stored in a byte.

75 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	78 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

76	79

(...skipping 319 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
396 PushBack('-'); // undo Advance()	399 PushBack('-'); // undo Advance()

397 }	400 }

398 PushBack('!'); // undo Advance()	401 PushBack('!'); // undo Advance()

399 DCHECK(c0_ == '!');	402 DCHECK(c0_ == '!');

400 return Token::LT;	403 return Token::LT;

401 }	404 }

402	405

403	406

404 void Scanner::Scan() {	407 void Scanner::Scan() {

405 next_.literal_chars = NULL;	408 next_.literal_chars = NULL;

	409 next_.raw_literal_chars = NULL;

406 Token::Value token;	410 Token::Value token;

	411

407 do {	412 do {

408 // Remember the position of the next token	413 // Remember the position of the next token

409 next_.location.beg_pos = source_pos();	414 next_.location.beg_pos = source_pos();

410	415

411 switch (c0_) {	416 switch (c0_) {

412 case ' ':	417 case ' ':

413 case '\t':	418 case '\t':

414 Advance();	419 Advance();

415 token = Token::WHITESPACE;	420 token = Token::WHITESPACE;

416 break;	421 break;

(...skipping 202 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
619 break;	624 break;

620	625

621 case '?':	626 case '?':

622 token = Select(Token::CONDITIONAL);	627 token = Select(Token::CONDITIONAL);

623 break;	628 break;

624	629

625 case '~':	630 case '~':

626 token = Select(Token::BIT_NOT);	631 token = Select(Token::BIT_NOT);

627 break;	632 break;

628	633

	634 case '`':

	635 if (HarmonyTemplates()) {

	636 token = ScanTemplateSpan();

	637 break;

	638 }

	639

629 default:	640 default:

630 if (c0_ < 0) {	641 if (c0_ < 0) {

631 token = Token::EOS;	642 token = Token::EOS;

632 } else if (unicode_cache_->IsIdentifierStart(c0_)) {	643 } else if (unicode_cache_->IsIdentifierStart(c0_)) {

633 token = ScanIdentifierOrKeyword();	644 token = ScanIdentifierOrKeyword();

634 } else if (IsDecimalDigit(c0_)) {	645 } else if (IsDecimalDigit(c0_)) {

635 token = ScanNumber(false);	646 token = ScanNumber(false);

636 } else if (SkipWhiteSpace()) {	647 } else if (SkipWhiteSpace()) {

637 token = Token::WHITESPACE;	648 token = Token::WHITESPACE;

638 } else {	649 } else {

(...skipping 25 matching lines...) Expand all Loading...
664 // This function is only called to seek to the location	675 // This function is only called to seek to the location

665 // of the end of a function (at the "}" token). It doesn't matter	676 // of the end of a function (at the "}" token). It doesn't matter

666 // whether there was a line terminator in the part we skip.	677 // whether there was a line terminator in the part we skip.

667 has_line_terminator_before_next_ = false;	678 has_line_terminator_before_next_ = false;

668 has_multiline_comment_before_next_ = false;	679 has_multiline_comment_before_next_ = false;

669 }	680 }

670 Scan();	681 Scan();

671 }	682 }

672	683

673	684

674 bool Scanner::ScanEscape() {	685 bool Scanner::ScanEscape(bool recordRaw) {

675 uc32 c = c0_;	686 uc32 c = c0_;

	687 uc32 rc = c;

	688 bool singleCharEscape = true;

676 Advance();	689 Advance();

677	690

678 // Skip escaped newlines.	691 // Skip escaped newlines.

679 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {	692 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {

680 // Allow CR+LF newlines in multiline string literals.	693 // Allow CR+LF newlines in multiline string literals.

681 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	694 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

682 // Allow LF+CR newlines in multiline string literals.	695 // Allow LF+CR newlines in multiline string literals.

683 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	696 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

684 return true;	697 return true;

685 }	698 }

686	699

687 switch (c) {	700 switch (c) {

688 case '\'': // fall through	701 case '\'': // fall through

689 case '"' : // fall through	702 case '"' : // fall through

690 case '\\': break;	703 case '\\': break;

691 case 'b' : c = '\b'; break;	704 case 'b' : c = '\b'; break;

692 case 'f' : c = '\f'; break;	705 case 'f' : c = '\f'; break;

693 case 'n' : c = '\n'; break;	706 case 'n' : c = '\n'; break;

694 case 'r' : c = '\r'; break;	707 case 'r' : c = '\r'; break;

695 case 't' : c = '\t'; break;	708 case 't' : c = '\t'; break;

696 case 'u' : {	709 case 'u' : {

697 c = ScanHexNumber(4);	710 if (recordRaw) AddRawLiteralChar('u');

	711 singleCharEscape = false;

	712 c = ScanHexNumber(4, recordRaw);

698 if (c < 0) return false;	713 if (c < 0) return false;

699 break;	714 break;

700 }	715 }

701 case 'v' : c = '\v'; break;	716 case 'v' : c = '\v'; break;

702 case 'x' : {	717 case 'x' : {

703 c = ScanHexNumber(2);	718 if (recordRaw) AddRawLiteralChar('x');

	719 singleCharEscape = false;

	720 c = ScanHexNumber(2, recordRaw);

704 if (c < 0) return false;	721 if (c < 0) return false;

705 break;	722 break;

706 }	723 }

707 case '0' : // fall through	724 case '0' : // fall through

708 case '1' : // fall through	725 case '1' : // fall through

709 case '2' : // fall through	726 case '2' : // fall through

710 case '3' : // fall through	727 case '3' : // fall through

711 case '4' : // fall through	728 case '4' : // fall through

712 case '5' : // fall through	729 case '5' : // fall through

713 case '6' : // fall through	730 case '6' : // fall through

714 case '7' : c = ScanOctalEscape(c, 2); break;	731 case '7':

	732 singleCharEscape = false;

	733 c = ScanOctalEscape(c, 2, recordRaw);

	734 break;

715 }	735 }

716	736

717 // According to ECMA-262, section 7.8.4, characters not covered by the	737 // According to ECMA-262, section 7.8.4, characters not covered by the

718 // above cases should be illegal, but they are commonly handled as	738 // above cases should be illegal, but they are commonly handled as

719 // non-escaped characters by JS VMs.	739 // non-escaped characters by JS VMs.

	740 if (singleCharEscape && recordRaw) AddRawLiteralChar(rc);

720 AddLiteralChar(c);	741 AddLiteralChar(c);

721 return true;	742 return true;

722 }	743 }

723	744

724	745

725 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	746 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

726 // ECMA-262. Other JS VMs support them.	747 // ECMA-262. Other JS VMs support them.

727 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {	748 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {

728 uc32 x = c - '0';	749 uc32 x = c - '0';

729 int i = 0;	750 int i = 0;

730 for (; i < length; i++) {	751 for (; i < length; i++) {

731 int d = c0_ - '0';	752 int d = c0_ - '0';

732 if (d < 0 \|\| d > 7) break;	753 if (d < 0 \|\| d > 7) break;

733 int nx = x * 8 + d;	754 int nx = x * 8 + d;

734 if (nx >= 256) break;	755 if (nx >= 256) break;

735 x = nx;	756 x = nx;

	757 if (recordRaw) {

	758 AddRawLiteralChar(c0_);

	759 }

736 Advance();	760 Advance();

737 }	761 }

738 // Anything except '\0' is an octal escape sequence, illegal in strict mode.	762 // Anything except '\0' is an octal escape sequence, illegal in strict mode.

739 // Remember the position of octal escape sequences so that an error	763 // Remember the position of octal escape sequences so that an error

740 // can be reported later (in strict mode).	764 // can be reported later (in strict mode).

741 // We don't report the error immediately, because the octal escape can	765 // We don't report the error immediately, because the octal escape can

742 // occur before the "use strict" directive.	766 // occur before the "use strict" directive.

743 if (c != '0' \|\| i > 0) {	767 if (c != '0' \|\| i > 0) {

744 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);	768 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);

745 }	769 }

(...skipping 17 matching lines...) Expand all Loading...
763 }	787 }

764 }	788 }

765 if (c0_ != quote) return Token::ILLEGAL;	789 if (c0_ != quote) return Token::ILLEGAL;

766 literal.Complete();	790 literal.Complete();

767	791

768 Advance(); // consume quote	792 Advance(); // consume quote

769 return Token::STRING;	793 return Token::STRING;

770 }	794 }

771	795

772	796

	797 Token::Value Scanner::ScanTemplateSpan() {

	798 // When scanning a TemplateSpan, we are looking for the following construct:

	799 // TEMPLATE_SPAN ::

	800 // ` LiteralChars* ${

	801 // \| } LiteralChars* ${

	802 //

	803 // TEMPLATE_TAIL ::

	804 // ` LiteralChars* `

	805 // \| } LiteralChar* `

	806 //

	807 // A TEMPLATE_SPAN should always be followed by an Expression, while a

	808 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be

	809 // followed by an Expression.

	810 //

	811 // raw_literal_chars_ represents TRV or the raw value of the template span,

	812 // per the spec, while literal_chars_ represents TV or the cooked value of

	813 // the template span.

	814 //

	815 // TODO(caitp): Do not store a separate literal buffer for the span TRV.

	816 //

	817

	818 if (next_.token == Token::RBRACE) {

	819 PushBack('}');
	marja 2014/11/11 09:47:24: Hmm, when does this happen and why do we PushBack?
	caitp (gmail) 2014/11/11 13:59:29 (in reply to marja, 2014/11/11 09:47:24): After parsing an expression, the scanner ends up with a peeked RBRACE and we're at the wrong position in source. I'm not sure if this is the right way to deal with that, but it seems to work. Without this, if you have a template like `${a}`, it misses the ending backtick and breaks, which is bad.
	marja 2014/11/11 15:01:56 (in reply to caitp, 2014/11/11 13:59:29): But after this, we do Advance() right away anyway. Would the code be clearer if it was restructured to be PushBack-free?
	820 }

	821 next_.location.beg_pos = source_pos();

	822 Token::Value result = Token::ILLEGAL;

	823 DCHECK(c0_ == '`' \|\| c0_ == '}');

	824 Advance(); // Consume ` or }

	825

	826 LiteralScope literal(this);

	827 while (true) {

	828 uc32 c = c0_;

	829 Advance();

	830 if (c == '`') {

	831 result = Token::TEMPLATE_TAIL;

	832 break;

	833 } else if (c == '$' && c0_ == '{') {

	834 Advance(); // Consume '{'

	835 result = Token::TEMPLATE_SPAN;

	836 break;

	837 } else if (c == '\\') {

	838 AddRawLiteralChar('\\');

	839 if (unicode_cache_->IsLineTerminator(c0_)) {

	840 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty

	841 // code unit sequence.

	842 do {

	843 uc32 lastChar = c0_;

	844 Advance();

	845 if (lastChar == '\r' && c0_ == '\n') Advance();

	846 AddRawLiteralChar('\n');

	847 } while (unicode_cache_->IsLineTerminator(c0_));

	848 } else if (c0_ == '0') {

	849 Advance();

	850 AddRawLiteralChar('0');

	851 AddLiteralChar('0');

	852 } else {

	853 ScanEscape(true);

	854 }

	855 } else if (c < 0) {

	856 // Unterminated template literal

	857 PushBack(c);

	858 break;

	859 } else {

	860 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.

	861 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence

	862 // consisting of the CV 0x000A.

	863 if (c == '\r') {

	864 if (c0_ == '\n') Advance();

	865 c = '\n';

	866 }

	867 AddLiteralChar(c);

	868 AddRawLiteralChar(c);

	869 }

	870 }

	871 literal.Complete();

	872 next_.location.end_pos = source_pos();

	873 next_.token = result;

	874 return result;

	875 }

	876

	877

773 void Scanner::ScanDecimalDigits() {	878 void Scanner::ScanDecimalDigits() {

774 while (IsDecimalDigit(c0_))	879 while (IsDecimalDigit(c0_))

775 AddLiteralCharAdvance();	880 AddLiteralCharAdvance();

776 }	881 }

777	882

778	883

779 Token::Value Scanner::ScanNumber(bool seen_period) {	884 Token::Value Scanner::ScanNumber(bool seen_period) {

780 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	885 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

781	886

782 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;	887 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;

(...skipping 373 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1156	1261

1157	1262

1158 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {	1263 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {

1159 if (is_literal_one_byte()) {	1264 if (is_literal_one_byte()) {

1160 return ast_value_factory->GetOneByteString(literal_one_byte_string());	1265 return ast_value_factory->GetOneByteString(literal_one_byte_string());

1161 }	1266 }

1162 return ast_value_factory->GetTwoByteString(literal_two_byte_string());	1267 return ast_value_factory->GetTwoByteString(literal_two_byte_string());

1163 }	1268 }

1164	1269

1165	1270

	1271 const AstRawString* Scanner::CurrentRawSymbol(

	1272 AstValueFactory* ast_value_factory) {

	1273 if (is_raw_one_byte()) {

	1274 return ast_value_factory->GetOneByteString(raw_one_byte_string());

	1275 }

	1276 return ast_value_factory->GetTwoByteString(raw_two_byte_string());

	1277 }

	1278

	1279

1166 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {	1280 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

1167 if (is_next_literal_one_byte()) {	1281 if (is_next_literal_one_byte()) {

1168 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());	1282 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

1169 }	1283 }

1170 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());	1284 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());

1171 }	1285 }

1172	1286

1173	1287

	1288 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {

	1289 if (is_next_raw_one_byte()) {

	1290 return ast_value_factory->GetOneByteString(next_raw_one_byte_string());

	1291 }

	1292 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());

	1293 }

	1294

	1295

1174 double Scanner::DoubleValue() {	1296 double Scanner::DoubleValue() {

1175 DCHECK(is_literal_one_byte());	1297 DCHECK(is_literal_one_byte());

1176 return StringToDouble(	1298 return StringToDouble(

1177 unicode_cache_,	1299 unicode_cache_,

1178 literal_one_byte_string(),	1300 literal_one_byte_string(),

1179 ALLOW_HEX \| ALLOW_OCTAL \| ALLOW_IMPLICIT_OCTAL \| ALLOW_BINARY);	1301 ALLOW_HEX \| ALLOW_OCTAL \| ALLOW_IMPLICIT_OCTAL \| ALLOW_BINARY);

1180 }	1302 }

1181	1303

1182	1304

1183 int Scanner::FindNumber(DuplicateFinder* finder, int value) {	1305 int Scanner::FindNumber(DuplicateFinder* finder, int value) {

(...skipping 142 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1326 }	1448 }

1327 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));	1449 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));

1328 }	1450 }

1329 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1451 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1330	1452

1331 backing_store_.AddBlock(bytes);	1453 backing_store_.AddBlock(bytes);

1332 return backing_store_.EndSequence().start();	1454 return backing_store_.EndSequence().start();

1333 }	1455 }

1334	1456

1335 } } // namespace v8::internal	1457 } } // namespace v8::internal

OLD	NEW

« src/preparser.h ('K') | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »