src/scanner.cc - Issue 663683006: Implement ES6 Template Literals

Side by Side Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: More tests again Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <cmath>	9 #include <cmath>

10	10

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
47 // in object literals.	47 // in object literals.

48 Init();	48 Init();

49 // Skip initial whitespace allowing HTML comment ends just like	49 // Skip initial whitespace allowing HTML comment ends just like

50 // after a newline and scan first token.	50 // after a newline and scan first token.

51 has_line_terminator_before_next_ = true;	51 has_line_terminator_before_next_ = true;

52 SkipWhiteSpace();	52 SkipWhiteSpace();

53 Scan();	53 Scan();

54 }	54 }

55	55

56	56

57 uc32 Scanner::ScanHexNumber(int expected_length) {	57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {

58 DCHECK(expected_length <= 4); // prevent overflow	58 DCHECK(expected_length <= 4); // prevent overflow

59	59

60 uc32 x = 0;	60 uc32 x = 0;

61 for (int i = 0; i < expected_length; i++) {	61 for (int i = 0; i < expected_length; i++) {

62 int d = HexValue(c0_);	62 int d = HexValue(c0_);

63 if (d < 0) {	63 if (d < 0) {

64 return -1;	64 return -1;

65 }	65 }

66 x = x * 16 + d;	66 x = x * 16 + d;

	67 if (recordRaw) {

	68 AddRawLiteralChar(c0_);

	69 }

67 Advance();	70 Advance();

68 }	71 }

69	72

70 return x;	73 return x;

71 }	74 }

72	75

73	76

74 // Ensure that tokens can be stored in a byte.	77 // Ensure that tokens can be stored in a byte.

75 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	78 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

76	79

(...skipping 319 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
396 PushBack('-'); // undo Advance()	399 PushBack('-'); // undo Advance()

397 }	400 }

398 PushBack('!'); // undo Advance()	401 PushBack('!'); // undo Advance()

399 DCHECK(c0_ == '!');	402 DCHECK(c0_ == '!');

400 return Token::LT;	403 return Token::LT;

401 }	404 }

402	405

403	406

404 void Scanner::Scan() {	407 void Scanner::Scan() {

405 next_.literal_chars = NULL;	408 next_.literal_chars = NULL;

	409 next_.raw_literal_chars = NULL;

406 Token::Value token;	410 Token::Value token;

	411

407 do {	412 do {

408 // Remember the position of the next token	413 // Remember the position of the next token

409 next_.location.beg_pos = source_pos();	414 next_.location.beg_pos = source_pos();

410	415

411 switch (c0_) {	416 switch (c0_) {

412 case ' ':	417 case ' ':

413 case '\t':	418 case '\t':

414 Advance();	419 Advance();

415 token = Token::WHITESPACE;	420 token = Token::WHITESPACE;

416 break;	421 break;

(...skipping 202 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
619 break;	624 break;

620	625

621 case '?':	626 case '?':

622 token = Select(Token::CONDITIONAL);	627 token = Select(Token::CONDITIONAL);

623 break;	628 break;

624	629

625 case '~':	630 case '~':

626 token = Select(Token::BIT_NOT);	631 token = Select(Token::BIT_NOT);

627 break;	632 break;

628	633

	634 case '`':

	635 if (HarmonyTemplates()) {

	636 token = ScanTemplateSpan();

	637 break;

	638 }

	639

629 default:	640 default:

630 if (c0_ < 0) {	641 if (c0_ < 0) {

631 token = Token::EOS;	642 token = Token::EOS;

632 } else if (unicode_cache_->IsIdentifierStart(c0_)) {	643 } else if (unicode_cache_->IsIdentifierStart(c0_)) {

633 token = ScanIdentifierOrKeyword();	644 token = ScanIdentifierOrKeyword();

634 } else if (IsDecimalDigit(c0_)) {	645 } else if (IsDecimalDigit(c0_)) {

635 token = ScanNumber(false);	646 token = ScanNumber(false);

636 } else if (SkipWhiteSpace()) {	647 } else if (SkipWhiteSpace()) {

637 token = Token::WHITESPACE;	648 token = Token::WHITESPACE;

638 } else {	649 } else {

(...skipping 25 matching lines...) Expand all Loading...
664 // This function is only called to seek to the location	675 // This function is only called to seek to the location

665 // of the end of a function (at the "}" token). It doesn't matter	676 // of the end of a function (at the "}" token). It doesn't matter

666 // whether there was a line terminator in the part we skip.	677 // whether there was a line terminator in the part we skip.

667 has_line_terminator_before_next_ = false;	678 has_line_terminator_before_next_ = false;

668 has_multiline_comment_before_next_ = false;	679 has_multiline_comment_before_next_ = false;

669 }	680 }

670 Scan();	681 Scan();

671 }	682 }

672	683

673	684

674 bool Scanner::ScanEscape() {	685 bool Scanner::ScanEscape(bool recordRaw) {

675 uc32 c = c0_;	686 uc32 c = c0_;

	687 uc32 rc = c;

	688 bool singleCharEscape = true;

676 Advance();	689 Advance();

677	690

678 // Skip escaped newlines.	691 // Skip escaped newlines.

679 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {	692 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {

680 // Allow CR+LF newlines in multiline string literals.	693 // Allow CR+LF newlines in multiline string literals.

681 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	694 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

682 // Allow LF+CR newlines in multiline string literals.	695 // Allow LF+CR newlines in multiline string literals.

683 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	696 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

684 return true;	697 return true;

685 }	698 }

686	699

687 switch (c) {	700 switch (c) {

688 case '\'': // fall through	701 case '\'': // fall through

689 case '"' : // fall through	702 case '"' : // fall through

690 case '\\': break;	703 case '\\': break;

691 case 'b' : c = '\b'; break;	704 case 'b' : c = '\b'; break;

692 case 'f' : c = '\f'; break;	705 case 'f' : c = '\f'; break;

693 case 'n' : c = '\n'; break;	706 case 'n' : c = '\n'; break;

694 case 'r' : c = '\r'; break;	707 case 'r' : c = '\r'; break;

695 case 't' : c = '\t'; break;	708 case 't' : c = '\t'; break;

696 case 'u' : {	709 case 'u' : {

697 c = ScanHexNumber(4);	710 if (recordRaw) AddRawLiteralChar('u');

	711 singleCharEscape = false;

	712 c = ScanHexNumber(4, recordRaw);

698 if (c < 0) return false;	713 if (c < 0) return false;

699 break;	714 break;

700 }	715 }

701 case 'v' : c = '\v'; break;	716 case 'v' : c = '\v'; break;

702 case 'x' : {	717 case 'x' : {

703 c = ScanHexNumber(2);	718 if (recordRaw) AddRawLiteralChar('x');

	719 singleCharEscape = false;

	720 c = ScanHexNumber(2, recordRaw);

704 if (c < 0) return false;	721 if (c < 0) return false;

705 break;	722 break;

706 }	723 }

707 case '0' : // fall through	724 case '0' : // fall through

708 case '1' : // fall through	725 case '1' : // fall through

709 case '2' : // fall through	726 case '2' : // fall through

710 case '3' : // fall through	727 case '3' : // fall through

711 case '4' : // fall through	728 case '4' : // fall through

712 case '5' : // fall through	729 case '5' : // fall through

713 case '6' : // fall through	730 case '6' : // fall through

714 case '7' : c = ScanOctalEscape(c, 2); break;	731 case '7':

	732 singleCharEscape = false;

	733 c = ScanOctalEscape(c, 2, recordRaw);

	734 break;

715 }	735 }

716	736

717 // According to ECMA-262, section 7.8.4, characters not covered by the	737 // According to ECMA-262, section 7.8.4, characters not covered by the

718 // above cases should be illegal, but they are commonly handled as	738 // above cases should be illegal, but they are commonly handled as

719 // non-escaped characters by JS VMs.	739 // non-escaped characters by JS VMs.

	740 if (singleCharEscape && recordRaw) AddRawLiteralChar(rc);

720 AddLiteralChar(c);	741 AddLiteralChar(c);

721 return true;	742 return true;

722 }	743 }

723	744

724	745

725 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	746 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

726 // ECMA-262. Other JS VMs support them.	747 // ECMA-262. Other JS VMs support them.

727 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {	748 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {

728 uc32 x = c - '0';	749 uc32 x = c - '0';

729 int i = 0;	750 int i = 0;

730 for (; i < length; i++) {	751 for (; i < length; i++) {

731 int d = c0_ - '0';	752 int d = c0_ - '0';

732 if (d < 0 \|\| d > 7) break;	753 if (d < 0 \|\| d > 7) break;

733 int nx = x * 8 + d;	754 int nx = x * 8 + d;

734 if (nx >= 256) break;	755 if (nx >= 256) break;

735 x = nx;	756 x = nx;

	757 if (recordRaw) {

	758 AddRawLiteralChar(c0_);

	759 }

736 Advance();	760 Advance();

737 }	761 }

738 // Anything except '\0' is an octal escape sequence, illegal in strict mode.	762 // Anything except '\0' is an octal escape sequence, illegal in strict mode.

739 // Remember the position of octal escape sequences so that an error	763 // Remember the position of octal escape sequences so that an error

740 // can be reported later (in strict mode).	764 // can be reported later (in strict mode).

741 // We don't report the error immediately, because the octal escape can	765 // We don't report the error immediately, because the octal escape can

742 // occur before the "use strict" directive.	766 // occur before the "use strict" directive.

743 if (c != '0' \|\| i > 0) {	767 if (c != '0' \|\| i > 0) {

744 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);	768 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);

745 }	769 }

(...skipping 17 matching lines...) Expand all Loading...
763 }	787 }

764 }	788 }

765 if (c0_ != quote) return Token::ILLEGAL;	789 if (c0_ != quote) return Token::ILLEGAL;

766 literal.Complete();	790 literal.Complete();

767	791

768 Advance(); // consume quote	792 Advance(); // consume quote

769 return Token::STRING;	793 return Token::STRING;

770 }	794 }

771	795

772	796

	797 Token::Value Scanner::ScanTemplateSpan() {
marja 2014/11/10 15:32:02
Hmm, I don't fully understand this function. Please add comments:
1) about how literal_chars_ and raw_literal_chars_ are used and why (I was briefly confused when I first read that part...)
2) about which part exactly this function scans; what the returned token is and what's in the literal buffer...
I'm also confused because the terminology doesn't seem to match the spec draft... what's "TEMPLATE_SPAN"? (The spec has stuff like template head, template middle, template tail...)

caitp (gmail) 2014/11/10 15:43:56
On 2014/11/10 15:32:02, marja wrote:
> Hmm, I don't fully understand this function.
>
> Pls add comments:
>
> 1) about how literal_chars_ and raw_literal_chars_ are used and why (I was
> briefly confused when I read that part at first...)
> 2) about which part exactly this function scans; what's the returned token and
> what's in the literal buffer...
>
> I'm also confused because the terminology doesn't seem to match the spec
> draft... what's "TEMPLATE_SPAN"? (The spec has stuff like template head,
> template middle, template tail..)

literal_chars_ is being used as the TV, or cooked value, of the template (escaped characters -> unescaped form, basically); raw_literal_chars_ is being used as the TRV, or raw value, of the template (escaped characters as they appear in the source, basically). arv@ has suggested that we do this without keeping a separate string for the raw values, but I don't know how that would actually work in all cases, since raw values need to normalize line endings anyway. Ideas on that are welcome. So the spec has the concept of template head -> template middle -> template end -> etc., but in practice I think there are only 2 cases that matter: the case where you have a literal span followed by an expression, and the case where you have just a literal span.
TEMPLATE_SPAN -> always followed by an expression (ends with ${), while TEMPLATE_TAIL -> always followed by ` or the end of the template literal (if not, then it's a bad token). Basically, all of the NoSubstTemplate productions become TEMPLATE_TAIL, and all of the other template productions become TEMPLATE_SPAN. I think it's simpler this way.
	798 if (next_.token == Token::RBRACE) {

	799 PushBack('}');

	800 }

	801 next_.location.beg_pos = source_pos();

	802 Token::Value result = Token::ILLEGAL;

	803 DCHECK(c0_ == '`' \|\| c0_ == '}');

	804 Advance(); // Consume ` or }

	805

	806 LiteralScope literal(this);

	807 while (true) {

	808 uc32 c = c0_;

	809 Advance();

	810 if (c == '`') {

	811 result = Token::TEMPLATE_TAIL;

	812 break;

	813 } else if (c == '$' && c0_ == '{') {

	814 Advance(); // Consume '{'

	815 result = Token::TEMPLATE_SPAN;

	816 break;

	817 } else if (c == '\\') {

	818 AddRawLiteralChar('\\');

	819 if (unicode_cache_->IsLineTerminator(c0_)) {

	820 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty

	821 // code unit sequence.

	822 do {

	823 uc32 lastChar = c0_;

	824 Advance();

	825 if (lastChar == '\r' && c0_ == '\n') Advance();

	826 AddRawLiteralChar('\n');

	827 } while (unicode_cache_->IsLineTerminator(c0_));

	828 } else if (c0_ == '0') {

	829 Advance();

	830 AddRawLiteralChar('0');

	831 AddLiteralChar('0');

	832 } else {

	833 ScanEscape(true);

	834 }

	835 } else if (c < 0) {

	836 // Unterminated template literal

	837 PushBack(c);

	838 break;

	839 } else {

	840 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.

	841 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence

	842 // consisting of the CV 0x000A.

	843 if (c == '\r') {

	844 if (c0_ == '\n') Advance();

	845 c = '\n';

	846 }

	847 AddLiteralChar(c);

	848 AddRawLiteralChar(c);

	849 }

	850 }

	851 literal.Complete();

	852 next_.location.end_pos = source_pos();

	853 next_.token = result;

	854 return result;

	855 }

	856

	857

773 void Scanner::ScanDecimalDigits() {	858 void Scanner::ScanDecimalDigits() {

774 while (IsDecimalDigit(c0_))	859 while (IsDecimalDigit(c0_))

775 AddLiteralCharAdvance();	860 AddLiteralCharAdvance();

776 }	861 }

777	862

778	863

779 Token::Value Scanner::ScanNumber(bool seen_period) {	864 Token::Value Scanner::ScanNumber(bool seen_period) {

780 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	865 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

781	866

782 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;	867 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;

(...skipping 394 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1177	1262

1178	1263

1179 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {	1264 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {

1180 if (is_literal_one_byte()) {	1265 if (is_literal_one_byte()) {

1181 return ast_value_factory->GetOneByteString(literal_one_byte_string());	1266 return ast_value_factory->GetOneByteString(literal_one_byte_string());

1182 }	1267 }

1183 return ast_value_factory->GetTwoByteString(literal_two_byte_string());	1268 return ast_value_factory->GetTwoByteString(literal_two_byte_string());

1184 }	1269 }

1185	1270

1186	1271

	1272 const AstRawString* Scanner::CurrentRawSymbol(

	1273 AstValueFactory* ast_value_factory) {

	1274 if (is_raw_one_byte()) {

	1275 return ast_value_factory->GetOneByteString(raw_one_byte_string());

	1276 }

	1277 return ast_value_factory->GetTwoByteString(raw_two_byte_string());

	1278 }

	1279

	1280

1187 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {	1281 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

1188 if (is_next_literal_one_byte()) {	1282 if (is_next_literal_one_byte()) {

1189 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());	1283 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

1190 }	1284 }

1191 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());	1285 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());

1192 }	1286 }

1193	1287

1194	1288

	1289 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {

	1290 if (is_next_raw_one_byte()) {

	1291 return ast_value_factory->GetOneByteString(next_raw_one_byte_string());

	1292 }

	1293 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());

	1294 }

	1295

	1296

1195 double Scanner::DoubleValue() {	1297 double Scanner::DoubleValue() {

1196 DCHECK(is_literal_one_byte());	1298 DCHECK(is_literal_one_byte());

1197 return StringToDouble(	1299 return StringToDouble(

1198 unicode_cache_,	1300 unicode_cache_,

1199 literal_one_byte_string(),	1301 literal_one_byte_string(),

1200 ALLOW_HEX \| ALLOW_OCTAL \| ALLOW_IMPLICIT_OCTAL \| ALLOW_BINARY);	1302 ALLOW_HEX \| ALLOW_OCTAL \| ALLOW_IMPLICIT_OCTAL \| ALLOW_BINARY);

1201 }	1303 }

1202	1304

1203	1305

1204 int Scanner::FindNumber(DuplicateFinder* finder, int value) {	1306 int Scanner::FindNumber(DuplicateFinder* finder, int value) {

(...skipping 142 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1347 }	1449 }

1348 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));	1450 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));

1349 }	1451 }

1350 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1452 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1351	1453

1352 backing_store_.AddBlock(bytes);	1454 backing_store_.AddBlock(bytes);

1353 return backing_store_.EndSequence().start();	1455 return backing_store_.EndSequence().start();

1354 }	1456 }

1355	1457

1356 } } // namespace v8::internal	1458 } } // namespace v8::internal

OLD	NEW

« src/preparser.h ('K') | « src/scanner.h ('k') | src/token.h » ('j') | test/mjsunit/es6/templates.js » ('J')