src/scanner.cc - Issue 663683006: Implement ES6 Template Literals

Side by Side Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Prevent fall-through to template token handlers Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <cmath>	9 #include <cmath>

10	10

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
47 // in object literals.	47 // in object literals.

48 Init();	48 Init();

49 // Skip initial whitespace allowing HTML comment ends just like	49 // Skip initial whitespace allowing HTML comment ends just like

50 // after a newline and scan first token.	50 // after a newline and scan first token.

51 has_line_terminator_before_next_ = true;	51 has_line_terminator_before_next_ = true;

52 SkipWhiteSpace();	52 SkipWhiteSpace();

53 Scan();	53 Scan();

54 }	54 }

55	55

56	56

57 uc32 Scanner::ScanHexNumber(int expected_length) {	57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {

58 DCHECK(expected_length <= 4); // prevent overflow	58 DCHECK(expected_length <= 4); // prevent overflow

59	59

60 uc32 digits[4] = { 0, 0, 0, 0 };	60 uc32 digits[4] = { 0, 0, 0, 0 };

61 uc32 x = 0;	61 uc32 x = 0;

62 for (int i = 0; i < expected_length; i++) {	62 for (int i = 0; i < expected_length; i++) {

63 digits[i] = c0_;	63 digits[i] = c0_;

64 int d = HexValue(c0_);	64 int d = HexValue(c0_);

65 if (d < 0) {	65 if (d < 0) {

66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes	66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes

67 // should be illegal, but other JS VMs just return the	67 // should be illegal, but other JS VMs just return the

68 // non-escaped version of the original character.	68 // non-escaped version of the original character.

69	69

70 // Push back digits that we have advanced past.	70 // Push back digits that we have advanced past.

71 for (int j = i-1; j >= 0; j--) {	71 for (int j = i-1; j >= 0; j--) {

72 PushBack(digits[j]);	72 PushBack(digits[j]);

73 }	73 }

74 return -1;	74 return -1;

75 }	75 }

76 x = x * 16 + d;	76 x = x * 16 + d;

	77 if (recordRaw) {

	78 AddRawLiteralChar(c0_);

	79 }

77 Advance();	80 Advance();

78 }	81 }

79	82

80 return x;	83 return x;

81 }	84 }

82	85

83	86

84 // Ensure that tokens can be stored in a byte.	87 // Ensure that tokens can be stored in a byte.

85 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

86	89

(...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
404 PushBack('-'); // undo Advance()	407 PushBack('-'); // undo Advance()

405 }	408 }

406 PushBack('!'); // undo Advance()	409 PushBack('!'); // undo Advance()

407 DCHECK(c0_ == '!');	410 DCHECK(c0_ == '!');

408 return Token::LT;	411 return Token::LT;

409 }	412 }

410	413

411	414

412 void Scanner::Scan() {	415 void Scanner::Scan() {

413 next_.literal_chars = NULL;	416 next_.literal_chars = NULL;

	417 next_.raw_literal_chars = NULL;

414 Token::Value token;	418 Token::Value token;

	419

415 do {	420 do {

416 // Remember the position of the next token	421 // Remember the position of the next token

417 next_.location.beg_pos = source_pos();	422 next_.location.beg_pos = source_pos();

418	423

419 switch (c0_) {	424 switch (c0_) {

420 case ' ':	425 case ' ':

421 case '\t':	426 case '\t':

422 Advance();	427 Advance();

423 token = Token::WHITESPACE;	428 token = Token::WHITESPACE;

424 break;	429 break;

(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
627 break;	632 break;

628	633

629 case '?':	634 case '?':

630 token = Select(Token::CONDITIONAL);	635 token = Select(Token::CONDITIONAL);

631 break;	636 break;

632	637

633 case '~':	638 case '~':

634 token = Select(Token::BIT_NOT);	639 token = Select(Token::BIT_NOT);

635 break;	640 break;

636	641

	642 case '`':

	643 if (HarmonyTemplates()) {

	644 token = ScanTemplateSpan();

	645 break;

	646 }

	647

637 default:	648 default:

638 if (unicode_cache_->IsIdentifierStart(c0_)) {	649 if (unicode_cache_->IsIdentifierStart(c0_)) {

639 token = ScanIdentifierOrKeyword();	650 token = ScanIdentifierOrKeyword();

640 } else if (IsDecimalDigit(c0_)) {	651 } else if (IsDecimalDigit(c0_)) {

641 token = ScanNumber(false);	652 token = ScanNumber(false);

642 } else if (SkipWhiteSpace()) {	653 } else if (SkipWhiteSpace()) {

643 token = Token::WHITESPACE;	654 token = Token::WHITESPACE;

644 } else if (c0_ < 0) {	655 } else if (c0_ < 0) {

645 token = Token::EOS;	656 token = Token::EOS;

646 } else {	657 } else {

(...skipping 25 matching lines...) Expand all Loading...
672 // This function is only called to seek to the location	683 // This function is only called to seek to the location

673 // of the end of a function (at the "}" token). It doesn't matter	684 // of the end of a function (at the "}" token). It doesn't matter

674 // whether there was a line terminator in the part we skip.	685 // whether there was a line terminator in the part we skip.

675 has_line_terminator_before_next_ = false;	686 has_line_terminator_before_next_ = false;

676 has_multiline_comment_before_next_ = false;	687 has_multiline_comment_before_next_ = false;

677 }	688 }

678 Scan();	689 Scan();

679 }	690 }

680	691

681	692

682 bool Scanner::ScanEscape() {	693 bool Scanner::ScanEscape(bool recordRaw) {

683 uc32 c = c0_;	694 uc32 c = c0_;

	695 uc32 rc = c;

	696 bool singleCharEscape = true;

684 Advance();	697 Advance();

685	698

686 // Skip escaped newlines.	699 // Skip escaped newlines.

687 if (unicode_cache_->IsLineTerminator(c)) {	700 if (unicode_cache_->IsLineTerminator(c)) {

688 // Allow CR+LF newlines in multiline string literals.	701 // Allow CR+LF newlines in multiline string literals.

689 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	702 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

690 // Allow LF+CR newlines in multiline string literals.	703 // Allow LF+CR newlines in multiline string literals.

691 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	704 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

692 return true;	705 return true;

693 }	706 }

694	707

695 switch (c) {	708 switch (c) {

696 case '\'': // fall through	709 case '\'': // fall through

697 case '"' : // fall through	710 case '"' : // fall through

698 case '\\': break;	711 case '\\': break;

699 case 'b' : c = '\b'; break;	712 case 'b' : c = '\b'; break;

700 case 'f' : c = '\f'; break;	713 case 'f' : c = '\f'; break;

701 case 'n' : c = '\n'; break;	714 case 'n' : c = '\n'; break;

702 case 'r' : c = '\r'; break;	715 case 'r' : c = '\r'; break;

703 case 't' : c = '\t'; break;	716 case 't' : c = '\t'; break;

704 case 'u' : {	717 case 'u' : {

705 c = ScanHexNumber(4);	718 if (recordRaw) AddRawLiteralChar('u');

	719 singleCharEscape = false;

	720 c = ScanHexNumber(4, recordRaw);

706 if (c < 0) return false;	721 if (c < 0) return false;

707 break;	722 break;

708 }	723 }

709 case 'v' : c = '\v'; break;	724 case 'v' : c = '\v'; break;

710 case 'x' : {	725 case 'x' : {

711 c = ScanHexNumber(2);	726 if (recordRaw) AddRawLiteralChar('x');

	727 singleCharEscape = false;

	728 c = ScanHexNumber(2, recordRaw);

712 if (c < 0) return false;	729 if (c < 0) return false;

713 break;	730 break;

714 }	731 }

715 case '0' : // fall through	732 case '0' : // fall through

716 case '1' : // fall through	733 case '1' : // fall through

717 case '2' : // fall through	734 case '2' : // fall through

718 case '3' : // fall through	735 case '3' : // fall through

719 case '4' : // fall through	736 case '4' : // fall through

720 case '5' : // fall through	737 case '5' : // fall through

721 case '6' : // fall through	738 case '6' : // fall through

722 case '7' : c = ScanOctalEscape(c, 2); break;	739 case '7':

	740 singleCharEscape = false;

	741 c = ScanOctalEscape(c, 2, recordRaw);

	742 break;

723 }	743 }

724	744

725 // According to ECMA-262, section 7.8.4, characters not covered by the	745 // According to ECMA-262, section 7.8.4, characters not covered by the

726 // above cases should be illegal, but they are commonly handled as	746 // above cases should be illegal, but they are commonly handled as

727 // non-escaped characters by JS VMs.	747 // non-escaped characters by JS VMs.

	748 if (singleCharEscape && recordRaw) AddRawLiteralChar(rc);

728 AddLiteralChar(c);	749 AddLiteralChar(c);

729 return true;	750 return true;

730 }	751 }

731	752

732	753

733 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	754 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

734 // ECMA-262. Other JS VMs support them.	755 // ECMA-262. Other JS VMs support them.

735 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {	756 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {

736 uc32 x = c - '0';	757 uc32 x = c - '0';

737 int i = 0;	758 int i = 0;

738 for (; i < length; i++) {	759 for (; i < length; i++) {

739 int d = c0_ - '0';	760 int d = c0_ - '0';

740 if (d < 0 || d > 7) break;	761 if (d < 0 || d > 7) break;

741 int nx = x * 8 + d;	762 int nx = x * 8 + d;

742 if (nx >= 256) break;	763 if (nx >= 256) break;

743 x = nx;	764 x = nx;

	765 if (recordRaw) {

	766 AddRawLiteralChar(c0_);

	767 }

744 Advance();	768 Advance();

745 }	769 }

746 // Anything except '\0' is an octal escape sequence, illegal in strict mode.	770 // Anything except '\0' is an octal escape sequence, illegal in strict mode.

747 // Remember the position of octal escape sequences so that an error	771 // Remember the position of octal escape sequences so that an error

748 // can be reported later (in strict mode).	772 // can be reported later (in strict mode).

749 // We don't report the error immediately, because the octal escape can	773 // We don't report the error immediately, because the octal escape can

750 // occur before the "use strict" directive.	774 // occur before the "use strict" directive.

751 if (c != '0' || i > 0) {	775 if (c != '0' || i > 0) {

752 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);	776 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);

753 }	777 }

(...skipping 17 matching lines...) Expand all Loading...
771 }	795 }

772 }	796 }

773 if (c0_ != quote) return Token::ILLEGAL;	797 if (c0_ != quote) return Token::ILLEGAL;

774 literal.Complete();	798 literal.Complete();

775	799

776 Advance(); // consume quote	800 Advance(); // consume quote

777 return Token::STRING;	801 return Token::STRING;

778 }	802 }

779	803

780	804

	805 Token::Value Scanner::ScanTemplateSpan() {

	806 if (next_.token == Token::RBRACE) {

	807 PushBack('}');

	808 }

	809 next_.location.beg_pos = source_pos();

	810 Token::Value result = Token::ILLEGAL;

	811 DCHECK(c0_ == '`' || c0_ == '}');

	812 Advance(); // Consume ` or }

	813

	814 LiteralScope literal(this);

	815 while (true) {

	816 uc32 c = c0_;

	817 Advance();

	818 if (c == '`') {

	819 result = Token::TEMPLATE_TAIL;

	820 break;

	821 } else if (c == '$' && c0_ == '{') {

	822 Advance(); // Consume '{'

	823 result = Token::TEMPLATE_SPAN;

	824 break;

	825 } else if (c == '\\') {

	826 AddRawLiteralChar('\\');

	827 if (unicode_cache_->IsLineTerminator(c0_)) {

	828 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty

	829 // code unit sequence.

	830 do {

	831 uc32 lastChar = c0_;

	832 Advance();

	833 if (lastChar == '\r' && c0_ == '\n') Advance();

	834 AddRawLiteralChar('\n');

	835 } while (unicode_cache_->IsLineTerminator(c0_));

	836 } else if (c0_ == '0') {

	837 Advance();

	838 AddRawLiteralChar('0');

	839 AddLiteralChar('0');

	840 } else {

	841 ScanEscape(true);

	842 }

	843 } else if (c < 0) {

	844 // Unterminated template literal

	845 PushBack(c);

	846 break;

	847 } else {

	848 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.

	849 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence

	850 // consisting of the CV 0x000A.

	851 if (c == '\r') {

	852 if (c0_ == '\n') Advance();

	853 c = '\n';

	854 }

	855 AddLiteralChar(c);

	856 AddRawLiteralChar(c);

	857 }

	858 }

	859 literal.Complete();

	860 next_.location.end_pos = source_pos();

	861 next_.token = result;

	862 return result;

	863 }

	864

	865

781 void Scanner::ScanDecimalDigits() {	866 void Scanner::ScanDecimalDigits() {

782 while (IsDecimalDigit(c0_))	867 while (IsDecimalDigit(c0_))

783 AddLiteralCharAdvance();	868 AddLiteralCharAdvance();

784 }	869 }

785	870

786	871

787 Token::Value Scanner::ScanNumber(bool seen_period) {	872 Token::Value Scanner::ScanNumber(bool seen_period) {

788 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	873 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

789	874

790 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;	875 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;

(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1194	1279

1195	1280

1196 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {	1281 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {

1197 if (is_literal_one_byte()) {	1282 if (is_literal_one_byte()) {

1198 return ast_value_factory->GetOneByteString(literal_one_byte_string());	1283 return ast_value_factory->GetOneByteString(literal_one_byte_string());

1199 }	1284 }

1200 return ast_value_factory->GetTwoByteString(literal_two_byte_string());	1285 return ast_value_factory->GetTwoByteString(literal_two_byte_string());

1201 }	1286 }

1202	1287

1203	1288

	1289 const AstRawString* Scanner::CurrentRawSymbol(

	1290 AstValueFactory* ast_value_factory) {

	1291 if (is_raw_one_byte()) {

	1292 return ast_value_factory->GetOneByteString(raw_one_byte_string());

	1293 }

	1294 return ast_value_factory->GetTwoByteString(raw_two_byte_string());

	1295 }

	1296

	1297

1204 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {	1298 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

1205 if (is_next_literal_one_byte()) {	1299 if (is_next_literal_one_byte()) {

1206 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());	1300 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

1207 }	1301 }

1208 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());	1302 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());

1209 }	1303 }

1210	1304

1211	1305

	1306 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {

	1307 if (is_next_raw_one_byte()) {

	1308 return ast_value_factory->GetOneByteString(next_raw_one_byte_string());

	1309 }

	1310 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());

	1311 }

	1312

	1313

1212 double Scanner::DoubleValue() {	1314 double Scanner::DoubleValue() {

1213 DCHECK(is_literal_one_byte());	1315 DCHECK(is_literal_one_byte());

1214 return StringToDouble(	1316 return StringToDouble(

1215 unicode_cache_,	1317 unicode_cache_,

1216 literal_one_byte_string(),	1318 literal_one_byte_string(),

1217 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);	1319 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);

1218 }	1320 }

1219	1321

1220	1322

1221 int Scanner::FindNumber(DuplicateFinder* finder, int value) {	1323 int Scanner::FindNumber(DuplicateFinder* finder, int value) {

(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1364 }	1466 }

1365 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));	1467 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1366 }	1468 }

1367 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1469 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1368	1470

1369 backing_store_.AddBlock(bytes);	1471 backing_store_.AddBlock(bytes);

1370 return backing_store_.EndSequence().start();	1472 return backing_store_.EndSequence().start();

1371 }	1473 }

1372	1474

1373 } } // namespace v8::internal	1475 } } // namespace v8::internal

OLD	NEW

« src/parser.cc ('K') | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »