src/scanner.cc - Issue 663683006: Implement ES6 Template Literals

Side by Side Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Implement tagged template literals Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <cmath>	9 #include <cmath>

10	10

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
47 // in object literals.	47 // in object literals.

48 Init();	48 Init();

49 // Skip initial whitespace allowing HTML comment ends just like	49 // Skip initial whitespace allowing HTML comment ends just like

50 // after a newline and scan first token.	50 // after a newline and scan first token.

51 has_line_terminator_before_next_ = true;	51 has_line_terminator_before_next_ = true;

52 SkipWhiteSpace();	52 SkipWhiteSpace();

53 Scan();	53 Scan();

54 }	54 }

55	55

56	56

57 uc32 Scanner::ScanHexNumber(int expected_length) {	57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {

58 DCHECK(expected_length <= 4); // prevent overflow	58 DCHECK(expected_length <= 4); // prevent overflow

59	59

60 uc32 digits[4] = { 0, 0, 0, 0 };	60 uc32 digits[4] = { 0, 0, 0, 0 };

61 uc32 x = 0;	61 uc32 x = 0;

62 for (int i = 0; i < expected_length; i++) {	62 for (int i = 0; i < expected_length; i++) {

63 digits[i] = c0_;	63 digits[i] = c0_;

64 int d = HexValue(c0_);	64 int d = HexValue(c0_);

65 if (d < 0) {	65 if (d < 0) {

66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes	66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes

67 // should be illegal, but other JS VMs just return the	67 // should be illegal, but other JS VMs just return the

68 // non-escaped version of the original character.	68 // non-escaped version of the original character.

69	69

70 // Push back digits that we have advanced past.	70 // Push back digits that we have advanced past.

71 for (int j = i-1; j >= 0; j--) {	71 for (int j = i-1; j >= 0; j--) {

72 PushBack(digits[j]);	72 PushBack(digits[j]);

73 }	73 }

74 return -1;	74 return -1;

75 }	75 }

76 x = x * 16 + d;	76 x = x * 16 + d;

	77 if (recordRaw) {

	78 AddRawLiteralChar(c0_);

	79 }

77 Advance();	80 Advance();

78 }	81 }

79	82

80 return x;	83 return x;

81 }	84 }

82	85

83	86

84 // Ensure that tokens can be stored in a byte.	87 // Ensure that tokens can be stored in a byte.

85 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

86	89

(...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
404 PushBack('-'); // undo Advance()	407 PushBack('-'); // undo Advance()

405 }	408 }

406 PushBack('!'); // undo Advance()	409 PushBack('!'); // undo Advance()

407 DCHECK(c0_ == '!');	410 DCHECK(c0_ == '!');

408 return Token::LT;	411 return Token::LT;

409 }	412 }

410	413

411	414

412 void Scanner::Scan() {	415 void Scanner::Scan() {

413 next_.literal_chars = NULL;	416 next_.literal_chars = NULL;

	417 next_.raw_literal_chars = NULL;

414 Token::Value token;	418 Token::Value token;

	419

415 do {	420 do {

416 // Remember the position of the next token	421 // Remember the position of the next token

417 next_.location.beg_pos = source_pos();	422 next_.location.beg_pos = source_pos();

418	423

419 switch (c0_) {	424 switch (c0_) {

420 case ' ':	425 case ' ':

421 case '\t':	426 case '\t':

422 Advance();	427 Advance();

423 token = Token::WHITESPACE;	428 token = Token::WHITESPACE;

424 break;	429 break;

(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
627 break;	632 break;

628	633

629 case '?':	634 case '?':

630 token = Select(Token::CONDITIONAL);	635 token = Select(Token::CONDITIONAL);

631 break;	636 break;

632	637

633 case '~':	638 case '~':

634 token = Select(Token::BIT_NOT);	639 token = Select(Token::BIT_NOT);

635 break;	640 break;

636	641

	642 case '`':

	643 if (HarmonyTemplates()) {

	644 token = ScanTemplateSpan();

	645 break;

	646 }

	647

637 default:	648 default:

638 if (unicode_cache_->IsIdentifierStart(c0_)) {	649 if (unicode_cache_->IsIdentifierStart(c0_)) {

639 token = ScanIdentifierOrKeyword();	650 token = ScanIdentifierOrKeyword();

640 } else if (IsDecimalDigit(c0_)) {	651 } else if (IsDecimalDigit(c0_)) {

641 token = ScanNumber(false);	652 token = ScanNumber(false);

642 } else if (SkipWhiteSpace()) {	653 } else if (SkipWhiteSpace()) {

643 token = Token::WHITESPACE;	654 token = Token::WHITESPACE;

644 } else if (c0_ < 0) {	655 } else if (c0_ < 0) {

645 token = Token::EOS;	656 token = Token::EOS;

646 } else {	657 } else {

(...skipping 25 matching lines...) Expand all Loading...
672 // This function is only called to seek to the location	683 // This function is only called to seek to the location

673 // of the end of a function (at the "}" token). It doesn't matter	684 // of the end of a function (at the "}" token). It doesn't matter

674 // whether there was a line terminator in the part we skip.	685 // whether there was a line terminator in the part we skip.

675 has_line_terminator_before_next_ = false;	686 has_line_terminator_before_next_ = false;

676 has_multiline_comment_before_next_ = false;	687 has_multiline_comment_before_next_ = false;

677 }	688 }

678 Scan();	689 Scan();

679 }	690 }

680	691

681	692

682 bool Scanner::ScanEscape() {	693 bool Scanner::ScanEscape(bool recordRaw) {

683 uc32 c = c0_;	694 uc32 c = c0_;

684 Advance();	695 Advance();

685	696

686 // Skip escaped newlines.	697 // Skip escaped newlines.

687 if (unicode_cache_->IsLineTerminator(c)) {	698 if (unicode_cache_->IsLineTerminator(c)) {

688 // Allow CR+LF newlines in multiline string literals.	699 // Allow CR+LF newlines in multiline string literals.

689 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	700 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

690 // Allow LF+CR newlines in multiline string literals.	701 // Allow LF+CR newlines in multiline string literals.

691 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	702 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

692 return true;	703 return true;

693 }	704 }

694	705

695 switch (c) {	706 switch (c) {

696 case '\'': // fall through	707 case '\'': // fall through

697 case '"' : // fall through	708 case '"' : // fall through

698 case '\\': break;	709 case '\\': break;

699 case 'b' : c = '\b'; break;	710 case 'b' : c = '\b'; break;

700 case 'f' : c = '\f'; break;	711 case 'f' : c = '\f'; break;

701 case 'n' : c = '\n'; break;	712 case 'n' : c = '\n'; break;

702 case 'r' : c = '\r'; break;	713 case 'r' : c = '\r'; break;

703 case 't' : c = '\t'; break;	714 case 't' : c = '\t'; break;

704 case 'u' : {	715 case 'u' : {

705 c = ScanHexNumber(4);	716 c = ScanHexNumber(4, recordRaw);

706 if (c < 0) return false;	717 if (c < 0) return false;

707 break;	718 break;

708 }	719 }

709 case 'v' : c = '\v'; break;	720 case 'v' : c = '\v'; break;

710 case 'x' : {	721 case 'x' : {

711 c = ScanHexNumber(2);	722 c = ScanHexNumber(2, recordRaw);

712 if (c < 0) return false;	723 if (c < 0) return false;

713 break;	724 break;

714 }	725 }

715 case '0' : // fall through	726 case '0' : // fall through

716 case '1' : // fall through	727 case '1' : // fall through

717 case '2' : // fall through	728 case '2' : // fall through

718 case '3' : // fall through	729 case '3' : // fall through

719 case '4' : // fall through	730 case '4' : // fall through

720 case '5' : // fall through	731 case '5' : // fall through

721 case '6' : // fall through	732 case '6' : // fall through

722 case '7' : c = ScanOctalEscape(c, 2); break;	733 case '7':

	734 c = ScanOctalEscape(c, 2, recordRaw);

	735 break;

723 }	736 }

724	737

725 // According to ECMA-262, section 7.8.4, characters not covered by the	738 // According to ECMA-262, section 7.8.4, characters not covered by the

726 // above cases should be illegal, but they are commonly handled as	739 // above cases should be illegal, but they are commonly handled as

727 // non-escaped characters by JS VMs.	740 // non-escaped characters by JS VMs.

728 AddLiteralChar(c);	741 AddLiteralChar(c);

729 return true;	742 return true;

730 }	743 }

731	744

732	745

733 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	746 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

734 // ECMA-262. Other JS VMs support them.	747 // ECMA-262. Other JS VMs support them.

735 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {	748 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {

736 uc32 x = c - '0';	749 uc32 x = c - '0';

737 int i = 0;	750 int i = 0;

738 for (; i < length; i++) {	751 for (; i < length; i++) {

739 int d = c0_ - '0';	752 int d = c0_ - '0';

740 if (d < 0 || d > 7) break;	753 if (d < 0 || d > 7) break;

741 int nx = x * 8 + d;	754 int nx = x * 8 + d;

742 if (nx >= 256) break;	755 if (nx >= 256) break;

743 x = nx;	756 x = nx;

	757 if (recordRaw) {

	758 AddRawLiteralChar(c0_);

	759 }

744 Advance();	760 Advance();

745 }	761 }

746 // Anything except '\0' is an octal escape sequence, illegal in strict mode.	762 // Anything except '\0' is an octal escape sequence, illegal in strict mode.

747 // Remember the position of octal escape sequences so that an error	763 // Remember the position of octal escape sequences so that an error

748 // can be reported later (in strict mode).	764 // can be reported later (in strict mode).

749 // We don't report the error immediately, because the octal escape can	765 // We don't report the error immediately, because the octal escape can

750 // occur before the "use strict" directive.	766 // occur before the "use strict" directive.

751 if (c != '0' || i > 0) {	767 if (c != '0' || i > 0) {

752 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);	768 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);

753 }	769 }

(...skipping 17 matching lines...) Expand all Loading...
771 }	787 }

772 }	788 }

773 if (c0_ != quote) return Token::ILLEGAL;	789 if (c0_ != quote) return Token::ILLEGAL;

774 literal.Complete();	790 literal.Complete();

775	791

776 Advance(); // consume quote	792 Advance(); // consume quote

777 return Token::STRING;	793 return Token::STRING;

778 }	794 }

779	795

780	796

	797 Token::Value Scanner::ScanTemplateSpan() {

	798 if (next_.token == Token::RBRACE) {

	799 PushBack('}');

	800 }

	801 next_.location.beg_pos = source_pos();

	802 Token::Value result = Token::ILLEGAL;

	803 DCHECK(c0_ == '`' || c0_ == '}');

	804 Advance(); // Consume ` or }

	805

	806 LiteralScope literal(this);

	807 while (true) {

	808 uc32 c = c0_;

	809 Advance();

	810 if (c == '`') {

	811 result = Token::TEMPLATE_TAIL;

	812 break;

	813 } else if (c == '$' && c0_ == '{') {

	814 Advance(); // Consume '{'

	815 result = Token::TEMPLATE_SPAN;

	816 break;

	817 } else if (c == '\\') {

	818 if (unicode_cache_->IsLineTerminator(c0_)) {

	819 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty

	820 // code unit sequence.

	821 do {

	822 uc32 lastChar = c0_;

	823 Advance();

	824 if (lastChar == '\r' && c0_ == '\n') Advance();

	825 AddRawLiteralChar('\n');

	826 } while (unicode_cache_->IsLineTerminator(c0_));

	827 } else {

	828 ScanEscape();

	829 }

	830 } else if (c < 0) {

	831 // Unterminated template literal

	832 PushBack(c);

	833 break;

	834 } else {

	835 AddLiteralChar(c);

	836 AddRawLiteralChar(c);

	837 }

	838 }

	839 literal.Complete();

	840 next_.location.end_pos = source_pos();

	841 next_.token = result;

	842 return result;

	843 }

	844

	845

781 void Scanner::ScanDecimalDigits() {	846 void Scanner::ScanDecimalDigits() {

782 while (IsDecimalDigit(c0_))	847 while (IsDecimalDigit(c0_))

783 AddLiteralCharAdvance();	848 AddLiteralCharAdvance();

784 }	849 }

785	850

786	851

787 Token::Value Scanner::ScanNumber(bool seen_period) {	852 Token::Value Scanner::ScanNumber(bool seen_period) {

788 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	853 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

789	854

790 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;	855 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;

(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1194	1259

1195	1260

1196 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {	1261 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {

1197 if (is_literal_one_byte()) {	1262 if (is_literal_one_byte()) {

1198 return ast_value_factory->GetOneByteString(literal_one_byte_string());	1263 return ast_value_factory->GetOneByteString(literal_one_byte_string());

1199 }	1264 }

1200 return ast_value_factory->GetTwoByteString(literal_two_byte_string());	1265 return ast_value_factory->GetTwoByteString(literal_two_byte_string());

1201 }	1266 }

1202	1267

1203	1268

	1269 const AstRawString* Scanner::CurrentRawSymbol(

	1270 AstValueFactory* ast_value_factory) {

	1271 if (is_raw_one_byte()) {

	1272 return ast_value_factory->GetOneByteString(raw_one_byte_string());

	1273 }

	1274 return ast_value_factory->GetTwoByteString(raw_two_byte_string());

	1275 }

	1276

	1277

1204 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {	1278 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

1205 if (is_next_literal_one_byte()) {	1279 if (is_next_literal_one_byte()) {

1206 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());	1280 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

1207 }	1281 }

1208 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());	1282 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());

1209 }	1283 }

1210	1284

1211	1285

	1286 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {

	1287 if (is_next_raw_one_byte()) {

	1288 return ast_value_factory->GetOneByteString(next_raw_one_byte_string());

	1289 }

	1290 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());

	1291 }

	1292

	1293

1212 double Scanner::DoubleValue() {	1294 double Scanner::DoubleValue() {

1213 DCHECK(is_literal_one_byte());	1295 DCHECK(is_literal_one_byte());

1214 return StringToDouble(	1296 return StringToDouble(

1215 unicode_cache_,	1297 unicode_cache_,

1216 literal_one_byte_string(),	1298 literal_one_byte_string(),

1217 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);	1299 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);

1218 }	1300 }

1219	1301

1220	1302

1221 int Scanner::FindNumber(DuplicateFinder* finder, int value) {	1303 int Scanner::FindNumber(DuplicateFinder* finder, int value) {

(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1364 }	1446 }

1365 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));	1447 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1366 }	1448 }

1367 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1449 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1368	1450

1369 backing_store_.AddBlock(bytes);	1451 backing_store_.AddBlock(bytes);

1370 return backing_store_.EndSequence().start();	1452 return backing_store_.EndSequence().start();

1371 }	1453 }

1372	1454

1373 } } // namespace v8::internal	1455 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »