src/scanner.cc - Issue 663683006: Implement ES6 Template Literals

Side by Side Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <cmath>	9 #include <cmath>

10	10

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
47 // in object literals.	47 // in object literals.

48 Init();	48 Init();

49 // Skip initial whitespace allowing HTML comment ends just like	49 // Skip initial whitespace allowing HTML comment ends just like

50 // after a newline and scan first token.	50 // after a newline and scan first token.

51 has_line_terminator_before_next_ = true;	51 has_line_terminator_before_next_ = true;

52 SkipWhiteSpace();	52 SkipWhiteSpace();

53 Scan();	53 Scan();

54 }	54 }

55	55

56	56

57 uc32 Scanner::ScanHexNumber(int expected_length) {	57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {

58 DCHECK(expected_length <= 4); // prevent overflow	58 DCHECK(expected_length <= 4); // prevent overflow

59	59

60 uc32 digits[4] = { 0, 0, 0, 0 };	60 uc32 digits[4] = { 0, 0, 0, 0 };

61 uc32 x = 0;	61 uc32 x = 0;

62 for (int i = 0; i < expected_length; i++) {	62 for (int i = 0; i < expected_length; i++) {

63 digits[i] = c0_;	63 digits[i] = c0_;

64 int d = HexValue(c0_);	64 int d = HexValue(c0_);

65 if (d < 0) {	65 if (d < 0) {

66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes	66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes

67 // should be illegal, but other JS VMs just return the	67 // should be illegal, but other JS VMs just return the

68 // non-escaped version of the original character.	68 // non-escaped version of the original character.

69	69

70 // Push back digits that we have advanced past.	70 // Push back digits that we have advanced past.

71 for (int j = i-1; j >= 0; j--) {	71 for (int j = i-1; j >= 0; j--) {

72 PushBack(digits[j]);	72 PushBack(digits[j]);

73 }	73 }

74 return -1;	74 return -1;

75 }	75 }

76 x = x * 16 + d;	76 x = x * 16 + d;

	77 if (recordRaw) {

	78 AddRawLiteralChar(c0_);

	79 }

77 Advance();	80 Advance();

78 }	81 }

79	82

80 return x;	83 return x;

81 }	84 }

82	85

83	86

84 // Ensure that tokens can be stored in a byte.	87 // Ensure that tokens can be stored in a byte.

85 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

86	89

(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
210 Token::ILLEGAL,	213 Token::ILLEGAL,

211 Token::ILLEGAL,	214 Token::ILLEGAL,

212 Token::LBRACE, // 0x7b	215 Token::LBRACE, // 0x7b

213 Token::ILLEGAL,	216 Token::ILLEGAL,

214 Token::RBRACE, // 0x7d	217 Token::RBRACE, // 0x7d

215 Token::BIT_NOT, // 0x7e	218 Token::BIT_NOT, // 0x7e

216 Token::ILLEGAL	219 Token::ILLEGAL

217 };	220 };

218	221

219	222

220 Token::Value Scanner::Next() {	223 Token::Value Scanner::Next(Mode mode) {

221 current_ = next_;	224 current_ = next_;

222 has_line_terminator_before_next_ = false;	225 has_line_terminator_before_next_ = false;

223 has_multiline_comment_before_next_ = false;	226 has_multiline_comment_before_next_ = false;

224 if (static_cast<unsigned>(c0_) <= 0x7f) {	227 if (mode != TemplateLiteral && static_cast<unsigned>(c0_) <= 0x7f) {

225 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);	228 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);

226 if (token != Token::ILLEGAL) {	229 if (token != Token::ILLEGAL) {

227 int pos = source_pos();	230 int pos = source_pos();

228 next_.token = token;	231 next_.token = token;

229 next_.location.beg_pos = pos;	232 next_.location.beg_pos = pos;

230 next_.location.end_pos = pos + 1;	233 next_.location.end_pos = pos + 1;

231 Advance();	234 Advance();

232 return current_.token;	235 return current_.token;

233 }	236 }

234 }	237 }

235 Scan();	238 Scan(mode);

	239 if (mode == TemplateLiteral && current_.token == Token::RBRACE) {

	240 // The current token is now invalid

	241 return Next();

	242 }

236 return current_.token;	243 return current_.token;

237 }	244 }

238	245

239	246

240 // TODO(yangguo): check whether this is actually necessary.	247 // TODO(yangguo): check whether this is actually necessary.

241 static inline bool IsLittleEndianByteOrderMark(uc32 c) {	248 static inline bool IsLittleEndianByteOrderMark(uc32 c) {

242 // The Unicode value U+FFFE is guaranteed never to be assigned as a	249 // The Unicode value U+FFFE is guaranteed never to be assigned as a

243 // Unicode character; this implies that in a Unicode context the	250 // Unicode character; this implies that in a Unicode context the

244 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	251 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

245 // character expressed in little-endian byte order (since it could	252 // character expressed in little-endian byte order (since it could

(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
402 Advance();	409 Advance();

403 if (c0_ == '-') return SkipSingleLineComment();	410 if (c0_ == '-') return SkipSingleLineComment();

404 PushBack('-'); // undo Advance()	411 PushBack('-'); // undo Advance()

405 }	412 }

406 PushBack('!'); // undo Advance()	413 PushBack('!'); // undo Advance()

407 DCHECK(c0_ == '!');	414 DCHECK(c0_ == '!');

408 return Token::LT;	415 return Token::LT;

409 }	416 }

410	417

411	418

412 void Scanner::Scan() {	419 void Scanner::Scan(Scanner::Mode mode) {

413 next_.literal_chars = NULL;	420 next_.literal_chars = NULL;

	421 next_.raw_literal_chars = NULL;

414 Token::Value token;	422 Token::Value token;

	423

	424 if (mode == TemplateLiteral) {

	425 CHECK(HarmonyTemplates());

	426

	427 // If we have an RBRACE next, PushBack a `}`, this should be the start of a

	428 // TemplateMiddle span

	429 if (peek() == Token::RBRACE) {

	430 PushBack('}');

	431 }

	432

	433 token = ScanTemplateSpan();

	434 next_.location.end_pos = source_pos();

	435 next_.token = token;

	436 return;

	437 }

	438

415 do {	439 do {

416 // Remember the position of the next token	440 // Remember the position of the next token

417 next_.location.beg_pos = source_pos();	441 next_.location.beg_pos = source_pos();

418	442

419 switch (c0_) {	443 switch (c0_) {

420 case ' ':	444 case ' ':

421 case '\t':	445 case '\t':

422 Advance();	446 Advance();

423 token = Token::WHITESPACE;	447 token = Token::WHITESPACE;

424 break;	448 break;

(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
627 break;	651 break;

628	652

629 case '?':	653 case '?':

630 token = Select(Token::CONDITIONAL);	654 token = Select(Token::CONDITIONAL);

631 break;	655 break;

632	656

633 case '~':	657 case '~':

634 token = Select(Token::BIT_NOT);	658 token = Select(Token::BIT_NOT);

635 break;	659 break;

636	660

	661 case '`':

	662 if (HarmonyTemplates()) {

	663 token = ScanTemplateSpan();

	664 break;

	665 }

	666

637 default:	667 default:

638 if (unicode_cache_->IsIdentifierStart(c0_)) {	668 if (unicode_cache_->IsIdentifierStart(c0_)) {

639 token = ScanIdentifierOrKeyword();	669 token = ScanIdentifierOrKeyword();

640 } else if (IsDecimalDigit(c0_)) {	670 } else if (IsDecimalDigit(c0_)) {

641 token = ScanNumber(false);	671 token = ScanNumber(false);

642 } else if (SkipWhiteSpace()) {	672 } else if (SkipWhiteSpace()) {

643 token = Token::WHITESPACE;	673 token = Token::WHITESPACE;

644 } else if (c0_ < 0) {	674 } else if (c0_ < 0) {

645 token = Token::EOS;	675 token = Token::EOS;

646 } else {	676 } else {

(...skipping 25 matching lines...) Expand all Loading...
672 // This function is only called to seek to the location	702 // This function is only called to seek to the location

673 // of the end of a function (at the "}" token). It doesn't matter	703 // of the end of a function (at the "}" token). It doesn't matter

674 // whether there was a line terminator in the part we skip.	704 // whether there was a line terminator in the part we skip.

675 has_line_terminator_before_next_ = false;	705 has_line_terminator_before_next_ = false;

676 has_multiline_comment_before_next_ = false;	706 has_multiline_comment_before_next_ = false;

677 }	707 }

678 Scan();	708 Scan();

679 }	709 }

680	710

681	711

682 bool Scanner::ScanEscape() {	712 bool Scanner::ScanEscape(bool recordRaw) {

683 uc32 c = c0_;	713 uc32 c = c0_;

684 Advance();	714 Advance();

685	715

686 // Skip escaped newlines.	716 // Skip escaped newlines.

687 if (unicode_cache_->IsLineTerminator(c)) {	717 if (unicode_cache_->IsLineTerminator(c)) {

688 // Allow CR+LF newlines in multiline string literals.	718 // Allow CR+LF newlines in multiline string literals.

689 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	719 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

690 // Allow LF+CR newlines in multiline string literals.	720 // Allow LF+CR newlines in multiline string literals.

691 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	721 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

692 return true;	722 return true;

693 }	723 }

694	724

695 switch (c) {	725 switch (c) {

696 case '\'': // fall through	726 case '\'': // fall through

697 case '"' : // fall through	727 case '"' : // fall through

698 case '\\': break;	728 case '\\': break;

699 case 'b' : c = '\b'; break;	729 case 'b' : c = '\b'; break;

700 case 'f' : c = '\f'; break;	730 case 'f' : c = '\f'; break;

701 case 'n' : c = '\n'; break;	731 case 'n' : c = '\n'; break;

702 case 'r' : c = '\r'; break;	732 case 'r' : c = '\r'; break;

703 case 't' : c = '\t'; break;	733 case 't' : c = '\t'; break;

704 case 'u' : {	734 case 'u' : {

705 c = ScanHexNumber(4);	735 c = ScanHexNumber(4, recordRaw);

706 if (c < 0) return false;	736 if (c < 0) return false;

707 break;	737 break;

708 }	738 }

709 case 'v' : c = '\v'; break;	739 case 'v' : c = '\v'; break;

710 case 'x' : {	740 case 'x' : {

711 c = ScanHexNumber(2);	741 c = ScanHexNumber(2, recordRaw);

712 if (c < 0) return false;	742 if (c < 0) return false;

713 break;	743 break;

714 }	744 }

715 case '0' : // fall through	745 case '0' : // fall through

716 case '1' : // fall through	746 case '1' : // fall through

717 case '2' : // fall through	747 case '2' : // fall through

718 case '3' : // fall through	748 case '3' : // fall through

719 case '4' : // fall through	749 case '4' : // fall through

720 case '5' : // fall through	750 case '5' : // fall through

721 case '6' : // fall through	751 case '6' : // fall through

722 case '7' : c = ScanOctalEscape(c, 2); break;	752 case '7':

	753 c = ScanOctalEscape(c, 2, recordRaw);

	754 break;

723 }	755 }

724	756

725 // According to ECMA-262, section 7.8.4, characters not covered by the	757 // According to ECMA-262, section 7.8.4, characters not covered by the

726 // above cases should be illegal, but they are commonly handled as	758 // above cases should be illegal, but they are commonly handled as

727 // non-escaped characters by JS VMs.	759 // non-escaped characters by JS VMs.

728 AddLiteralChar(c);	760 AddLiteralChar(c);

729 return true;	761 return true;

730 }	762 }

731	763

732	764

733 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	765 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

734 // ECMA-262. Other JS VMs support them.	766 // ECMA-262. Other JS VMs support them.

735 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {	767 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {

736 uc32 x = c - '0';	768 uc32 x = c - '0';

737 int i = 0;	769 int i = 0;

738 for (; i < length; i++) {	770 for (; i < length; i++) {

739 int d = c0_ - '0';	771 int d = c0_ - '0';

740 if (d < 0 || d > 7) break;	772 if (d < 0 || d > 7) break;

741 int nx = x * 8 + d;	773 int nx = x * 8 + d;

742 if (nx >= 256) break;	774 if (nx >= 256) break;

743 x = nx;	775 x = nx;

	776 if (recordRaw) {

	777 AddRawLiteralChar(c0_);

	778 }

744 Advance();	779 Advance();

745 }	780 }

746 // Anything except '\0' is an octal escape sequence, illegal in strict mode.	781 // Anything except '\0' is an octal escape sequence, illegal in strict mode.

747 // Remember the position of octal escape sequences so that an error	782 // Remember the position of octal escape sequences so that an error

748 // can be reported later (in strict mode).	783 // can be reported later (in strict mode).

749 // We don't report the error immediately, because the octal escape can	784 // We don't report the error immediately, because the octal escape can

750 // occur before the "use strict" directive.	785 // occur before the "use strict" directive.

751 if (c != '0' || i > 0) {	786 if (c != '0' || i > 0) {

752 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);	787 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);

753 }	788 }

(...skipping 17 matching lines...) Expand all Loading...
771 }	806 }

772 }	807 }

773 if (c0_ != quote) return Token::ILLEGAL;	808 if (c0_ != quote) return Token::ILLEGAL;

774 literal.Complete();	809 literal.Complete();

775	810

776 Advance(); // consume quote	811 Advance(); // consume quote

777 return Token::STRING;	812 return Token::STRING;

778 }	813 }

779	814

780	815

	816 Token::Value Scanner::ScanTemplateSpan() {

	817 DCHECK(c0_ == '`' || c0_ == '}');

	818 Advance(); // Consume ` or }

	819 LiteralScope literal(this);

	820 while (true) {

	821 uc32 c = c0_;

	822 Advance();

	823 if (c == '`') {

	824 literal.Complete();

	825 return Token::TEMPLATE_TAIL;

	826 } else if (c == '$' && c0_ == '{') {

	827 Advance(); // Consume '{'

	828 literal.Complete();

	829 return Token::TEMPLATE_SPAN;

	830 } else if (c == '\\') {

	831 if (unicode_cache_->IsLineTerminator(c0_)) {

	832 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty

	833 // code unit sequence.

	834 do {

	835 uc32 lastChar = c0_;

	836 Advance();

	837 if (lastChar == '\r' && c0_ == '\n') Advance();

	838 AddRawLiteralChar('\n');

	839 } while (unicode_cache_->IsLineTerminator(c0_));

	840 } else {

	841 ScanEscape();

	842 }

	843 } else if (c < 0) {

	844 // Unterminated template literal

	845 literal.Complete();

	846 PushBack(c);

	847 return Token::ILLEGAL;

	848 } else {

	849 AddLiteralChar(c);

	850 AddRawLiteralChar(c);

	851 }

	852 }

	853 }

	854

	855

781 void Scanner::ScanDecimalDigits() {	856 void Scanner::ScanDecimalDigits() {

782 while (IsDecimalDigit(c0_))	857 while (IsDecimalDigit(c0_))

783 AddLiteralCharAdvance();	858 AddLiteralCharAdvance();

784 }	859 }

785	860

786	861

787 Token::Value Scanner::ScanNumber(bool seen_period) {	862 Token::Value Scanner::ScanNumber(bool seen_period) {

788 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	863 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

789	864

790 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;	865 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;

(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1194	1269

1195	1270

1196 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {	1271 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {

1197 if (is_literal_one_byte()) {	1272 if (is_literal_one_byte()) {

1198 return ast_value_factory->GetOneByteString(literal_one_byte_string());	1273 return ast_value_factory->GetOneByteString(literal_one_byte_string());

1199 }	1274 }

1200 return ast_value_factory->GetTwoByteString(literal_two_byte_string());	1275 return ast_value_factory->GetTwoByteString(literal_two_byte_string());

1201 }	1276 }

1202	1277

1203	1278

	1279 const AstRawString* Scanner::CurrentRawSymbol(

	1280 AstValueFactory* ast_value_factory) {

	1281 if (is_raw_one_byte()) {

	1282 return ast_value_factory->GetOneByteString(raw_one_byte_string());

	1283 }

	1284 return ast_value_factory->GetTwoByteString(raw_two_byte_string());

	1285 }

	1286

	1287

1204 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {	1288 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

1205 if (is_next_literal_one_byte()) {	1289 if (is_next_literal_one_byte()) {

1206 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());	1290 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

1207 }	1291 }

1208 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());	1292 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());

1209 }	1293 }

1210	1294

1211	1295

	1296 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {

	1297 if (is_next_raw_one_byte()) {

	1298 return ast_value_factory->GetOneByteString(next_raw_one_byte_string());

	1299 }

	1300 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());

	1301 }

	1302

	1303

1212 double Scanner::DoubleValue() {	1304 double Scanner::DoubleValue() {

1213 DCHECK(is_literal_one_byte());	1305 DCHECK(is_literal_one_byte());

1214 return StringToDouble(	1306 return StringToDouble(

1215 unicode_cache_,	1307 unicode_cache_,

1216 literal_one_byte_string(),	1308 literal_one_byte_string(),

1217 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);	1309 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);

1218 }	1310 }

1219	1311

1220	1312

1221 int Scanner::FindNumber(DuplicateFinder* finder, int value) {	1313 int Scanner::FindNumber(DuplicateFinder* finder, int value) {

(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1364 }	1456 }

1365 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));	1457 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1366 }	1458 }

1367 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1459 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1368	1460

1369 backing_store_.AddBlock(bytes);	1461 backing_store_.AddBlock(bytes);

1370 return backing_store_.EndSequence().start();	1462 return backing_store_.EndSequence().start();

1371 }	1463 }

1372	1464

1373 } } // namespace v8::internal	1465 } } // namespace v8::internal

OLD	NEW

« src/scanner.h ('K') | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »