Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« src/scanner.h ('K') | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <cmath> 9 #include <cmath>
10 10
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
47 // in object literals. 47 // in object literals.
48 Init(); 48 Init();
49 // Skip initial whitespace allowing HTML comment ends just like 49 // Skip initial whitespace allowing HTML comment ends just like
50 // after a newline and scan first token. 50 // after a newline and scan first token.
51 has_line_terminator_before_next_ = true; 51 has_line_terminator_before_next_ = true;
52 SkipWhiteSpace(); 52 SkipWhiteSpace();
53 Scan(); 53 Scan();
54 } 54 }
55 55
56 56
// Scans exactly |expected_length| hex digits starting at c0_ and returns
// their combined value. If a non-hex character is met before that many
// digits were read, the digits already consumed are pushed back and -1 is
// returned. In the new revision, when |recordRaw| is true every accepted
// digit is also appended with AddRawLiteralChar() before advancing.
// NOTE(review): on the failure path the consumed digits are pushed back but
// raw characters already appended are not removed in this function --
// confirm that callers discard the raw literal when -1 is returned.
57 uc32 Scanner::ScanHexNumber(int expected_length) { 57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {
58 DCHECK(expected_length <= 4); // prevent overflow 58 DCHECK(expected_length <= 4); // prevent overflow
59 59
60 uc32 digits[4] = { 0, 0, 0, 0 }; 60 uc32 digits[4] = { 0, 0, 0, 0 };
61 uc32 x = 0; 61 uc32 x = 0;
62 for (int i = 0; i < expected_length; i++) { 62 for (int i = 0; i < expected_length; i++) {
63 digits[i] = c0_; 63 digits[i] = c0_;
64 int d = HexValue(c0_); 64 int d = HexValue(c0_);
65 if (d < 0) { 65 if (d < 0) {
66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes 66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
67 // should be illegal, but other JS VMs just return the 67 // should be illegal, but other JS VMs just return the
68 // non-escaped version of the original character. 68 // non-escaped version of the original character.
69 69
70 // Push back digits that we have advanced past. 70 // Push back digits that we have advanced past.
71 for (int j = i-1; j >= 0; j--) { 71 for (int j = i-1; j >= 0; j--) {
72 PushBack(digits[j]); 72 PushBack(digits[j]);
73 } 73 }
74 return -1; 74 return -1;
75 } 75 }
76 x = x * 16 + d; 76 x = x * 16 + d;
// New revision only: mirror the accepted digit into the raw literal.
77 if (recordRaw) {
78 AddRawLiteralChar(c0_);
79 }
77 Advance(); 80 Advance();
78 } 81 }
79 82
80 return x; 83 return x;
81 } 84 }
82 85
83 86
84 // Ensure that tokens can be stored in a byte. 87 // Ensure that tokens can be stored in a byte.
85 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); 88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
86 89
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
210 Token::ILLEGAL, 213 Token::ILLEGAL,
211 Token::ILLEGAL, 214 Token::ILLEGAL,
212 Token::LBRACE, // 0x7b 215 Token::LBRACE, // 0x7b
213 Token::ILLEGAL, 216 Token::ILLEGAL,
214 Token::RBRACE, // 0x7d 217 Token::RBRACE, // 0x7d
215 Token::BIT_NOT, // 0x7e 218 Token::BIT_NOT, // 0x7e
216 Token::ILLEGAL 219 Token::ILLEGAL
217 }; 220 };
218 221
219 222
// Makes the previously scanned token (next_) the current one, scans the
// following token into next_, and returns the now-current token. It also
// clears the line-terminator and multiline-comment flags first.
// Characters up to 0x7f that map to a non-ILLEGAL entry in one_char_tokens
// are tokenized directly without calling Scan(). The new revision bypasses
// that fast path when |mode| is TemplateLiteral and forwards |mode| to Scan().
// NOTE(review): in TemplateLiteral mode, when the current token is RBRACE the
// new revision calls Next() again with no argument; which mode that second
// call uses depends on the default declared in scanner.h -- confirm.
220 Token::Value Scanner::Next() { 223 Token::Value Scanner::Next(Mode mode) {
221 current_ = next_; 224 current_ = next_;
222 has_line_terminator_before_next_ = false; 225 has_line_terminator_before_next_ = false;
223 has_multiline_comment_before_next_ = false; 226 has_multiline_comment_before_next_ = false;
224 if (static_cast<unsigned>(c0_) <= 0x7f) { 227 if (mode != TemplateLiteral && static_cast<unsigned>(c0_) <= 0x7f) {
225 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); 228 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
226 if (token != Token::ILLEGAL) { 229 if (token != Token::ILLEGAL) {
227 int pos = source_pos(); 230 int pos = source_pos();
228 next_.token = token; 231 next_.token = token;
229 next_.location.beg_pos = pos; 232 next_.location.beg_pos = pos;
230 next_.location.end_pos = pos + 1; 233 next_.location.end_pos = pos + 1;
231 Advance(); 234 Advance();
232 return current_.token; 235 return current_.token;
233 } 236 }
234 } 237 }
235 Scan(); 238 Scan(mode);
// New revision only: the RBRACE that closed a substitution is skipped by
// stepping once more, so the caller sees the template span token instead.
239 if (mode == TemplateLiteral && current_.token == Token::RBRACE) {
240 // The current token is now invalid
241 return Next();
242 }
236 return current_.token; 243 return current_.token;
237 } 244 }
238 245
239 246
240 // TODO(yangguo): check whether this is actually necessary. 247 // TODO(yangguo): check whether this is actually necessary.
241 static inline bool IsLittleEndianByteOrderMark(uc32 c) { 248 static inline bool IsLittleEndianByteOrderMark(uc32 c) {
242 // The Unicode value U+FFFE is guaranteed never to be assigned as a 249 // The Unicode value U+FFFE is guaranteed never to be assigned as a
243 // Unicode character; this implies that in a Unicode context the 250 // Unicode character; this implies that in a Unicode context the
244 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 251 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
245 // character expressed in little-endian byte order (since it could 252 // character expressed in little-endian byte order (since it could
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after
402 Advance(); 409 Advance();
403 if (c0_ == '-') return SkipSingleLineComment(); 410 if (c0_ == '-') return SkipSingleLineComment();
404 PushBack('-'); // undo Advance() 411 PushBack('-'); // undo Advance()
405 } 412 }
406 PushBack('!'); // undo Advance() 413 PushBack('!'); // undo Advance()
407 DCHECK(c0_ == '!'); 414 DCHECK(c0_ == '!');
408 return Token::LT; 415 return Token::LT;
409 } 416 }
410 417
411 418
412 void Scanner::Scan() { 419 void Scanner::Scan(Scanner::Mode mode) {
413 next_.literal_chars = NULL; 420 next_.literal_chars = NULL;
421 next_.raw_literal_chars = NULL;
414 Token::Value token; 422 Token::Value token;
423
424 if (mode == TemplateLiteral) {
425 CHECK(HarmonyTemplates());
426
427 // If we have an RBRACE next, PushBack a `}`, this should be the start of a
428 // TemplateMiddle span
429 if (peek() == Token::RBRACE) {
430 PushBack('}');
431 }
432
433 token = ScanTemplateSpan();
434 next_.location.end_pos = source_pos();
435 next_.token = token;
436 return;
437 }
438
415 do { 439 do {
416 // Remember the position of the next token 440 // Remember the position of the next token
417 next_.location.beg_pos = source_pos(); 441 next_.location.beg_pos = source_pos();
418 442
419 switch (c0_) { 443 switch (c0_) {
420 case ' ': 444 case ' ':
421 case '\t': 445 case '\t':
422 Advance(); 446 Advance();
423 token = Token::WHITESPACE; 447 token = Token::WHITESPACE;
424 break; 448 break;
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
627 break; 651 break;
628 652
629 case '?': 653 case '?':
630 token = Select(Token::CONDITIONAL); 654 token = Select(Token::CONDITIONAL);
631 break; 655 break;
632 656
633 case '~': 657 case '~':
634 token = Select(Token::BIT_NOT); 658 token = Select(Token::BIT_NOT);
635 break; 659 break;
636 660
661 case '`':
662 if (HarmonyTemplates()) {
663 token = ScanTemplateSpan();
664 break;
665 }
666
637 default: 667 default:
638 if (unicode_cache_->IsIdentifierStart(c0_)) { 668 if (unicode_cache_->IsIdentifierStart(c0_)) {
639 token = ScanIdentifierOrKeyword(); 669 token = ScanIdentifierOrKeyword();
640 } else if (IsDecimalDigit(c0_)) { 670 } else if (IsDecimalDigit(c0_)) {
641 token = ScanNumber(false); 671 token = ScanNumber(false);
642 } else if (SkipWhiteSpace()) { 672 } else if (SkipWhiteSpace()) {
643 token = Token::WHITESPACE; 673 token = Token::WHITESPACE;
644 } else if (c0_ < 0) { 674 } else if (c0_ < 0) {
645 token = Token::EOS; 675 token = Token::EOS;
646 } else { 676 } else {
(...skipping 25 matching lines...) Expand all
672 // This function is only called to seek to the location 702 // This function is only called to seek to the location
673 // of the end of a function (at the "}" token). It doesn't matter 703 // of the end of a function (at the "}" token). It doesn't matter
674 // whether there was a line terminator in the part we skip. 704 // whether there was a line terminator in the part we skip.
675 has_line_terminator_before_next_ = false; 705 has_line_terminator_before_next_ = false;
676 has_multiline_comment_before_next_ = false; 706 has_multiline_comment_before_next_ = false;
677 } 707 }
678 Scan(); 708 Scan();
679 } 709 }
680 710
681 711
// Scans the part of an escape sequence that follows the backslash; on entry
// c0_ is the first character after it. An escaped line terminator (including
// the CR+LF and LF+CR pairs) is consumed and contributes no character.
// Otherwise the decoded character is appended with AddLiteralChar().
// Returns false when a \u or \x escape is not followed by the required hex
// digits (ScanHexNumber() returned a negative value), true in every other
// case.
// In the new revision |recordRaw| is only forwarded to ScanHexNumber() and
// ScanOctalEscape().
// NOTE(review): neither the backslash nor the escape selector character
// ('n', 'u', 'x', the first octal digit, ...) is appended to the raw literal
// in this function -- confirm the caller records them.
682 bool Scanner::ScanEscape() { 712 bool Scanner::ScanEscape(bool recordRaw) {
683 uc32 c = c0_; 713 uc32 c = c0_;
684 Advance(); 714 Advance();
685 715
686 // Skip escaped newlines. 716 // Skip escaped newlines.
687 if (unicode_cache_->IsLineTerminator(c)) { 717 if (unicode_cache_->IsLineTerminator(c)) {
688 // Allow CR+LF newlines in multiline string literals. 718 // Allow CR+LF newlines in multiline string literals.
689 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 719 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
690 // Allow LF+CR newlines in multiline string literals. 720 // Allow LF+CR newlines in multiline string literals.
691 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 721 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
692 return true; 722 return true;
693 } 723 }
694 724
695 switch (c) { 725 switch (c) {
696 case '\'': // fall through 726 case '\'': // fall through
697 case '"' : // fall through 727 case '"' : // fall through
698 case '\\': break; 728 case '\\': break;
699 case 'b' : c = '\b'; break; 729 case 'b' : c = '\b'; break;
700 case 'f' : c = '\f'; break; 730 case 'f' : c = '\f'; break;
701 case 'n' : c = '\n'; break; 731 case 'n' : c = '\n'; break;
702 case 'r' : c = '\r'; break; 732 case 'r' : c = '\r'; break;
703 case 't' : c = '\t'; break; 733 case 't' : c = '\t'; break;
704 case 'u' : { 734 case 'u' : {
705 c = ScanHexNumber(4); 735 c = ScanHexNumber(4, recordRaw);
706 if (c < 0) return false; 736 if (c < 0) return false;
707 break; 737 break;
708 } 738 }
709 case 'v' : c = '\v'; break; 739 case 'v' : c = '\v'; break;
710 case 'x' : { 740 case 'x' : {
711 c = ScanHexNumber(2); 741 c = ScanHexNumber(2, recordRaw);
712 if (c < 0) return false; 742 if (c < 0) return false;
713 break; 743 break;
714 } 744 }
715 case '0' : // fall through 745 case '0' : // fall through
716 case '1' : // fall through 746 case '1' : // fall through
717 case '2' : // fall through 747 case '2' : // fall through
718 case '3' : // fall through 748 case '3' : // fall through
719 case '4' : // fall through 749 case '4' : // fall through
720 case '5' : // fall through 750 case '5' : // fall through
721 case '6' : // fall through 751 case '6' : // fall through
722 case '7' : c = ScanOctalEscape(c, 2); break; 752 case '7':
753 c = ScanOctalEscape(c, 2, recordRaw);
754 break;
723 } 755 }
724 756
725 // According to ECMA-262, section 7.8.4, characters not covered by the 757 // According to ECMA-262, section 7.8.4, characters not covered by the
726 // above cases should be illegal, but they are commonly handled as 758 // above cases should be illegal, but they are commonly handled as
727 // non-escaped characters by JS VMs. 759 // non-escaped characters by JS VMs.
728 AddLiteralChar(c); 760 AddLiteralChar(c);
729 return true; 761 return true;
730 } 762 }
731 763
732 764
733 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 765 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
734 // ECMA-262. Other JS VMs support them. 766 // ECMA-262. Other JS VMs support them.
735 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { 767 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {
736 uc32 x = c - '0'; 768 uc32 x = c - '0';
737 int i = 0; 769 int i = 0;
738 for (; i < length; i++) { 770 for (; i < length; i++) {
739 int d = c0_ - '0'; 771 int d = c0_ - '0';
740 if (d < 0 || d > 7) break; 772 if (d < 0 || d > 7) break;
741 int nx = x * 8 + d; 773 int nx = x * 8 + d;
742 if (nx >= 256) break; 774 if (nx >= 256) break;
743 x = nx; 775 x = nx;
776 if (recordRaw) {
777 AddRawLiteralChar(c0_);
778 }
744 Advance(); 779 Advance();
745 } 780 }
746 // Anything except '\0' is an octal escape sequence, illegal in strict mode. 781 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
747 // Remember the position of octal escape sequences so that an error 782 // Remember the position of octal escape sequences so that an error
748 // can be reported later (in strict mode). 783 // can be reported later (in strict mode).
749 // We don't report the error immediately, because the octal escape can 784 // We don't report the error immediately, because the octal escape can
750 // occur before the "use strict" directive. 785 // occur before the "use strict" directive.
751 if (c != '0' || i > 0) { 786 if (c != '0' || i > 0) {
752 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); 787 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
753 } 788 }
(...skipping 17 matching lines...) Expand all
771 } 806 }
772 } 807 }
773 if (c0_ != quote) return Token::ILLEGAL; 808 if (c0_ != quote) return Token::ILLEGAL;
774 literal.Complete(); 809 literal.Complete();
775 810
776 Advance(); // consume quote 811 Advance(); // consume quote
777 return Token::STRING; 812 return Token::STRING;
778 } 813 }
779 814
780 815
816 Token::Value Scanner::ScanTemplateSpan() {
817 DCHECK(c0_ == '`' || c0_ == '}');
818 Advance(); // Consume ` or }
819 LiteralScope literal(this);
820 while (true) {
821 uc32 c = c0_;
822 Advance();
823 if (c == '`') {
824 literal.Complete();
825 return Token::TEMPLATE_TAIL;
826 } else if (c == '$' && c0_ == '{') {
827 Advance(); // Consume '{'
828 literal.Complete();
829 return Token::TEMPLATE_SPAN;
830 } else if (c == '\\') {
831 if (unicode_cache_->IsLineTerminator(c0_)) {
832 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
833 // code unit sequence.
834 do {
835 uc32 lastChar = c0_;
836 Advance();
837 if (lastChar == '\r' && c0_ == '\n') Advance();
838 AddRawLiteralChar('\n');
839 } while (unicode_cache_->IsLineTerminator(c0_));
840 } else {
841 ScanEscape();
842 }
843 } else if (c < 0) {
844 // Unterminated template literal
845 literal.Complete();
846 PushBack(c);
847 return Token::ILLEGAL;
848 } else {
849 AddLiteralChar(c);
850 AddRawLiteralChar(c);
851 }
852 }
853 }
854
855
// Consumes the run of decimal digits starting at c0_, calling
// AddLiteralCharAdvance() once per digit. Does nothing if c0_ is not a
// decimal digit.
781 void Scanner::ScanDecimalDigits() { 856 void Scanner::ScanDecimalDigits() {
782 while (IsDecimalDigit(c0_)) 857 while (IsDecimalDigit(c0_))
783 AddLiteralCharAdvance(); 858 AddLiteralCharAdvance();
784 } 859 }
785 860
786 861
787 Token::Value Scanner::ScanNumber(bool seen_period) { 862 Token::Value Scanner::ScanNumber(bool seen_period) {
788 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 863 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
789 864
790 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; 865 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;
(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after
1194 1269
1195 1270
// Looks up the current literal in |ast_value_factory|, using the one-byte
// lookup when is_literal_one_byte() holds and the two-byte lookup otherwise.
1196 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { 1271 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {
1197 if (is_literal_one_byte()) { 1272 if (is_literal_one_byte()) {
1198 return ast_value_factory->GetOneByteString(literal_one_byte_string()); 1273 return ast_value_factory->GetOneByteString(literal_one_byte_string());
1199 } 1274 }
1200 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); 1275 return ast_value_factory->GetTwoByteString(literal_two_byte_string());
1201 } 1276 }
1203 1278
1279 const AstRawString* Scanner::CurrentRawSymbol(
1280 AstValueFactory* ast_value_factory) {
1281 if (is_raw_one_byte()) {
1282 return ast_value_factory->GetOneByteString(raw_one_byte_string());
1283 }
1284 return ast_value_factory->GetTwoByteString(raw_two_byte_string());
1285 }
1286
1287
// Looks up the literal of the next (look-ahead) token in |ast_value_factory|,
// using the one-byte lookup when is_next_literal_one_byte() holds and the
// two-byte lookup otherwise.
1204 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { 1288 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
1205 if (is_next_literal_one_byte()) { 1289 if (is_next_literal_one_byte()) {
1206 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); 1290 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
1207 } 1291 }
1208 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); 1292 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
1209 } 1293 }
1210 1294
1211 1295
1296 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {
1297 if (is_next_raw_one_byte()) {
1298 return ast_value_factory->GetOneByteString(next_raw_one_byte_string());
1299 }
1300 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());
1301 }
1302
1303
// Converts the current literal to a double with StringToDouble(), accepting
// hex, octal, implicit-octal and binary notations. The literal must be
// one-byte (checked with DCHECK).
1212 double Scanner::DoubleValue() { 1304 double Scanner::DoubleValue() {
1213 DCHECK(is_literal_one_byte()); 1305 DCHECK(is_literal_one_byte());
1214 return StringToDouble( 1306 return StringToDouble(
1215 unicode_cache_, 1307 unicode_cache_,
1216 literal_one_byte_string(), 1308 literal_one_byte_string(),
1217 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); 1309 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
1218 } 1310 }
1219 1311
1220 1312
1221 int Scanner::FindNumber(DuplicateFinder* finder, int value) { 1313 int Scanner::FindNumber(DuplicateFinder* finder, int value) {
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
1364 } 1456 }
1365 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1457 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1366 } 1458 }
1367 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1459 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1368 1460
1369 backing_store_.AddBlock(bytes); 1461 backing_store_.AddBlock(bytes);
1370 return backing_store_.EndSequence().start(); 1462 return backing_store_.EndSequence().start();
1371 } 1463 }
1372 1464
1373 } } // namespace v8::internal 1465 } } // namespace v8::internal
OLDNEW
« src/scanner.h ('K') | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698