Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: More tests again Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <cmath> 9 #include <cmath>
10 10
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
47 // in object literals. 47 // in object literals.
48 Init(); 48 Init();
49 // Skip initial whitespace allowing HTML comment ends just like 49 // Skip initial whitespace allowing HTML comment ends just like
50 // after a newline and scan first token. 50 // after a newline and scan first token.
51 has_line_terminator_before_next_ = true; 51 has_line_terminator_before_next_ = true;
52 SkipWhiteSpace(); 52 SkipWhiteSpace();
53 Scan(); 53 Scan();
54 } 54 }
55 55
56 56
57 uc32 Scanner::ScanHexNumber(int expected_length) { 57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {
58 DCHECK(expected_length <= 4); // prevent overflow 58 DCHECK(expected_length <= 4); // prevent overflow
59 59
60 uc32 x = 0; 60 uc32 x = 0;
61 for (int i = 0; i < expected_length; i++) { 61 for (int i = 0; i < expected_length; i++) {
62 int d = HexValue(c0_); 62 int d = HexValue(c0_);
63 if (d < 0) { 63 if (d < 0) {
64 return -1; 64 return -1;
65 } 65 }
66 x = x * 16 + d; 66 x = x * 16 + d;
67 if (recordRaw) {
68 AddRawLiteralChar(c0_);
69 }
67 Advance(); 70 Advance();
68 } 71 }
69 72
70 return x; 73 return x;
71 } 74 }
72 75
73 76
74 // Ensure that tokens can be stored in a byte. 77 // Ensure that tokens can be stored in a byte.
75 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); 78 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
76 79
(...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after
396 PushBack('-'); // undo Advance() 399 PushBack('-'); // undo Advance()
397 } 400 }
398 PushBack('!'); // undo Advance() 401 PushBack('!'); // undo Advance()
399 DCHECK(c0_ == '!'); 402 DCHECK(c0_ == '!');
400 return Token::LT; 403 return Token::LT;
401 } 404 }
402 405
403 406
404 void Scanner::Scan() { 407 void Scanner::Scan() {
405 next_.literal_chars = NULL; 408 next_.literal_chars = NULL;
409 next_.raw_literal_chars = NULL;
406 Token::Value token; 410 Token::Value token;
411
407 do { 412 do {
408 // Remember the position of the next token 413 // Remember the position of the next token
409 next_.location.beg_pos = source_pos(); 414 next_.location.beg_pos = source_pos();
410 415
411 switch (c0_) { 416 switch (c0_) {
412 case ' ': 417 case ' ':
413 case '\t': 418 case '\t':
414 Advance(); 419 Advance();
415 token = Token::WHITESPACE; 420 token = Token::WHITESPACE;
416 break; 421 break;
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
619 break; 624 break;
620 625
621 case '?': 626 case '?':
622 token = Select(Token::CONDITIONAL); 627 token = Select(Token::CONDITIONAL);
623 break; 628 break;
624 629
625 case '~': 630 case '~':
626 token = Select(Token::BIT_NOT); 631 token = Select(Token::BIT_NOT);
627 break; 632 break;
628 633
634 case '`':
635 if (HarmonyTemplates()) {
636 token = ScanTemplateSpan();
637 break;
638 }
639
629 default: 640 default:
630 if (c0_ < 0) { 641 if (c0_ < 0) {
631 token = Token::EOS; 642 token = Token::EOS;
632 } else if (unicode_cache_->IsIdentifierStart(c0_)) { 643 } else if (unicode_cache_->IsIdentifierStart(c0_)) {
633 token = ScanIdentifierOrKeyword(); 644 token = ScanIdentifierOrKeyword();
634 } else if (IsDecimalDigit(c0_)) { 645 } else if (IsDecimalDigit(c0_)) {
635 token = ScanNumber(false); 646 token = ScanNumber(false);
636 } else if (SkipWhiteSpace()) { 647 } else if (SkipWhiteSpace()) {
637 token = Token::WHITESPACE; 648 token = Token::WHITESPACE;
638 } else { 649 } else {
(...skipping 25 matching lines...) Expand all
664 // This function is only called to seek to the location 675 // This function is only called to seek to the location
665 // of the end of a function (at the "}" token). It doesn't matter 676 // of the end of a function (at the "}" token). It doesn't matter
666 // whether there was a line terminator in the part we skip. 677 // whether there was a line terminator in the part we skip.
667 has_line_terminator_before_next_ = false; 678 has_line_terminator_before_next_ = false;
668 has_multiline_comment_before_next_ = false; 679 has_multiline_comment_before_next_ = false;
669 } 680 }
670 Scan(); 681 Scan();
671 } 682 }
672 683
673 684
674 bool Scanner::ScanEscape() { 685 bool Scanner::ScanEscape(bool recordRaw) {
675 uc32 c = c0_; 686 uc32 c = c0_;
687 uc32 rc = c;
688 bool singleCharEscape = true;
676 Advance(); 689 Advance();
677 690
678 // Skip escaped newlines. 691 // Skip escaped newlines.
679 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { 692 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {
680 // Allow CR+LF newlines in multiline string literals. 693 // Allow CR+LF newlines in multiline string literals.
681 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 694 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
682 // Allow LF+CR newlines in multiline string literals. 695 // Allow LF+CR newlines in multiline string literals.
683 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 696 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
684 return true; 697 return true;
685 } 698 }
686 699
687 switch (c) { 700 switch (c) {
688 case '\'': // fall through 701 case '\'': // fall through
689 case '"' : // fall through 702 case '"' : // fall through
690 case '\\': break; 703 case '\\': break;
691 case 'b' : c = '\b'; break; 704 case 'b' : c = '\b'; break;
692 case 'f' : c = '\f'; break; 705 case 'f' : c = '\f'; break;
693 case 'n' : c = '\n'; break; 706 case 'n' : c = '\n'; break;
694 case 'r' : c = '\r'; break; 707 case 'r' : c = '\r'; break;
695 case 't' : c = '\t'; break; 708 case 't' : c = '\t'; break;
696 case 'u' : { 709 case 'u' : {
697 c = ScanHexNumber(4); 710 if (recordRaw) AddRawLiteralChar('u');
711 singleCharEscape = false;
712 c = ScanHexNumber(4, recordRaw);
698 if (c < 0) return false; 713 if (c < 0) return false;
699 break; 714 break;
700 } 715 }
701 case 'v' : c = '\v'; break; 716 case 'v' : c = '\v'; break;
702 case 'x' : { 717 case 'x' : {
703 c = ScanHexNumber(2); 718 if (recordRaw) AddRawLiteralChar('x');
719 singleCharEscape = false;
720 c = ScanHexNumber(2, recordRaw);
704 if (c < 0) return false; 721 if (c < 0) return false;
705 break; 722 break;
706 } 723 }
707 case '0' : // fall through 724 case '0' : // fall through
708 case '1' : // fall through 725 case '1' : // fall through
709 case '2' : // fall through 726 case '2' : // fall through
710 case '3' : // fall through 727 case '3' : // fall through
711 case '4' : // fall through 728 case '4' : // fall through
712 case '5' : // fall through 729 case '5' : // fall through
713 case '6' : // fall through 730 case '6' : // fall through
714 case '7' : c = ScanOctalEscape(c, 2); break; 731 case '7':
732 singleCharEscape = false;
733 c = ScanOctalEscape(c, 2, recordRaw);
734 break;
715 } 735 }
716 736
717 // According to ECMA-262, section 7.8.4, characters not covered by the 737 // According to ECMA-262, section 7.8.4, characters not covered by the
718 // above cases should be illegal, but they are commonly handled as 738 // above cases should be illegal, but they are commonly handled as
719 // non-escaped characters by JS VMs. 739 // non-escaped characters by JS VMs.
740 if (singleCharEscape && recordRaw) AddRawLiteralChar(rc);
720 AddLiteralChar(c); 741 AddLiteralChar(c);
721 return true; 742 return true;
722 } 743 }
723 744
724 745
725 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 746 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
726 // ECMA-262. Other JS VMs support them. 747 // ECMA-262. Other JS VMs support them.
727 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { 748 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {
728 uc32 x = c - '0'; 749 uc32 x = c - '0';
729 int i = 0; 750 int i = 0;
730 for (; i < length; i++) { 751 for (; i < length; i++) {
731 int d = c0_ - '0'; 752 int d = c0_ - '0';
732 if (d < 0 || d > 7) break; 753 if (d < 0 || d > 7) break;
733 int nx = x * 8 + d; 754 int nx = x * 8 + d;
734 if (nx >= 256) break; 755 if (nx >= 256) break;
735 x = nx; 756 x = nx;
757 if (recordRaw) {
758 AddRawLiteralChar(c0_);
759 }
736 Advance(); 760 Advance();
737 } 761 }
738 // Anything except '\0' is an octal escape sequence, illegal in strict mode. 762 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
739 // Remember the position of octal escape sequences so that an error 763 // Remember the position of octal escape sequences so that an error
740 // can be reported later (in strict mode). 764 // can be reported later (in strict mode).
741 // We don't report the error immediately, because the octal escape can 765 // We don't report the error immediately, because the octal escape can
742 // occur before the "use strict" directive. 766 // occur before the "use strict" directive.
743 if (c != '0' || i > 0) { 767 if (c != '0' || i > 0) {
744 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); 768 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
745 } 769 }
(...skipping 17 matching lines...) Expand all
763 } 787 }
764 } 788 }
765 if (c0_ != quote) return Token::ILLEGAL; 789 if (c0_ != quote) return Token::ILLEGAL;
766 literal.Complete(); 790 literal.Complete();
767 791
768 Advance(); // consume quote 792 Advance(); // consume quote
769 return Token::STRING; 793 return Token::STRING;
770 } 794 }
771 795
772 796
797 Token::Value Scanner::ScanTemplateSpan() {
marja 2014/11/10 15:32:02 Hmm, I don't fully understand this function. Pls… [comment preview truncated]
caitp (gmail) 2014/11/10 15:43:56 literal_chars_ are being used as the TV or cooked… [comment preview truncated]
798 if (next_.token == Token::RBRACE) {
799 PushBack('}');
800 }
801 next_.location.beg_pos = source_pos();
802 Token::Value result = Token::ILLEGAL;
803 DCHECK(c0_ == '`' || c0_ == '}');
804 Advance(); // Consume ` or }
805
806 LiteralScope literal(this);
807 while (true) {
808 uc32 c = c0_;
809 Advance();
810 if (c == '`') {
811 result = Token::TEMPLATE_TAIL;
812 break;
813 } else if (c == '$' && c0_ == '{') {
814 Advance(); // Consume '{'
815 result = Token::TEMPLATE_SPAN;
816 break;
817 } else if (c == '\\') {
818 AddRawLiteralChar('\\');
819 if (unicode_cache_->IsLineTerminator(c0_)) {
820 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
821 // code unit sequence.
822 do {
823 uc32 lastChar = c0_;
824 Advance();
825 if (lastChar == '\r' && c0_ == '\n') Advance();
826 AddRawLiteralChar('\n');
827 } while (unicode_cache_->IsLineTerminator(c0_));
828 } else if (c0_ == '0') {
829 Advance();
830 AddRawLiteralChar('0');
831 AddLiteralChar('0');
832 } else {
833 ScanEscape(true);
834 }
835 } else if (c < 0) {
836 // Unterminated template literal
837 PushBack(c);
838 break;
839 } else {
840 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
841 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
842 // consisting of the CV 0x000A.
843 if (c == '\r') {
844 if (c0_ == '\n') Advance();
845 c = '\n';
846 }
847 AddLiteralChar(c);
848 AddRawLiteralChar(c);
849 }
850 }
851 literal.Complete();
852 next_.location.end_pos = source_pos();
853 next_.token = result;
854 return result;
855 }
856
857
773 void Scanner::ScanDecimalDigits() { 858 void Scanner::ScanDecimalDigits() {
774 while (IsDecimalDigit(c0_)) 859 while (IsDecimalDigit(c0_))
775 AddLiteralCharAdvance(); 860 AddLiteralCharAdvance();
776 } 861 }
777 862
778 863
779 Token::Value Scanner::ScanNumber(bool seen_period) { 864 Token::Value Scanner::ScanNumber(bool seen_period) {
780 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 865 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
781 866
782 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; 867 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;
(...skipping 394 matching lines...) Expand 10 before | Expand all | Expand 10 after
1177 1262
1178 1263
1179 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { 1264 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {
1180 if (is_literal_one_byte()) { 1265 if (is_literal_one_byte()) {
1181 return ast_value_factory->GetOneByteString(literal_one_byte_string()); 1266 return ast_value_factory->GetOneByteString(literal_one_byte_string());
1182 } 1267 }
1183 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); 1268 return ast_value_factory->GetTwoByteString(literal_two_byte_string());
1184 } 1269 }
1185 1270
1186 1271
1272 const AstRawString* Scanner::CurrentRawSymbol(
1273 AstValueFactory* ast_value_factory) {
1274 if (is_raw_one_byte()) {
1275 return ast_value_factory->GetOneByteString(raw_one_byte_string());
1276 }
1277 return ast_value_factory->GetTwoByteString(raw_two_byte_string());
1278 }
1279
1280
1187 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { 1281 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
1188 if (is_next_literal_one_byte()) { 1282 if (is_next_literal_one_byte()) {
1189 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); 1283 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
1190 } 1284 }
1191 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); 1285 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
1192 } 1286 }
1193 1287
1194 1288
1289 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {
1290 if (is_next_raw_one_byte()) {
1291 return ast_value_factory->GetOneByteString(next_raw_one_byte_string());
1292 }
1293 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());
1294 }
1295
1296
1195 double Scanner::DoubleValue() { 1297 double Scanner::DoubleValue() {
1196 DCHECK(is_literal_one_byte()); 1298 DCHECK(is_literal_one_byte());
1197 return StringToDouble( 1299 return StringToDouble(
1198 unicode_cache_, 1300 unicode_cache_,
1199 literal_one_byte_string(), 1301 literal_one_byte_string(),
1200 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); 1302 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
1201 } 1303 }
1202 1304
1203 1305
1204 int Scanner::FindNumber(DuplicateFinder* finder, int value) { 1306 int Scanner::FindNumber(DuplicateFinder* finder, int value) {
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
1347 } 1449 }
1348 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1450 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1349 } 1451 }
1350 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1452 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1351 1453
1352 backing_store_.AddBlock(bytes); 1454 backing_store_.AddBlock(bytes);
1353 return backing_store_.EndSequence().start(); 1455 return backing_store_.EndSequence().start();
1354 } 1456 }
1355 1457
1356 } } // namespace v8::internal 1458 } } // namespace v8::internal
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698