Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(18)

Side by Side Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Prevent fall-through to template token handlers Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« src/parser.cc ('K') | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <cmath> 9 #include <cmath>
10 10
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
47 // in object literals. 47 // in object literals.
48 Init(); 48 Init();
49 // Skip initial whitespace allowing HTML comment ends just like 49 // Skip initial whitespace allowing HTML comment ends just like
50 // after a newline and scan first token. 50 // after a newline and scan first token.
51 has_line_terminator_before_next_ = true; 51 has_line_terminator_before_next_ = true;
52 SkipWhiteSpace(); 52 SkipWhiteSpace();
53 Scan(); 53 Scan();
54 } 54 }
55 55
56 56
57 uc32 Scanner::ScanHexNumber(int expected_length) { 57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {
58 DCHECK(expected_length <= 4); // prevent overflow 58 DCHECK(expected_length <= 4); // prevent overflow
59 59
60 uc32 digits[4] = { 0, 0, 0, 0 }; 60 uc32 digits[4] = { 0, 0, 0, 0 };
61 uc32 x = 0; 61 uc32 x = 0;
62 for (int i = 0; i < expected_length; i++) { 62 for (int i = 0; i < expected_length; i++) {
63 digits[i] = c0_; 63 digits[i] = c0_;
64 int d = HexValue(c0_); 64 int d = HexValue(c0_);
65 if (d < 0) { 65 if (d < 0) {
66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes 66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
67 // should be illegal, but other JS VMs just return the 67 // should be illegal, but other JS VMs just return the
68 // non-escaped version of the original character. 68 // non-escaped version of the original character.
69 69
70 // Push back digits that we have advanced past. 70 // Push back digits that we have advanced past.
71 for (int j = i-1; j >= 0; j--) { 71 for (int j = i-1; j >= 0; j--) {
72 PushBack(digits[j]); 72 PushBack(digits[j]);
73 } 73 }
74 return -1; 74 return -1;
75 } 75 }
76 x = x * 16 + d; 76 x = x * 16 + d;
77 if (recordRaw) {
78 AddRawLiteralChar(c0_);
79 }
77 Advance(); 80 Advance();
78 } 81 }
79 82
80 return x; 83 return x;
81 } 84 }
82 85
83 86
84 // Ensure that tokens can be stored in a byte. 87 // Ensure that tokens can be stored in a byte.
85 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); 88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
86 89
(...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after
404 PushBack('-'); // undo Advance() 407 PushBack('-'); // undo Advance()
405 } 408 }
406 PushBack('!'); // undo Advance() 409 PushBack('!'); // undo Advance()
407 DCHECK(c0_ == '!'); 410 DCHECK(c0_ == '!');
408 return Token::LT; 411 return Token::LT;
409 } 412 }
410 413
411 414
412 void Scanner::Scan() { 415 void Scanner::Scan() {
413 next_.literal_chars = NULL; 416 next_.literal_chars = NULL;
417 next_.raw_literal_chars = NULL;
414 Token::Value token; 418 Token::Value token;
419
415 do { 420 do {
416 // Remember the position of the next token 421 // Remember the position of the next token
417 next_.location.beg_pos = source_pos(); 422 next_.location.beg_pos = source_pos();
418 423
419 switch (c0_) { 424 switch (c0_) {
420 case ' ': 425 case ' ':
421 case '\t': 426 case '\t':
422 Advance(); 427 Advance();
423 token = Token::WHITESPACE; 428 token = Token::WHITESPACE;
424 break; 429 break;
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
627 break; 632 break;
628 633
629 case '?': 634 case '?':
630 token = Select(Token::CONDITIONAL); 635 token = Select(Token::CONDITIONAL);
631 break; 636 break;
632 637
633 case '~': 638 case '~':
634 token = Select(Token::BIT_NOT); 639 token = Select(Token::BIT_NOT);
635 break; 640 break;
636 641
642 case '`':
643 if (HarmonyTemplates()) {
644 token = ScanTemplateSpan();
645 break;
646 }
647
637 default: 648 default:
638 if (unicode_cache_->IsIdentifierStart(c0_)) { 649 if (unicode_cache_->IsIdentifierStart(c0_)) {
639 token = ScanIdentifierOrKeyword(); 650 token = ScanIdentifierOrKeyword();
640 } else if (IsDecimalDigit(c0_)) { 651 } else if (IsDecimalDigit(c0_)) {
641 token = ScanNumber(false); 652 token = ScanNumber(false);
642 } else if (SkipWhiteSpace()) { 653 } else if (SkipWhiteSpace()) {
643 token = Token::WHITESPACE; 654 token = Token::WHITESPACE;
644 } else if (c0_ < 0) { 655 } else if (c0_ < 0) {
645 token = Token::EOS; 656 token = Token::EOS;
646 } else { 657 } else {
(...skipping 25 matching lines...) Expand all
672 // This function is only called to seek to the location 683 // This function is only called to seek to the location
673 // of the end of a function (at the "}" token). It doesn't matter 684 // of the end of a function (at the "}" token). It doesn't matter
674 // whether there was a line terminator in the part we skip. 685 // whether there was a line terminator in the part we skip.
675 has_line_terminator_before_next_ = false; 686 has_line_terminator_before_next_ = false;
676 has_multiline_comment_before_next_ = false; 687 has_multiline_comment_before_next_ = false;
677 } 688 }
678 Scan(); 689 Scan();
679 } 690 }
680 691
681 692
682 bool Scanner::ScanEscape() { 693 bool Scanner::ScanEscape(bool recordRaw) {
683 uc32 c = c0_; 694 uc32 c = c0_;
695 uc32 rc = c;
696 bool singleCharEscape = true;
684 Advance(); 697 Advance();
685 698
686 // Skip escaped newlines. 699 // Skip escaped newlines.
687 if (unicode_cache_->IsLineTerminator(c)) { 700 if (unicode_cache_->IsLineTerminator(c)) {
688 // Allow CR+LF newlines in multiline string literals. 701 // Allow CR+LF newlines in multiline string literals.
689 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 702 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
690 // Allow LF+CR newlines in multiline string literals. 703 // Allow LF+CR newlines in multiline string literals.
691 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 704 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
692 return true; 705 return true;
693 } 706 }
694 707
695 switch (c) { 708 switch (c) {
696 case '\'': // fall through 709 case '\'': // fall through
697 case '"' : // fall through 710 case '"' : // fall through
698 case '\\': break; 711 case '\\': break;
699 case 'b' : c = '\b'; break; 712 case 'b' : c = '\b'; break;
700 case 'f' : c = '\f'; break; 713 case 'f' : c = '\f'; break;
701 case 'n' : c = '\n'; break; 714 case 'n' : c = '\n'; break;
702 case 'r' : c = '\r'; break; 715 case 'r' : c = '\r'; break;
703 case 't' : c = '\t'; break; 716 case 't' : c = '\t'; break;
704 case 'u' : { 717 case 'u' : {
705 c = ScanHexNumber(4); 718 if (recordRaw) AddRawLiteralChar('u');
719 singleCharEscape = false;
720 c = ScanHexNumber(4, recordRaw);
706 if (c < 0) return false; 721 if (c < 0) return false;
707 break; 722 break;
708 } 723 }
709 case 'v' : c = '\v'; break; 724 case 'v' : c = '\v'; break;
710 case 'x' : { 725 case 'x' : {
711 c = ScanHexNumber(2); 726 if (recordRaw) AddRawLiteralChar('x');
727 singleCharEscape = false;
728 c = ScanHexNumber(2, recordRaw);
712 if (c < 0) return false; 729 if (c < 0) return false;
713 break; 730 break;
714 } 731 }
715 case '0' : // fall through 732 case '0' : // fall through
716 case '1' : // fall through 733 case '1' : // fall through
717 case '2' : // fall through 734 case '2' : // fall through
718 case '3' : // fall through 735 case '3' : // fall through
719 case '4' : // fall through 736 case '4' : // fall through
720 case '5' : // fall through 737 case '5' : // fall through
721 case '6' : // fall through 738 case '6' : // fall through
722 case '7' : c = ScanOctalEscape(c, 2); break; 739 case '7':
740 singleCharEscape = false;
741 c = ScanOctalEscape(c, 2, recordRaw);
742 break;
723 } 743 }
724 744
725 // According to ECMA-262, section 7.8.4, characters not covered by the 745 // According to ECMA-262, section 7.8.4, characters not covered by the
726 // above cases should be illegal, but they are commonly handled as 746 // above cases should be illegal, but they are commonly handled as
727 // non-escaped characters by JS VMs. 747 // non-escaped characters by JS VMs.
748 if (singleCharEscape && recordRaw) AddRawLiteralChar(rc);
728 AddLiteralChar(c); 749 AddLiteralChar(c);
729 return true; 750 return true;
730 } 751 }
731 752
732 753
733 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 754 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
734 // ECMA-262. Other JS VMs support them. 755 // ECMA-262. Other JS VMs support them.
735 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { 756 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {
736 uc32 x = c - '0'; 757 uc32 x = c - '0';
737 int i = 0; 758 int i = 0;
738 for (; i < length; i++) { 759 for (; i < length; i++) {
739 int d = c0_ - '0'; 760 int d = c0_ - '0';
740 if (d < 0 || d > 7) break; 761 if (d < 0 || d > 7) break;
741 int nx = x * 8 + d; 762 int nx = x * 8 + d;
742 if (nx >= 256) break; 763 if (nx >= 256) break;
743 x = nx; 764 x = nx;
765 if (recordRaw) {
766 AddRawLiteralChar(c0_);
767 }
744 Advance(); 768 Advance();
745 } 769 }
746 // Anything except '\0' is an octal escape sequence, illegal in strict mode. 770 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
747 // Remember the position of octal escape sequences so that an error 771 // Remember the position of octal escape sequences so that an error
748 // can be reported later (in strict mode). 772 // can be reported later (in strict mode).
749 // We don't report the error immediately, because the octal escape can 773 // We don't report the error immediately, because the octal escape can
750 // occur before the "use strict" directive. 774 // occur before the "use strict" directive.
751 if (c != '0' || i > 0) { 775 if (c != '0' || i > 0) {
752 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); 776 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
753 } 777 }
(...skipping 17 matching lines...) Expand all
771 } 795 }
772 } 796 }
773 if (c0_ != quote) return Token::ILLEGAL; 797 if (c0_ != quote) return Token::ILLEGAL;
774 literal.Complete(); 798 literal.Complete();
775 799
776 Advance(); // consume quote 800 Advance(); // consume quote
777 return Token::STRING; 801 return Token::STRING;
778 } 802 }
779 803
780 804
805 Token::Value Scanner::ScanTemplateSpan() {
806 if (next_.token == Token::RBRACE) {
807 PushBack('}');
808 }
809 next_.location.beg_pos = source_pos();
810 Token::Value result = Token::ILLEGAL;
811 DCHECK(c0_ == '`' || c0_ == '}');
812 Advance(); // Consume ` or }
813
814 LiteralScope literal(this);
815 while (true) {
816 uc32 c = c0_;
817 Advance();
818 if (c == '`') {
819 result = Token::TEMPLATE_TAIL;
820 break;
821 } else if (c == '$' && c0_ == '{') {
822 Advance(); // Consume '{'
823 result = Token::TEMPLATE_SPAN;
824 break;
825 } else if (c == '\\') {
826 AddRawLiteralChar('\\');
827 if (unicode_cache_->IsLineTerminator(c0_)) {
828 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
829 // code unit sequence.
830 do {
831 uc32 lastChar = c0_;
832 Advance();
833 if (lastChar == '\r' && c0_ == '\n') Advance();
834 AddRawLiteralChar('\n');
835 } while (unicode_cache_->IsLineTerminator(c0_));
836 } else if (c0_ == '0') {
837 Advance();
838 AddRawLiteralChar('0');
839 AddLiteralChar('0');
840 } else {
841 ScanEscape(true);
842 }
843 } else if (c < 0) {
844 // Unterminated template literal
845 PushBack(c);
846 break;
847 } else {
848 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
849 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
850 // consisting of the CV 0x000A.
851 if (c == '\r') {
852 if (c0_ == '\n') Advance();
853 c = '\n';
854 }
855 AddLiteralChar(c);
856 AddRawLiteralChar(c);
857 }
858 }
859 literal.Complete();
860 next_.location.end_pos = source_pos();
861 next_.token = result;
862 return result;
863 }
864
865
781 void Scanner::ScanDecimalDigits() { 866 void Scanner::ScanDecimalDigits() {
782 while (IsDecimalDigit(c0_)) 867 while (IsDecimalDigit(c0_))
783 AddLiteralCharAdvance(); 868 AddLiteralCharAdvance();
784 } 869 }
785 870
786 871
787 Token::Value Scanner::ScanNumber(bool seen_period) { 872 Token::Value Scanner::ScanNumber(bool seen_period) {
788 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 873 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
789 874
790 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; 875 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL;
(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after
1194 1279
1195 1280
1196 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { 1281 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {
1197 if (is_literal_one_byte()) { 1282 if (is_literal_one_byte()) {
1198 return ast_value_factory->GetOneByteString(literal_one_byte_string()); 1283 return ast_value_factory->GetOneByteString(literal_one_byte_string());
1199 } 1284 }
1200 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); 1285 return ast_value_factory->GetTwoByteString(literal_two_byte_string());
1201 } 1286 }
1202 1287
1203 1288
1289 const AstRawString* Scanner::CurrentRawSymbol(
1290 AstValueFactory* ast_value_factory) {
1291 if (is_raw_one_byte()) {
1292 return ast_value_factory->GetOneByteString(raw_one_byte_string());
1293 }
1294 return ast_value_factory->GetTwoByteString(raw_two_byte_string());
1295 }
1296
1297
1204 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { 1298 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {
1205 if (is_next_literal_one_byte()) { 1299 if (is_next_literal_one_byte()) {
1206 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); 1300 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
1207 } 1301 }
1208 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); 1302 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
1209 } 1303 }
1210 1304
1211 1305
1306 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {
1307 if (is_next_raw_one_byte()) {
1308 return ast_value_factory->GetOneByteString(next_raw_one_byte_string());
1309 }
1310 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());
1311 }
1312
1313
1212 double Scanner::DoubleValue() { 1314 double Scanner::DoubleValue() {
1213 DCHECK(is_literal_one_byte()); 1315 DCHECK(is_literal_one_byte());
1214 return StringToDouble( 1316 return StringToDouble(
1215 unicode_cache_, 1317 unicode_cache_,
1216 literal_one_byte_string(), 1318 literal_one_byte_string(),
1217 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); 1319 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
1218 } 1320 }
1219 1321
1220 1322
1221 int Scanner::FindNumber(DuplicateFinder* finder, int value) { 1323 int Scanner::FindNumber(DuplicateFinder* finder, int value) {
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
1364 } 1466 }
1365 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1467 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1366 } 1468 }
1367 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1469 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1368 1470
1369 backing_store_.AddBlock(bytes); 1471 backing_store_.AddBlock(bytes);
1370 return backing_store_.EndSequence().start(); 1472 return backing_store_.EndSequence().start();
1371 } 1473 }
1372 1474
1373 } } // namespace v8::internal 1475 } } // namespace v8::internal
OLDNEW
« src/parser.cc ('K') | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698