Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(212)

Side by Side Diff: src/parsing/scanner.cc

Issue 2724003006: [parser] Correctly handle invalid escapes in adjacent template tokens. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/parsing/scanner.h" 7 #include "src/parsing/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
(...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after
390 has_multiline_comment_before_next_ = false; 390 has_multiline_comment_before_next_ = false;
391 if (static_cast<unsigned>(c0_) <= 0x7f) { 391 if (static_cast<unsigned>(c0_) <= 0x7f) {
392 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); 392 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
393 if (token != Token::ILLEGAL) { 393 if (token != Token::ILLEGAL) {
394 int pos = source_pos(); 394 int pos = source_pos();
395 next_.token = token; 395 next_.token = token;
396 next_.location.beg_pos = pos; 396 next_.location.beg_pos = pos;
397 next_.location.end_pos = pos + 1; 397 next_.location.end_pos = pos + 1;
398 next_.literal_chars = nullptr; 398 next_.literal_chars = nullptr;
399 next_.raw_literal_chars = nullptr; 399 next_.raw_literal_chars = nullptr;
400 next_.invalid_template_escape_message_ = MessageTemplate::kNone;
400 Advance(); 401 Advance();
401 return current_.token; 402 return current_.token;
402 } 403 }
403 } 404 }
404 Scan(); 405 Scan();
405 return current_.token; 406 return current_.token;
406 } 407 }
407 408
408 409
409 Token::Value Scanner::PeekAhead() { 410 Token::Value Scanner::PeekAhead() {
(...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after
602 return Token::LT; 603 return Token::LT;
603 } 604 }
604 605
605 found_html_comment_ = true; 606 found_html_comment_ = true;
606 return SkipSingleLineComment(); 607 return SkipSingleLineComment();
607 } 608 }
608 609
609 void Scanner::Scan() { 610 void Scanner::Scan() {
610 next_.literal_chars = NULL; 611 next_.literal_chars = NULL;
611 next_.raw_literal_chars = NULL; 612 next_.raw_literal_chars = NULL;
613 next_.invalid_template_escape_message_ = MessageTemplate::kNone;
612 Token::Value token; 614 Token::Value token;
613 do { 615 do {
614 // Remember the position of the next token 616 // Remember the position of the next token
615 next_.location.beg_pos = source_pos(); 617 next_.location.beg_pos = source_pos();
616 618
617 switch (c0_) { 619 switch (c0_) {
618 case ' ': 620 case ' ':
619 case '\t': 621 case '\t':
620 Advance(); 622 Advance();
621 token = Token::WHITESPACE; 623 token = Token::WHITESPACE;
(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after
882 } 884 }
883 885
884 #ifdef DEBUG 886 #ifdef DEBUG
885 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const { 887 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
886 // Most tokens should not have literal_chars or even raw_literal chars. 888 // Most tokens should not have literal_chars or even raw_literal chars.
887 // The rules are: 889 // The rules are:
888 // - UNINITIALIZED: we don't care. 890 // - UNINITIALIZED: we don't care.
889 // - TEMPLATE_*: need both literal + raw literal chars. 891 // - TEMPLATE_*: need both literal + raw literal chars.
890 // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal. 892 // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.
891 // - all others: should have neither. 893 // - all others: should have neither.
894 // Furthermore, only TEMPLATE_* tokens can have an
895 // invalid_template_escape_message_.
892 896
893 switch (token.token) { 897 switch (token.token) {
894 case Token::UNINITIALIZED: 898 case Token::UNINITIALIZED:
895 // token.literal_chars & other members might be garbage. That's ok. 899 // token.literal_chars & other members might be garbage. That's ok.
896 break; 900 break;
897 case Token::TEMPLATE_SPAN: 901 case Token::TEMPLATE_SPAN:
898 case Token::TEMPLATE_TAIL: 902 case Token::TEMPLATE_TAIL:
899 DCHECK_NOT_NULL(token.raw_literal_chars); 903 DCHECK_NOT_NULL(token.raw_literal_chars);
900 DCHECK_NOT_NULL(token.literal_chars); 904 DCHECK_NOT_NULL(token.literal_chars);
901 break; 905 break;
902 case Token::ESCAPED_KEYWORD: 906 case Token::ESCAPED_KEYWORD:
903 case Token::ESCAPED_STRICT_RESERVED_WORD: 907 case Token::ESCAPED_STRICT_RESERVED_WORD:
904 case Token::FUTURE_STRICT_RESERVED_WORD: 908 case Token::FUTURE_STRICT_RESERVED_WORD:
905 case Token::IDENTIFIER: 909 case Token::IDENTIFIER:
906 case Token::NUMBER: 910 case Token::NUMBER:
907 case Token::REGEXP_LITERAL: 911 case Token::REGEXP_LITERAL:
908 case Token::SMI: 912 case Token::SMI:
909 case Token::STRING: 913 case Token::STRING:
910 DCHECK_NOT_NULL(token.literal_chars); 914 DCHECK_NOT_NULL(token.literal_chars);
911 DCHECK_NULL(token.raw_literal_chars); 915 DCHECK_NULL(token.raw_literal_chars);
916 DCHECK_EQ(token.invalid_template_escape_message_, MessageTemplate::kNone);
912 break; 917 break;
913 default: 918 default:
914 DCHECK_NULL(token.literal_chars); 919 DCHECK_NULL(token.literal_chars);
915 DCHECK_NULL(token.raw_literal_chars); 920 DCHECK_NULL(token.raw_literal_chars);
921 DCHECK_EQ(token.invalid_template_escape_message_, MessageTemplate::kNone);
916 break; 922 break;
917 } 923 }
918 } 924 }
919 #endif // DEBUG 925 #endif // DEBUG
920 926
921 void Scanner::SeekForward(int pos) { 927 void Scanner::SeekForward(int pos) {
922 // After this call, we will have the token at the given position as 928 // After this call, we will have the token at the given position as
923 // the "next" token. The "current" token will be invalid. 929 // the "next" token. The "current" token will be invalid.
924 if (pos == next_.location.beg_pos) return; 930 if (pos == next_.location.beg_pos) return;
925 int current_pos = source_pos(); 931 int current_pos = source_pos();
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
1110 } else { 1116 } else {
1111 AddRawLiteralChar('\n'); 1117 AddRawLiteralChar('\n');
1112 } 1118 }
1113 } 1119 }
1114 } else { 1120 } else {
1115 bool success = ScanEscape<capture_raw, in_template_literal>(); 1121 bool success = ScanEscape<capture_raw, in_template_literal>();
1116 USE(success); 1122 USE(success);
1117 DCHECK_EQ(!success, has_error()); 1123 DCHECK_EQ(!success, has_error());
1118 // For templates, invalid escape sequence checking is handled in the 1124 // For templates, invalid escape sequence checking is handled in the
1119 // parser. 1125 // parser.
1120 scanner_error_state.MoveErrorTo(&invalid_template_escape_message_, 1126 scanner_error_state.MoveErrorTo(
vogelheim 2017/03/03 10:40:08 nitpick: With this change, it might be slightly ni
bakkot1 2017/03/03 20:50:17 TokenDesc is private in Scanner. I could make it p
vogelheim 2017/03/03 21:26:19 Oops. Hrmm, yeah, I think I asked you to take Erro
1121 &invalid_template_escape_location_); 1127 &next_.invalid_template_escape_message_,
1122 octal_error_state.MoveErrorTo(&invalid_template_escape_message_, 1128 &next_.invalid_template_escape_location_);
1123 &invalid_template_escape_location_); 1129 octal_error_state.MoveErrorTo(&next_.invalid_template_escape_message_,
1130 &next_.invalid_template_escape_location_);
1124 } 1131 }
1125 } else if (c < 0) { 1132 } else if (c < 0) {
1126 // Unterminated template literal 1133 // Unterminated template literal
1127 PushBack(c); 1134 PushBack(c);
1128 break; 1135 break;
1129 } else { 1136 } else {
1130 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. 1137 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
1131 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence 1138 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
1132 // consisting of the CV 0x000A. 1139 // consisting of the CV 0x000A.
1133 if (c == '\r') { 1140 if (c == '\r') {
(...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after
1729 } 1736 }
1730 1737
1731 void Scanner::SeekNext(size_t position) { 1738 void Scanner::SeekNext(size_t position) {
1732 // Use with care: This cleanly resets most, but not all scanner state. 1739 // Use with care: This cleanly resets most, but not all scanner state.
1733 // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions. 1740 // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
1734 1741
1735 // To re-scan from a given character position, we need to: 1742 // To re-scan from a given character position, we need to:
1736 // 1, Reset the current_, next_ and next_next_ tokens 1743 // 1, Reset the current_, next_ and next_next_ tokens
1737 // (next_ + next_next_ will be overwritten by Next(), 1744 // (next_ + next_next_ will be overwritten by Next(),
1738 // current_ will remain unchanged, so overwrite it fully.) 1745 // current_ will remain unchanged, so overwrite it fully.)
1739 current_ = {{0, 0}, nullptr, nullptr, 0, Token::UNINITIALIZED}; 1746 current_ = {
1747 {0, 0}, nullptr, nullptr, 0, Token::UNINITIALIZED, MessageTemplate::kNone,
1748 {0, 0}};
1740 next_.token = Token::UNINITIALIZED; 1749 next_.token = Token::UNINITIALIZED;
1741 next_next_.token = Token::UNINITIALIZED; 1750 next_next_.token = Token::UNINITIALIZED;
1742 // 2, reset the source to the desired position, 1751 // 2, reset the source to the desired position,
1743 source_->Seek(position); 1752 source_->Seek(position);
1744 // 3, re-scan, by scanning the look-ahead char + 1 token (next_). 1753 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
1745 c0_ = source_->Advance(); 1754 c0_ = source_->Advance();
1746 Next(); 1755 Next();
1747 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position)); 1756 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));
1748 } 1757 }
1749 1758
1750 } // namespace internal 1759 } // namespace internal
1751 } // namespace v8 1760 } // namespace v8
OLDNEW
« src/parsing/scanner.h ('K') | « src/parsing/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698