src/parsing/scanner.cc - Issue 2724003006: [parser] Correctly handle invalid escapes in adjacent template tokens.

Side by Side Diff: src/parsing/scanner.cc

Issue 2724003006: [parser] Correctly handle invalid escapes in adjacent template tokens. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 379 matching lines...)
390 has_multiline_comment_before_next_ = false;	390 has_multiline_comment_before_next_ = false;

391 if (static_cast<unsigned>(c0_) <= 0x7f) {	391 if (static_cast<unsigned>(c0_) <= 0x7f) {

392 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);	392 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);

393 if (token != Token::ILLEGAL) {	393 if (token != Token::ILLEGAL) {

394 int pos = source_pos();	394 int pos = source_pos();

395 next_.token = token;	395 next_.token = token;

396 next_.location.beg_pos = pos;	396 next_.location.beg_pos = pos;

397 next_.location.end_pos = pos + 1;	397 next_.location.end_pos = pos + 1;

398 next_.literal_chars = nullptr;	398 next_.literal_chars = nullptr;

399 next_.raw_literal_chars = nullptr;	399 next_.raw_literal_chars = nullptr;

	400 next_.invalid_template_escape_message_ = MessageTemplate::kNone;

400 Advance();	401 Advance();

401 return current_.token;	402 return current_.token;

402 }	403 }

403 }	404 }

404 Scan();	405 Scan();

405 return current_.token;	406 return current_.token;

406 }	407 }

407	408

408	409

409 Token::Value Scanner::PeekAhead() {	410 Token::Value Scanner::PeekAhead() {

(...skipping 192 matching lines...)
602 return Token::LT;	603 return Token::LT;

603 }	604 }

604	605

605 found_html_comment_ = true;	606 found_html_comment_ = true;

606 return SkipSingleLineComment();	607 return SkipSingleLineComment();

607 }	608 }

608	609

609 void Scanner::Scan() {	610 void Scanner::Scan() {

610 next_.literal_chars = NULL;	611 next_.literal_chars = NULL;

611 next_.raw_literal_chars = NULL;	612 next_.raw_literal_chars = NULL;

	613 next_.invalid_template_escape_message_ = MessageTemplate::kNone;

612 Token::Value token;	614 Token::Value token;

613 do {	615 do {

614 // Remember the position of the next token	616 // Remember the position of the next token

615 next_.location.beg_pos = source_pos();	617 next_.location.beg_pos = source_pos();

616	618

617 switch (c0_) {	619 switch (c0_) {

618 case ' ':	620 case ' ':

619 case '\t':	621 case '\t':

620 Advance();	622 Advance();

621 token = Token::WHITESPACE;	623 token = Token::WHITESPACE;

(...skipping 260 matching lines...)
882 }	884 }

883	885

884 #ifdef DEBUG	886 #ifdef DEBUG

885 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {	887 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {

886 // Most tokens should not have literal_chars or even raw_literal chars.	888 // Most tokens should not have literal_chars or even raw_literal chars.

887 // The rules are:	889 // The rules are:

888 // - UNINITIALIZED: we don't care.	890 // - UNINITIALIZED: we don't care.

889 // - TEMPLATE_*: need both literal + raw literal chars.	891 // - TEMPLATE_*: need both literal + raw literal chars.

890 // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.	892 // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.

891 // - all others: should have neither.	893 // - all others: should have neither.

	894 // Furthermore, only TEMPLATE_* tokens can have a

	895 // invalid_template_escape_message_.

892	896

893 switch (token.token) {	897 switch (token.token) {

894 case Token::UNINITIALIZED:	898 case Token::UNINITIALIZED:

895 // token.literal_chars & other members might be garbage. That's ok.	899 // token.literal_chars & other members might be garbage. That's ok.

896 break;	900 break;

897 case Token::TEMPLATE_SPAN:	901 case Token::TEMPLATE_SPAN:

898 case Token::TEMPLATE_TAIL:	902 case Token::TEMPLATE_TAIL:

899 DCHECK_NOT_NULL(token.raw_literal_chars);	903 DCHECK_NOT_NULL(token.raw_literal_chars);

900 DCHECK_NOT_NULL(token.literal_chars);	904 DCHECK_NOT_NULL(token.literal_chars);

901 break;	905 break;

902 case Token::ESCAPED_KEYWORD:	906 case Token::ESCAPED_KEYWORD:

903 case Token::ESCAPED_STRICT_RESERVED_WORD:	907 case Token::ESCAPED_STRICT_RESERVED_WORD:

904 case Token::FUTURE_STRICT_RESERVED_WORD:	908 case Token::FUTURE_STRICT_RESERVED_WORD:

905 case Token::IDENTIFIER:	909 case Token::IDENTIFIER:

906 case Token::NUMBER:	910 case Token::NUMBER:

907 case Token::REGEXP_LITERAL:	911 case Token::REGEXP_LITERAL:

908 case Token::SMI:	912 case Token::SMI:

909 case Token::STRING:	913 case Token::STRING:

910 DCHECK_NOT_NULL(token.literal_chars);	914 DCHECK_NOT_NULL(token.literal_chars);

911 DCHECK_NULL(token.raw_literal_chars);	915 DCHECK_NULL(token.raw_literal_chars);

	916 DCHECK_EQ(token.invalid_template_escape_message_, MessageTemplate::kNone);

912 break;	917 break;

913 default:	918 default:

914 DCHECK_NULL(token.literal_chars);	919 DCHECK_NULL(token.literal_chars);

915 DCHECK_NULL(token.raw_literal_chars);	920 DCHECK_NULL(token.raw_literal_chars);

	921 DCHECK_EQ(token.invalid_template_escape_message_, MessageTemplate::kNone);

916 break;	922 break;

917 }	923 }

918 }	924 }

919 #endif // DEBUG	925 #endif // DEBUG

920	926

921 void Scanner::SeekForward(int pos) {	927 void Scanner::SeekForward(int pos) {

922 // After this call, we will have the token at the given position as	928 // After this call, we will have the token at the given position as

923 // the "next" token. The "current" token will be invalid.	929 // the "next" token. The "current" token will be invalid.

924 if (pos == next_.location.beg_pos) return;	930 if (pos == next_.location.beg_pos) return;

925 int current_pos = source_pos();	931 int current_pos = source_pos();

(...skipping 184 matching lines...)
1110 } else {	1116 } else {

1111 AddRawLiteralChar('\n');	1117 AddRawLiteralChar('\n');

1112 }	1118 }

1113 }	1119 }

1114 } else {	1120 } else {

1115 bool success = ScanEscape<capture_raw, in_template_literal>();	1121 bool success = ScanEscape<capture_raw, in_template_literal>();

1116 USE(success);	1122 USE(success);

1117 DCHECK_EQ(!success, has_error());	1123 DCHECK_EQ(!success, has_error());

1118 // For templates, invalid escape sequence checking is handled in the	1124 // For templates, invalid escape sequence checking is handled in the

1119 // parser.	1125 // parser.

1120 scanner_error_state.MoveErrorTo(&invalid_template_escape_message_,	1126 scanner_error_state.MoveErrorTo(
	vogelheim 2017/03/03 10:40:08
	nitpick: With this change, it might be slightly nicer to change the signature of MoveErrorTo to MoveErrorTo(TokenDesc&), since now there's a 'thing' that contains both of the variables we're trying to modify here.

	bakkot1 2017/03/03 20:50:17
	On 2017/03/03 10:40:08, vogelheim wrote:
	> nitpick: With this change, it might be slightly nicer to change the signature of
	> MoveErrorTo to MoveErrorTo(TokenDesc&), since now there's a 'thing' that
	> contains both of the variables we're trying to modify here.
	TokenDesc is private in Scanner. I could make it public, I guess.
	On the other hand, at that point it could just be "MoveErrorToNextTokenDesc" and take no parameters at all. Thoughts?

	vogelheim 2017/03/03 21:26:19
	On 2017/03/03 20:50:17, bakkot1 wrote:
	> On 2017/03/03 10:40:08, vogelheim wrote:
	> > nitpick: With this change, it might be slightly nicer to change the signature of
	> > MoveErrorTo to MoveErrorTo(TokenDesc&), since now there's a 'thing' that
	> > contains both of the variables we're trying to modify here.
	>
	> TokenDesc is private in Scanner. I could make it public, I guess.
	>
	> On the other hand, at that point it could just be "MoveErrorToNextTokenDesc"
	> and take no parameters at all. Thoughts?
	Oops. Hrmm, yeah, I think I asked you to take ErrorState out of the header... :-/
	Alternatives:
	- Leave things as they are.
	- Make TokenDesc public. I'm not fond of it.
	- Make ErrorState a friend of Scanner.
	- Forward declare Scanner::ErrorState.
	I think I'd prefer last or second-to-last, but am happy to let you have the last word.
	(What I mean with the last one is: Declare `class ErrorState;` in the private: section of Scanner, and in scanner.cc implement it as Scanner::ErrorState. When I try it locally (without the rest of your patch), this seems to work and gives Scanner::ErrorState full access to Scanner, without polluting the header.)
1121 &invalid_template_escape_location_);	1127 &next_.invalid_template_escape_message_,

1122 octal_error_state.MoveErrorTo(&invalid_template_escape_message_,	1128 &next_.invalid_template_escape_location_);

1123 &invalid_template_escape_location_);	1129 octal_error_state.MoveErrorTo(&next_.invalid_template_escape_message_,

	1130 &next_.invalid_template_escape_location_);

1124 }	1131 }

1125 } else if (c < 0) {	1132 } else if (c < 0) {

1126 // Unterminated template literal	1133 // Unterminated template literal

1127 PushBack(c);	1134 PushBack(c);

1128 break;	1135 break;

1129 } else {	1136 } else {

1130 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.	1137 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.

1131 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence	1138 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence

1132 // consisting of the CV 0x000A.	1139 // consisting of the CV 0x000A.

1133 if (c == '\r') {	1140 if (c == '\r') {

(...skipping 595 matching lines...)
1729 }	1736 }

1730	1737

1731 void Scanner::SeekNext(size_t position) {	1738 void Scanner::SeekNext(size_t position) {

1732 // Use with care: This cleanly resets most, but not all scanner state.	1739 // Use with care: This cleanly resets most, but not all scanner state.

1733 // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.	1740 // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.

1734	1741

1735 // To re-scan from a given character position, we need to:	1742 // To re-scan from a given character position, we need to:

1736 // 1, Reset the current_, next_ and next_next_ tokens	1743 // 1, Reset the current_, next_ and next_next_ tokens

1737 // (next_ + next_next_ will be overwrittem by Next(),	1744 // (next_ + next_next_ will be overwrittem by Next(),

1738 // current_ will remain unchanged, so overwrite it fully.)	1745 // current_ will remain unchanged, so overwrite it fully.)

1739 current_ = {{0, 0}, nullptr, nullptr, 0, Token::UNINITIALIZED};	1746 current_ = {

	1747 {0, 0}, nullptr, nullptr, 0, Token::UNINITIALIZED, MessageTemplate::kNone,

	1748 {0, 0}};

1740 next_.token = Token::UNINITIALIZED;	1749 next_.token = Token::UNINITIALIZED;

1741 next_next_.token = Token::UNINITIALIZED;	1750 next_next_.token = Token::UNINITIALIZED;

1742 // 2, reset the source to the desired position,	1751 // 2, reset the source to the desired position,

1743 source_->Seek(position);	1752 source_->Seek(position);

1744 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).	1753 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).

1745 c0_ = source_->Advance();	1754 c0_ = source_->Advance();

1746 Next();	1755 Next();

1747 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));	1756 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));

1748 }	1757 }

1749	1758

1750 } // namespace internal	1759 } // namespace internal

1751 } // namespace v8	1760 } // namespace v8

OLD	NEW

« src/parsing/scanner.h ('K') | « src/parsing/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »