src/parsing/scanner.cc - Issue 2665513002: [parser] Lift template literal invalid escape restriction

Side by Side Diff: src/parsing/scanner.cc

Issue 2665513002: [parser] Lift template literal invalid escape restriction (Closed)

Patch Set: address comments Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 930 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
941 case '2': // fall through	941 case '2': // fall through

942 case '3': // fall through	942 case '3': // fall through

943 case '4': // fall through	943 case '4': // fall through

944 case '5': // fall through	944 case '5': // fall through

945 case '6': // fall through	945 case '6': // fall through

946 case '7':	946 case '7':

947 c = ScanOctalEscape<capture_raw>(c, 2);	947 c = ScanOctalEscape<capture_raw>(c, 2);

948 break;	948 break;

949 }	949 }

950	950

951 // According to ECMA-262, section 7.8.4, characters not covered by the	951 // Other escaped characters are interpreted as their non-escaped version.

952 // above cases should be illegal, but they are commonly handled as

953 // non-escaped characters by JS VMs.

954 AddLiteralChar(c);	952 AddLiteralChar(c);

955 return true;	953 return true;

956 }	954 }

957	955

958	956

959 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

960 // ECMA-262. Other JS VMs support them.

961 template <bool capture_raw>	957 template <bool capture_raw>

962 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {	958 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {

963 uc32 x = c - '0';	959 uc32 x = c - '0';

964 int i = 0;	960 int i = 0;

965 for (; i < length; i++) {	961 for (; i < length; i++) {

966 int d = c0_ - '0';	962 int d = c0_ - '0';

967 if (d < 0 \|\| d > 7) break;	963 if (d < 0 \|\| d > 7) break;

968 int nx = x * 8 + d;	964 int nx = x * 8 + d;

969 if (nx >= 256) break;	965 if (nx >= 256) break;

970 x = nx;	966 x = nx;

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1032 // \| } LiteralChars* ${	1028 // \| } LiteralChars* ${

1033 //	1029 //

1034 // TEMPLATE_TAIL ::	1030 // TEMPLATE_TAIL ::

1035 // ` LiteralChars* `	1031 // ` LiteralChars* `

1036 // \| } LiteralChars* `	1032 // \| } LiteralChars* `

1037 //	1033 //

1038 // A TEMPLATE_SPAN should always be followed by an Expression, while a	1034 // A TEMPLATE_SPAN should always be followed by an Expression, while a

1039 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be	1035 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be

1040 // followed by an Expression.	1036 // followed by an Expression.

1041	1037

	1038 DCHECK(!has_error());

1042 Token::Value result = Token::TEMPLATE_SPAN;	1039 Token::Value result = Token::TEMPLATE_SPAN;

1043 LiteralScope literal(this);	1040 LiteralScope literal(this);

1044 StartRawLiteral();	1041 StartRawLiteral();

1045 const bool capture_raw = true;	1042 const bool capture_raw = true;

1046 const bool in_template_literal = true;	1043 const bool in_template_literal = true;

1047 while (true) {	1044 while (true) {

1048 uc32 c = c0_;	1045 uc32 c = c0_;

1049 Advance<capture_raw>();	1046 Advance<capture_raw>();

1050 if (c == '`') {	1047 if (c == '`') {

1051 result = Token::TEMPLATE_TAIL;	1048 result = Token::TEMPLATE_TAIL;

(...skipping 10 matching lines...) Expand all Loading...
1062 uc32 lastChar = c0_;	1059 uc32 lastChar = c0_;

1063 Advance<capture_raw>();	1060 Advance<capture_raw>();

1064 if (lastChar == '\r') {	1061 if (lastChar == '\r') {

1065 ReduceRawLiteralLength(1); // Remove \r	1062 ReduceRawLiteralLength(1); // Remove \r

1066 if (c0_ == '\n') {	1063 if (c0_ == '\n') {

1067 Advance<capture_raw>(); // Adds \n	1064 Advance<capture_raw>(); // Adds \n

1068 } else {	1065 } else {

1069 AddRawLiteralChar('\n');	1066 AddRawLiteralChar('\n');

1070 }	1067 }

1071 }	1068 }

1072 } else if (!ScanEscape<capture_raw, in_template_literal>()) {	1069 } else {

1073 return Token::ILLEGAL;	1070 ScanEscape<capture_raw, in_template_literal>();
	vogelheim 2017/02/17 16:33:53 In a template w/ two invalid escapes, this would record the position of the last one, right?
	bakkot1 2017/02/17 20:42:18 On 2017/02/17 16:33:53, vogelheim wrote: > In a template w/ two invalid escapes, this would record the position of the last one, right? It would. Fixed. Octal literals actually have the same issue currently: we report the location of the last octal literal in a string or template literal. With this patch, we report the last in a string and the first in a template literal. Actually, that exposes a bug, which I'll document elsewhere.
	1071 if (has_error()) {

	1072 // For templates, invalid escape sequence checking is handled in the

	1073 // parser.

	1074 invalid_template_escape_message_ = scanner_error_;

	1075 invalid_template_escape_location_ = scanner_error_location_;

	1076 scanner_error_ = MessageTemplate::kNone;

	1077 scanner_error_location_ = Location();

	1078 }

1074 }	1079 }

1075 } else if (c < 0) {	1080 } else if (c < 0) {

1076 // Unterminated template literal	1081 // Unterminated template literal

1077 PushBack(c);	1082 PushBack(c);

1078 break;	1083 break;

1079 } else {	1084 } else {

1080 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.	1085 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.

1081 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence	1086 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence

1082 // consisting of the CV 0x000A.	1087 // consisting of the CV 0x000A.

1083 if (c == '\r') {	1088 if (c == '\r') {

(...skipping 605 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1689 // 2, reset the source to the desired position,	1694 // 2, reset the source to the desired position,

1690 source_->Seek(position);	1695 source_->Seek(position);

1691 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).	1696 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).

1692 c0_ = source_->Advance();	1697 c0_ = source_->Advance();

1693 Next();	1698 Next();

1694 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));	1699 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));

1695 }	1700 }

1696	1701

1697 } // namespace internal	1702 } // namespace internal

1698 } // namespace v8	1703 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parsing/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »