Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(168)

Side by Side Diff: src/parsing/scanner.cc

Issue 2665513002: [parser] Lift template literal invalid escape restriction (Closed)
Patch Set: rebase again, almost certainly not necessary Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parsing/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/parsing/scanner.h" 7 #include "src/parsing/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
11 #include <cmath> 11 #include <cmath>
12 12
13 #include "src/ast/ast-value-factory.h" 13 #include "src/ast/ast-value-factory.h"
14 #include "src/char-predicates-inl.h" 14 #include "src/char-predicates-inl.h"
15 #include "src/conversions-inl.h" 15 #include "src/conversions-inl.h"
16 #include "src/list-inl.h" 16 #include "src/list-inl.h"
17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 // Scoped helper for saving & restoring scanner error state.
23 // This is used for tagged template literals, in which normally forbidden
24 // escape sequences are allowed.
25 class ErrorState {
26 public:
27 ErrorState(MessageTemplate::Template* message_stack,
28 Scanner::Location* location_stack)
29 : message_stack_(message_stack),
30 old_message_(*message_stack),
31 location_stack_(location_stack),
32 old_location_(*location_stack) {
33 *message_stack_ = MessageTemplate::kNone;
34 *location_stack_ = Scanner::Location::invalid();
35 }
36
37 ~ErrorState() {
38 *message_stack_ = old_message_;
39 *location_stack_ = old_location_;
40 }
41
42 void MoveErrorTo(MessageTemplate::Template* message_dest,
43 Scanner::Location* location_dest) {
44 if (*message_stack_ == MessageTemplate::kNone) {
45 return;
46 }
47 if (*message_dest == MessageTemplate::kNone) {
48 *message_dest = *message_stack_;
49 *location_dest = *location_stack_;
50 }
51 *message_stack_ = MessageTemplate::kNone;
52 *location_stack_ = Scanner::Location::invalid();
53 }
54
55 private:
56 MessageTemplate::Template* const message_stack_;
57 MessageTemplate::Template const old_message_;
58 Scanner::Location* const location_stack_;
59 Scanner::Location const old_location_;
60 };
61
22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { 62 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
23 if (is_one_byte()) { 63 if (is_one_byte()) {
24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); 64 return isolate->factory()->InternalizeOneByteString(one_byte_literal());
25 } 65 }
26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); 66 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
27 } 67 }
28 68
29 int Scanner::LiteralBuffer::NewCapacity(int min_capacity) { 69 int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
30 int capacity = Max(min_capacity, backing_store_.length()); 70 int capacity = Max(min_capacity, backing_store_.length());
31 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); 71 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
(...skipping 909 matching lines...) Expand 10 before | Expand all | Expand 10 after
941 case '2': // fall through 981 case '2': // fall through
942 case '3': // fall through 982 case '3': // fall through
943 case '4': // fall through 983 case '4': // fall through
944 case '5': // fall through 984 case '5': // fall through
945 case '6': // fall through 985 case '6': // fall through
946 case '7': 986 case '7':
947 c = ScanOctalEscape<capture_raw>(c, 2); 987 c = ScanOctalEscape<capture_raw>(c, 2);
948 break; 988 break;
949 } 989 }
950 990
951 // According to ECMA-262, section 7.8.4, characters not covered by the 991 // Other escaped characters are interpreted as their non-escaped version.
952 // above cases should be illegal, but they are commonly handled as
953 // non-escaped characters by JS VMs.
954 AddLiteralChar(c); 992 AddLiteralChar(c);
955 return true; 993 return true;
956 } 994 }
957 995
958 996
959 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
960 // ECMA-262. Other JS VMs support them.
961 template <bool capture_raw> 997 template <bool capture_raw>
962 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { 998 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
963 uc32 x = c - '0'; 999 uc32 x = c - '0';
964 int i = 0; 1000 int i = 0;
965 for (; i < length; i++) { 1001 for (; i < length; i++) {
966 int d = c0_ - '0'; 1002 int d = c0_ - '0';
967 if (d < 0 || d > 7) break; 1003 if (d < 0 || d > 7) break;
968 int nx = x * 8 + d; 1004 int nx = x * 8 + d;
969 if (nx >= 256) break; 1005 if (nx >= 256) break;
970 x = nx; 1006 x = nx;
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
1032 // | } LiteralChars* ${ 1068 // | } LiteralChars* ${
1033 // 1069 //
1034 // TEMPLATE_TAIL :: 1070 // TEMPLATE_TAIL ::
1035 // ` LiteralChars* ` 1071 // ` LiteralChars* `
1036 // | } LiteralChars* ` 1072 // | } LiteralChars* `
1037 // 1073 //
1038 // A TEMPLATE_SPAN should always be followed by an Expression, while a 1074 // A TEMPLATE_SPAN should always be followed by an Expression, while a
1039 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be 1075 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
1040 // followed by an Expression. 1076 // followed by an Expression.
1041 1077
1078 // These scoped helpers save and restore the original error state, so that we
1079 // can specially treat invalid escape sequences in templates (which are
1080 // handled by the parser).
1081 ErrorState scanner_error_state(&scanner_error_, &scanner_error_location_);
1082 ErrorState octal_error_state(&octal_message_, &octal_pos_);
1083
1042 Token::Value result = Token::TEMPLATE_SPAN; 1084 Token::Value result = Token::TEMPLATE_SPAN;
1043 LiteralScope literal(this); 1085 LiteralScope literal(this);
1044 StartRawLiteral(); 1086 StartRawLiteral();
1045 const bool capture_raw = true; 1087 const bool capture_raw = true;
1046 const bool in_template_literal = true; 1088 const bool in_template_literal = true;
1047 while (true) { 1089 while (true) {
1048 uc32 c = c0_; 1090 uc32 c = c0_;
1049 Advance<capture_raw>(); 1091 Advance<capture_raw>();
1050 if (c == '`') { 1092 if (c == '`') {
1051 result = Token::TEMPLATE_TAIL; 1093 result = Token::TEMPLATE_TAIL;
(...skipping 10 matching lines...) Expand all
1062 uc32 lastChar = c0_; 1104 uc32 lastChar = c0_;
1063 Advance<capture_raw>(); 1105 Advance<capture_raw>();
1064 if (lastChar == '\r') { 1106 if (lastChar == '\r') {
1065 ReduceRawLiteralLength(1); // Remove \r 1107 ReduceRawLiteralLength(1); // Remove \r
1066 if (c0_ == '\n') { 1108 if (c0_ == '\n') {
1067 Advance<capture_raw>(); // Adds \n 1109 Advance<capture_raw>(); // Adds \n
1068 } else { 1110 } else {
1069 AddRawLiteralChar('\n'); 1111 AddRawLiteralChar('\n');
1070 } 1112 }
1071 } 1113 }
1072 } else if (!ScanEscape<capture_raw, in_template_literal>()) { 1114 } else {
1073 return Token::ILLEGAL; 1115 bool success = ScanEscape<capture_raw, in_template_literal>();
1116 DCHECK_EQ(!success, has_error());
adamk 2017/02/22 20:45:36 You could add USE(success); to avoid the failure
bakkot1 2017/02/22 20:56:44 Ah, thanks. Once more with feeling...
1117 // For templates, invalid escape sequence checking is handled in the
1118 // parser.
1119 scanner_error_state.MoveErrorTo(&invalid_template_escape_message_,
1120 &invalid_template_escape_location_);
1121 octal_error_state.MoveErrorTo(&invalid_template_escape_message_,
1122 &invalid_template_escape_location_);
1074 } 1123 }
1075 } else if (c < 0) { 1124 } else if (c < 0) {
1076 // Unterminated template literal 1125 // Unterminated template literal
1077 PushBack(c); 1126 PushBack(c);
1078 break; 1127 break;
1079 } else { 1128 } else {
1080 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. 1129 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
1081 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence 1130 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
1082 // consisting of the CV 0x000A. 1131 // consisting of the CV 0x000A.
1083 if (c == '\r') { 1132 if (c == '\r') {
1084 ReduceRawLiteralLength(1); // Remove \r 1133 ReduceRawLiteralLength(1); // Remove \r
1085 if (c0_ == '\n') { 1134 if (c0_ == '\n') {
1086 Advance<capture_raw>(); // Adds \n 1135 Advance<capture_raw>(); // Adds \n
1087 } else { 1136 } else {
1088 AddRawLiteralChar('\n'); 1137 AddRawLiteralChar('\n');
1089 } 1138 }
1090 c = '\n'; 1139 c = '\n';
1091 } 1140 }
1092 AddLiteralChar(c); 1141 AddLiteralChar(c);
1093 } 1142 }
1094 } 1143 }
1095 literal.Complete(); 1144 literal.Complete();
1096 next_.location.end_pos = source_pos(); 1145 next_.location.end_pos = source_pos();
1097 next_.token = result; 1146 next_.token = result;
1147
1098 return result; 1148 return result;
1099 } 1149 }
1100 1150
1101 1151
1102 Token::Value Scanner::ScanTemplateStart() { 1152 Token::Value Scanner::ScanTemplateStart() {
1103 DCHECK(next_next_.token == Token::UNINITIALIZED); 1153 DCHECK(next_next_.token == Token::UNINITIALIZED);
1104 DCHECK(c0_ == '`'); 1154 DCHECK(c0_ == '`');
1105 next_.location.beg_pos = source_pos(); 1155 next_.location.beg_pos = source_pos();
1106 Advance(); // Consume ` 1156 Advance(); // Consume `
1107 return ScanTemplateSpan(); 1157 return ScanTemplateSpan();
(...skipping 583 matching lines...) Expand 10 before | Expand all | Expand 10 after
1691 // 2, reset the source to the desired position, 1741 // 2, reset the source to the desired position,
1692 source_->Seek(position); 1742 source_->Seek(position);
1693 // 3, re-scan, by scanning the look-ahead char + 1 token (next_). 1743 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
1694 c0_ = source_->Advance(); 1744 c0_ = source_->Advance();
1695 Next(); 1745 Next();
1696 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position)); 1746 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));
1697 } 1747 }
1698 1748
1699 } // namespace internal 1749 } // namespace internal
1700 } // namespace v8 1750 } // namespace v8
OLDNEW
« no previous file with comments | « src/parsing/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698