src/scanner.cc - Issue 1429983002: [es6] early error when Identifier is an escaped reserved word

Side by Side Diff: src/scanner.cc

Issue 1429983002: [es6] early error when Identifier is an escaped reserved word (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: rebase Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/scanner.h"	7 #include "src/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 1159 matching lines...)
1170 KEYWORD("var", Token::VAR) \	1170 KEYWORD("var", Token::VAR) \

1171 KEYWORD("void", Token::VOID) \	1171 KEYWORD("void", Token::VOID) \

1172 KEYWORD_GROUP('w') \	1172 KEYWORD_GROUP('w') \

1173 KEYWORD("while", Token::WHILE) \	1173 KEYWORD("while", Token::WHILE) \

1174 KEYWORD("with", Token::WITH) \	1174 KEYWORD("with", Token::WITH) \

1175 KEYWORD_GROUP('y') \	1175 KEYWORD_GROUP('y') \

1176 KEYWORD("yield", Token::YIELD)	1176 KEYWORD("yield", Token::YIELD)

1177	1177

1178	1178

1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input,	1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input,

1180 int input_length) {	1180 int input_length, bool escaped) {

1181 DCHECK(input_length >= 1);	1181 DCHECK(input_length >= 1);

1182 const int kMinLength = 2;	1182 const int kMinLength = 2;

1183 const int kMaxLength = 10;	1183 const int kMaxLength = 10;

1184 if (input_length < kMinLength \|\| input_length > kMaxLength) {	1184 if (input_length < kMinLength \|\| input_length > kMaxLength) {

1185 return Token::IDENTIFIER;	1185 return Token::IDENTIFIER;

1186 }	1186 }

1187 switch (input[0]) {	1187 switch (input[0]) {

1188 default:	1188 default:

1189 #define KEYWORD_GROUP_CASE(ch) \	1189 #define KEYWORD_GROUP_CASE(ch) \

1190 break; \	1190 break; \

1191 case ch:	1191 case ch:

1192 #define KEYWORD(keyword, token) \	1192 #define KEYWORD(keyword, token) \

1193 { \	1193 { \

1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \	1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \

1195 /* strlen(keyword) plus 1 for the NUL char. */ \	1195 /* strlen(keyword) plus 1 for the NUL char. */ \

1196 const int keyword_length = sizeof(keyword) - 1; \	1196 const int keyword_length = sizeof(keyword) - 1; \

1197 STATIC_ASSERT(keyword_length >= kMinLength); \	1197 STATIC_ASSERT(keyword_length >= kMinLength); \

1198 STATIC_ASSERT(keyword_length <= kMaxLength); \	1198 STATIC_ASSERT(keyword_length <= kMaxLength); \

1199 if (input_length == keyword_length && \	1199 if (input_length == keyword_length && input[1] == keyword[1] && \

1200 input[1] == keyword[1] && \	1200 (keyword_length <= 2 \|\| input[2] == keyword[2]) && \

1201 (keyword_length <= 2 \|\| input[2] == keyword[2]) && \	1201 (keyword_length <= 3 \|\| input[3] == keyword[3]) && \

1202 (keyword_length <= 3 \|\| input[3] == keyword[3]) && \	1202 (keyword_length <= 4 \|\| input[4] == keyword[4]) && \

1203 (keyword_length <= 4 \|\| input[4] == keyword[4]) && \	1203 (keyword_length <= 5 \|\| input[5] == keyword[5]) && \

1204 (keyword_length <= 5 \|\| input[5] == keyword[5]) && \	1204 (keyword_length <= 6 \|\| input[6] == keyword[6]) && \

1205 (keyword_length <= 6 \|\| input[6] == keyword[6]) && \	1205 (keyword_length <= 7 \|\| input[7] == keyword[7]) && \

1206 (keyword_length <= 7 \|\| input[7] == keyword[7]) && \	1206 (keyword_length <= 8 \|\| input[8] == keyword[8]) && \

1207 (keyword_length <= 8 \|\| input[8] == keyword[8]) && \	1207 (keyword_length <= 9 \|\| input[9] == keyword[9])) { \

1208 (keyword_length <= 9 \|\| input[9] == keyword[9])) { \	1208 if (escaped && token == Token::FUTURE_STRICT_RESERVED_WORD) { \
	rossberg (2015/11/06 13:31:05): Nit: Can we regroup this to `if (escaped) { return token == Token::FUTURE_STRICT_RESERVED_WORD ? Token::ESCAPED_STRICT_RESERVED_WORD : Token::ESCAPED_KEYWORD; } return token;`
	caitp (gmail) (2015/11/06 19:08:06): Done.
1209 return token; \	1209 return Token::ESCAPED_STRICT_RESERVED_WORD; \

1210 } \	1210 } \

1211 }	1211 return escaped ? Token::ESCAPED_KEYWORD : token; \

	1212 } \

	1213 }

1212 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)	1214 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)

1213 }	1215 }

1214 return Token::IDENTIFIER;	1216 return Token::IDENTIFIER;

1215 }	1217 }

1216	1218

1217	1219

1218 bool Scanner::IdentifierIsFutureStrictReserved(	1220 bool Scanner::IdentifierIsFutureStrictReserved(

1219 const AstRawString* string) const {	1221 const AstRawString* string) const {

1220 // Keywords are always 1-byte strings.	1222 // Keywords are always 1-byte strings.

1221 if (!string->is_one_byte()) return false;	1223 if (!string->is_one_byte()) return false;

1222 if (string->IsOneByteEqualTo("let") \|\| string->IsOneByteEqualTo("static") \|\|	1224 if (string->IsOneByteEqualTo("let") \|\| string->IsOneByteEqualTo("static") \|\|

1223 string->IsOneByteEqualTo("yield")) {	1225 string->IsOneByteEqualTo("yield")) {

1224 return true;	1226 return true;

1225 }	1227 }

1226 return Token::FUTURE_STRICT_RESERVED_WORD ==	1228 return Token::FUTURE_STRICT_RESERVED_WORD ==

1227 KeywordOrIdentifierToken(string->raw_data(), string->length());	1229 KeywordOrIdentifierToken(string->raw_data(), string->length(), false);

1228 }	1230 }

1229	1231

1230	1232

1231 Token::Value Scanner::ScanIdentifierOrKeyword() {	1233 Token::Value Scanner::ScanIdentifierOrKeyword() {

1232 DCHECK(unicode_cache_->IsIdentifierStart(c0_));	1234 DCHECK(unicode_cache_->IsIdentifierStart(c0_));

1233 LiteralScope literal(this);	1235 LiteralScope literal(this);

1234 if (IsInRange(c0_, 'a', 'z')) {	1236 if (IsInRange(c0_, 'a', 'z')) {

1235 do {	1237 do {

1236 uc32 first_char = c0_;	1238 uc32 first_char = c0_;

1237 Advance<false, false>();	1239 Advance<false, false>();

(...skipping 12 matching lines...)
1250 AddLiteralChar(first_char);	1252 AddLiteralChar(first_char);

1251 }	1253 }

1252 if (c0_ <= kMaxAscii && c0_ != '\\') {	1254 if (c0_ <= kMaxAscii && c0_ != '\\') {

1253 literal.Complete();	1255 literal.Complete();

1254 return Token::IDENTIFIER;	1256 return Token::IDENTIFIER;

1255 }	1257 }

1256 } else if (c0_ <= kMaxAscii && c0_ != '\\') {	1258 } else if (c0_ <= kMaxAscii && c0_ != '\\') {

1257 // Only a-z+: could be a keyword or identifier.	1259 // Only a-z+: could be a keyword or identifier.

1258 literal.Complete();	1260 literal.Complete();

1259 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();	1261 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

1260 return KeywordOrIdentifierToken(chars.start(), chars.length());	1262 return KeywordOrIdentifierToken(chars.start(), chars.length(), false);

1261 }	1263 }

1262	1264

1263 HandleLeadSurrogate();	1265 HandleLeadSurrogate();

1264 } else if (IsInRange(c0_, 'A', 'Z') \|\| c0_ == '_' \|\| c0_ == '$') {	1266 } else if (IsInRange(c0_, 'A', 'Z') \|\| c0_ == '_' \|\| c0_ == '$') {

1265 do {	1267 do {

1266 uc32 first_char = c0_;	1268 uc32 first_char = c0_;

1267 Advance<false, false>();	1269 Advance<false, false>();

1268 AddLiteralChar(first_char);	1270 AddLiteralChar(first_char);

1269 } while (IsAsciiIdentifier(c0_));	1271 } while (IsAsciiIdentifier(c0_));

1270	1272

1271 if (c0_ <= kMaxAscii && c0_ != '\\') {	1273 if (c0_ <= kMaxAscii && c0_ != '\\') {

1272 literal.Complete();	1274 literal.Complete();

1273 return Token::IDENTIFIER;	1275 return Token::IDENTIFIER;

1274 }	1276 }

1275	1277

1276 HandleLeadSurrogate();	1278 HandleLeadSurrogate();

1277 } else if (c0_ == '\\') {	1279 } else if (c0_ == '\\') {

1278 // Scan identifier start character.	1280 // Scan identifier start character.

1279 uc32 c = ScanIdentifierUnicodeEscape();	1281 uc32 c = ScanIdentifierUnicodeEscape();

1280 // Only allow legal identifier start characters.	1282 // Only allow legal identifier start characters.

1281 if (c < 0 \|\|	1283 if (c < 0 \|\|

1282 c == '\\' \|\| // No recursive escapes.	1284 c == '\\' \|\| // No recursive escapes.

1283 !unicode_cache_->IsIdentifierStart(c)) {	1285 !unicode_cache_->IsIdentifierStart(c)) {

1284 return Token::ILLEGAL;	1286 return Token::ILLEGAL;

1285 }	1287 }

1286 AddLiteralChar(c);	1288 AddLiteralChar(c);

1287 return ScanIdentifierSuffix(&literal);	1289 return ScanIdentifierSuffix(&literal, true);

1288 } else {	1290 } else {

1289 uc32 first_char = c0_;	1291 uc32 first_char = c0_;

1290 Advance();	1292 Advance();

1291 AddLiteralChar(first_char);	1293 AddLiteralChar(first_char);

1292 }	1294 }

1293	1295

1294 // Scan the rest of the identifier characters.	1296 // Scan the rest of the identifier characters.

1295 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1297 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

1296 if (c0_ != '\\') {	1298 if (c0_ != '\\') {

1297 uc32 next_char = c0_;	1299 uc32 next_char = c0_;

1298 Advance();	1300 Advance();

1299 AddLiteralChar(next_char);	1301 AddLiteralChar(next_char);

1300 continue;	1302 continue;

1301 }	1303 }

1302 // Fallthrough if no longer able to complete keyword.	1304 // Fallthrough if no longer able to complete keyword.

1303 return ScanIdentifierSuffix(&literal);	1305 return ScanIdentifierSuffix(&literal, false);

1304 }	1306 }

1305	1307

1306 literal.Complete();	1308 literal.Complete();

1307	1309

1308 if (next_.literal_chars->is_one_byte()) {	1310 if (next_.literal_chars->is_one_byte()) {

1309 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();	1311 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

1310 return KeywordOrIdentifierToken(chars.start(), chars.length());	1312 return KeywordOrIdentifierToken(chars.start(), chars.length(), false);

1311 }	1313 }

1312 return Token::IDENTIFIER;	1314 return Token::IDENTIFIER;

1313 }	1315 }

1314	1316

1315	1317

1316 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {	1318 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,

	1319 bool escaped) {

1317 // Scan the rest of the identifier characters.	1320 // Scan the rest of the identifier characters.

1318 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1321 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

1319 if (c0_ == '\\') {	1322 if (c0_ == '\\') {

1320 uc32 c = ScanIdentifierUnicodeEscape();	1323 uc32 c = ScanIdentifierUnicodeEscape();

	1324 escaped = true;

1321 // Only allow legal identifier part characters.	1325 // Only allow legal identifier part characters.

1322 if (c < 0 \|\|	1326 if (c < 0 \|\|

1323 c == '\\' \|\|	1327 c == '\\' \|\|

1324 !unicode_cache_->IsIdentifierPart(c)) {	1328 !unicode_cache_->IsIdentifierPart(c)) {

1325 return Token::ILLEGAL;	1329 return Token::ILLEGAL;

1326 }	1330 }

1327 AddLiteralChar(c);	1331 AddLiteralChar(c);

1328 } else {	1332 } else {

1329 AddLiteralChar(c0_);	1333 AddLiteralChar(c0_);

1330 Advance();	1334 Advance();

1331 }	1335 }

1332 }	1336 }

1333 literal->Complete();	1337 literal->Complete();

1334	1338

	1339 if (next_.literal_chars->is_one_byte() && escaped) {
	rossberg (2015/11/06 13:31:05): Nit: put the escaped first, since it is the cheaper condition.
	caitp (gmail) (2015/11/06 19:08:06): Done.
	1340 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

	1341 return KeywordOrIdentifierToken(chars.start(), chars.length(), true);

	1342 }

1335 return Token::IDENTIFIER;	1343 return Token::IDENTIFIER;

1336 }	1344 }

1337	1345

1338	1346

1339 bool Scanner::ScanRegExpPattern(bool seen_equal) {	1347 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1340 // Scan: ('/' \| '/=') RegularExpressionBody '/' RegularExpressionFlags	1348 // Scan: ('/' \| '/=') RegularExpressionBody '/' RegularExpressionFlags

1341 bool in_character_class = false;	1349 bool in_character_class = false;

1342	1350

1343 // Previous token is either '/' or '/=', in the second case, the	1351 // Previous token is either '/' or '/=', in the second case, the

1344 // pattern starts at =.	1352 // pattern starts at =.

(...skipping 288 matching lines...)
1633 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));	1641 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));

1634 }	1642 }

1635 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1643 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1636	1644

1637 backing_store_.AddBlock(bytes);	1645 backing_store_.AddBlock(bytes);

1638 return backing_store_.EndSequence().start();	1646 return backing_store_.EndSequence().start();

1639 }	1647 }

1640	1648

1641 } // namespace internal	1649 } // namespace internal

1642 } // namespace v8	1650 } // namespace v8

OLD	NEW

« src/preparser.h ('K') | « src/scanner.h ('k') | src/token.h » ('j') | test/cctest/test-parsing.cc » ('J')