src/scanner.cc - Issue 1429983002: [es6] early error when Identifier is an escaped reserved word

Side by Side Diff: src/scanner.cc

Issue 1429983002: [es6] early error when Identifier is an escaped reserved word (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Cosmetic fixup 1 Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/scanner.h"	7 #include "src/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 1159 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1170 KEYWORD("var", Token::VAR) \	1170 KEYWORD("var", Token::VAR) \

1171 KEYWORD("void", Token::VOID) \	1171 KEYWORD("void", Token::VOID) \

1172 KEYWORD_GROUP('w') \	1172 KEYWORD_GROUP('w') \

1173 KEYWORD("while", Token::WHILE) \	1173 KEYWORD("while", Token::WHILE) \

1174 KEYWORD("with", Token::WITH) \	1174 KEYWORD("with", Token::WITH) \

1175 KEYWORD_GROUP('y') \	1175 KEYWORD_GROUP('y') \

1176 KEYWORD("yield", Token::YIELD)	1176 KEYWORD("yield", Token::YIELD)

1177	1177

1178	1178

1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input,	1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input,

1180 int input_length) {	1180 int input_length, bool escaped) {

1181 DCHECK(input_length >= 1);	1181 DCHECK(input_length >= 1);

1182 const int kMinLength = 2;	1182 const int kMinLength = 2;

1183 const int kMaxLength = 10;	1183 const int kMaxLength = 10;

1184 if (input_length < kMinLength \|\| input_length > kMaxLength) {	1184 if (input_length < kMinLength \|\| input_length > kMaxLength) {

1185 return Token::IDENTIFIER;	1185 return Token::IDENTIFIER;

1186 }	1186 }

1187 switch (input[0]) {	1187 switch (input[0]) {

1188 default:	1188 default:

1189 #define KEYWORD_GROUP_CASE(ch) \	1189 #define KEYWORD_GROUP_CASE(ch) \

1190 break; \	1190 break; \

1191 case ch:	1191 case ch:

1192 #define KEYWORD(keyword, token) \	1192 #define KEYWORD(keyword, token) \

1193 { \	1193 { \

1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \	1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \

1195 /* strlen(keyword) plus 1 for the NUL char. */ \	1195 /* strlen(keyword) plus 1 for the NUL char. */ \

1196 const int keyword_length = sizeof(keyword) - 1; \	1196 const int keyword_length = sizeof(keyword) - 1; \

1197 STATIC_ASSERT(keyword_length >= kMinLength); \	1197 STATIC_ASSERT(keyword_length >= kMinLength); \

1198 STATIC_ASSERT(keyword_length <= kMaxLength); \	1198 STATIC_ASSERT(keyword_length <= kMaxLength); \

1199 if (input_length == keyword_length && \	1199 if (input_length == keyword_length && input[1] == keyword[1] && \

1200 input[1] == keyword[1] && \	1200 (keyword_length <= 2 \|\| input[2] == keyword[2]) && \

1201 (keyword_length <= 2 \|\| input[2] == keyword[2]) && \	1201 (keyword_length <= 3 \|\| input[3] == keyword[3]) && \

1202 (keyword_length <= 3 \|\| input[3] == keyword[3]) && \	1202 (keyword_length <= 4 \|\| input[4] == keyword[4]) && \

1203 (keyword_length <= 4 \|\| input[4] == keyword[4]) && \	1203 (keyword_length <= 5 \|\| input[5] == keyword[5]) && \

1204 (keyword_length <= 5 \|\| input[5] == keyword[5]) && \	1204 (keyword_length <= 6 \|\| input[6] == keyword[6]) && \

1205 (keyword_length <= 6 \|\| input[6] == keyword[6]) && \	1205 (keyword_length <= 7 \|\| input[7] == keyword[7]) && \

1206 (keyword_length <= 7 \|\| input[7] == keyword[7]) && \	1206 (keyword_length <= 8 \|\| input[8] == keyword[8]) && \

1207 (keyword_length <= 8 \|\| input[8] == keyword[8]) && \	1207 (keyword_length <= 9 \|\| input[9] == keyword[9])) { \

1208 (keyword_length <= 9 \|\| input[9] == keyword[9])) { \	1208 if (escaped && token == Token::FUTURE_STRICT_RESERVED_WORD) { \

1209 return token; \	1209 return Token::ESCAPED_STRICT_RESERVED_WORD; \

1210 } \	1210 } \

1211 }	1211 return escaped ? Token::ESCAPED_KEYWORD : token; \

	1212 } \

	1213 }

1212 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)	1214 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)

1213 }	1215 }

1214 return Token::IDENTIFIER;	1216 return Token::IDENTIFIER;

1215 }	1217 }

1216	1218

1217	1219

1218 bool Scanner::IdentifierIsFutureStrictReserved(	1220 bool Scanner::IdentifierIsFutureStrictReserved(

1219 const AstRawString* string) const {	1221 const AstRawString* string) const {

1220 // Keywords are always 1-byte strings.	1222 // Keywords are always 1-byte strings.

1221 if (!string->is_one_byte()) return false;	1223 if (!string->is_one_byte()) return false;

1222 if (string->IsOneByteEqualTo("let") \|\| string->IsOneByteEqualTo("static") \|\|	1224 if (string->IsOneByteEqualTo("let") \|\| string->IsOneByteEqualTo("static") \|\|

1223 string->IsOneByteEqualTo("yield")) {	1225 string->IsOneByteEqualTo("yield")) {

1224 return true;	1226 return true;

1225 }	1227 }

1226 return Token::FUTURE_STRICT_RESERVED_WORD ==	1228 return Token::FUTURE_STRICT_RESERVED_WORD ==

1227 KeywordOrIdentifierToken(string->raw_data(), string->length());	1229 KeywordOrIdentifierToken(string->raw_data(), string->length(), false);

1228 }	1230 }

1229	1231

1230	1232

1231 Token::Value Scanner::ScanIdentifierOrKeyword() {	1233 Token::Value Scanner::ScanIdentifierOrKeyword() {

1232 DCHECK(unicode_cache_->IsIdentifierStart(c0_));	1234 DCHECK(unicode_cache_->IsIdentifierStart(c0_));

1233 LiteralScope literal(this);	1235 LiteralScope literal(this);

	1236 bool escaped = false;
	adamk 2015/11/04 23:42:02 I think this mutable local actually makes this function harder to read; all the callsites know statically what its value is. caitp (gmail) 2015/11/04 23:54:16 On 2015/11/04 23:42:02, adamk wrote: > I think this mutable local actually makes this function harder to read, all the > callsites know statically what its value is. Re-reading the function, I agree. I thought there was another caller to ScanIdentifierUnicodeEscape() in this method, but nope.
1234 if (IsInRange(c0_, 'a', 'z')) {	1237 if (IsInRange(c0_, 'a', 'z')) {

1235 do {	1238 do {

1236 uc32 first_char = c0_;	1239 uc32 first_char = c0_;

1237 Advance<false, false>();	1240 Advance<false, false>();

1238 AddLiteralChar(first_char);	1241 AddLiteralChar(first_char);

1239 } while (IsInRange(c0_, 'a', 'z'));	1242 } while (IsInRange(c0_, 'a', 'z'));

1240	1243

1241 if (IsDecimalDigit(c0_) \|\| IsInRange(c0_, 'A', 'Z') \|\| c0_ == '_' \|\|	1244 if (IsDecimalDigit(c0_) \|\| IsInRange(c0_, 'A', 'Z') \|\| c0_ == '_' \|\|

1242 c0_ == '$') {	1245 c0_ == '$') {

1243 // Identifier starting with lowercase.	1246 // Identifier starting with lowercase.

1244 uc32 first_char = c0_;	1247 uc32 first_char = c0_;

1245 Advance<false, false>();	1248 Advance<false, false>();

1246 AddLiteralChar(first_char);	1249 AddLiteralChar(first_char);

1247 while (IsAsciiIdentifier(c0_)) {	1250 while (IsAsciiIdentifier(c0_)) {

1248 uc32 first_char = c0_;	1251 uc32 first_char = c0_;

1249 Advance<false, false>();	1252 Advance<false, false>();

1250 AddLiteralChar(first_char);	1253 AddLiteralChar(first_char);

1251 }	1254 }

1252 if (c0_ <= kMaxAscii && c0_ != '\\') {	1255 if (c0_ <= kMaxAscii && c0_ != '\\') {

1253 literal.Complete();	1256 literal.Complete();

1254 return Token::IDENTIFIER;	1257 return Token::IDENTIFIER;

1255 }	1258 }

1256 } else if (c0_ <= kMaxAscii && c0_ != '\\') {	1259 } else if (c0_ <= kMaxAscii && c0_ != '\\') {

1257 // Only a-z+: could be a keyword or identifier.	1260 // Only a-z+: could be a keyword or identifier.

1258 literal.Complete();	1261 literal.Complete();

1259 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();	1262 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

1260 return KeywordOrIdentifierToken(chars.start(), chars.length());	1263 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped);
	adamk 2015/11/04 23:42:02 You could simply pass false here.
1261 }	1264 }

1262	1265

1263 HandleLeadSurrogate();	1266 HandleLeadSurrogate();

1264 } else if (IsInRange(c0_, 'A', 'Z') \|\| c0_ == '_' \|\| c0_ == '$') {	1267 } else if (IsInRange(c0_, 'A', 'Z') \|\| c0_ == '_' \|\| c0_ == '$') {

1265 do {	1268 do {

1266 uc32 first_char = c0_;	1269 uc32 first_char = c0_;

1267 Advance<false, false>();	1270 Advance<false, false>();

1268 AddLiteralChar(first_char);	1271 AddLiteralChar(first_char);

1269 } while (IsAsciiIdentifier(c0_));	1272 } while (IsAsciiIdentifier(c0_));

1270	1273

1271 if (c0_ <= kMaxAscii && c0_ != '\\') {	1274 if (c0_ <= kMaxAscii && c0_ != '\\') {

1272 literal.Complete();	1275 literal.Complete();

1273 return Token::IDENTIFIER;	1276 return Token::IDENTIFIER;

1274 }	1277 }

1275	1278

1276 HandleLeadSurrogate();	1279 HandleLeadSurrogate();

1277 } else if (c0_ == '\\') {	1280 } else if (c0_ == '\\') {

1278 // Scan identifier start character.	1281 // Scan identifier start character.

1279 uc32 c = ScanIdentifierUnicodeEscape();	1282 uc32 c = ScanIdentifierUnicodeEscape();

	1283 escaped = true;

1280 // Only allow legal identifier start characters.	1284 // Only allow legal identifier start characters.

1281 if (c < 0 \|\|	1285 if (c < 0 \|\|

1282 c == '\\' \|\| // No recursive escapes.	1286 c == '\\' \|\| // No recursive escapes.

1283 !unicode_cache_->IsIdentifierStart(c)) {	1287 !unicode_cache_->IsIdentifierStart(c)) {

1284 return Token::ILLEGAL;	1288 return Token::ILLEGAL;

1285 }	1289 }

1286 AddLiteralChar(c);	1290 AddLiteralChar(c);

1287 return ScanIdentifierSuffix(&literal);	1291 return ScanIdentifierSuffix(&literal, escaped);
	adamk 2015/11/04 23:42:02 and true here
1288 } else {	1292 } else {

1289 uc32 first_char = c0_;	1293 uc32 first_char = c0_;

1290 Advance();	1294 Advance();

1291 AddLiteralChar(first_char);	1295 AddLiteralChar(first_char);

1292 }	1296 }

1293	1297

1294 // Scan the rest of the identifier characters.	1298 // Scan the rest of the identifier characters.

1295 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1299 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

1296 if (c0_ != '\\') {	1300 if (c0_ != '\\') {

1297 uc32 next_char = c0_;	1301 uc32 next_char = c0_;

1298 Advance();	1302 Advance();

1299 AddLiteralChar(next_char);	1303 AddLiteralChar(next_char);

1300 continue;	1304 continue;

1301 }	1305 }

1302 // Fallthrough if no longer able to complete keyword.	1306 // Fallthrough if no longer able to complete keyword.

1303 return ScanIdentifierSuffix(&literal);	1307 return ScanIdentifierSuffix(&literal, escaped);
	adamk 2015/11/04 23:42:02 and false here
1304 }	1308 }

1305	1309

1306 literal.Complete();	1310 literal.Complete();

1307	1311

1308 if (next_.literal_chars->is_one_byte()) {	1312 if (next_.literal_chars->is_one_byte()) {

1309 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();	1313 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

1310 return KeywordOrIdentifierToken(chars.start(), chars.length());	1314 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped);
	adamk 2015/11/04 23:42:02 and here too
1311 }	1315 }

1312 return Token::IDENTIFIER;	1316 return Token::IDENTIFIER;

1313 }	1317 }

1314	1318

1315	1319

1316 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {	1320 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,

	1321 bool escaped) {

1317 // Scan the rest of the identifier characters.	1322 // Scan the rest of the identifier characters.

1318 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {	1323 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

1319 if (c0_ == '\\') {	1324 if (c0_ == '\\') {

1320 uc32 c = ScanIdentifierUnicodeEscape();	1325 uc32 c = ScanIdentifierUnicodeEscape();

	1326 escaped = true;

1321 // Only allow legal identifier part characters.	1327 // Only allow legal identifier part characters.

1322 if (c < 0 \|\|	1328 if (c < 0 \|\|

1323 c == '\\' \|\|	1329 c == '\\' \|\|

1324 !unicode_cache_->IsIdentifierPart(c)) {	1330 !unicode_cache_->IsIdentifierPart(c)) {

1325 return Token::ILLEGAL;	1331 return Token::ILLEGAL;

1326 }	1332 }

1327 AddLiteralChar(c);	1333 AddLiteralChar(c);

1328 } else {	1334 } else {

1329 AddLiteralChar(c0_);	1335 AddLiteralChar(c0_);

1330 Advance();	1336 Advance();

1331 }	1337 }

1332 }	1338 }

1333 literal->Complete();	1339 literal->Complete();

1334	1340

	1341 if (next_.literal_chars->is_one_byte()) {

	1342 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

	1343 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped);

	1344 }

1335 return Token::IDENTIFIER;	1345 return Token::IDENTIFIER;

1336 }	1346 }

1337	1347

1338	1348

1339 bool Scanner::ScanRegExpPattern(bool seen_equal) {	1349 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1340 // Scan: ('/' \| '/=') RegularExpressionBody '/' RegularExpressionFlags	1350 // Scan: ('/' \| '/=') RegularExpressionBody '/' RegularExpressionFlags

1341 bool in_character_class = false;	1351 bool in_character_class = false;

1342	1352

1343 // Previous token is either '/' or '/=', in the second case, the	1353 // Previous token is either '/' or '/=', in the second case, the

1344 // pattern starts at =.	1354 // pattern starts at =.

(...skipping 288 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1633 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));	1643 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));

1634 }	1644 }

1635 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1645 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1636	1646

1637 backing_store_.AddBlock(bytes);	1647 backing_store_.AddBlock(bytes);

1638 return backing_store_.EndSequence().start();	1648 return backing_store_.EndSequence().start();

1639 }	1649 }

1640	1650

1641 } // namespace internal	1651 } // namespace internal

1642 } // namespace v8	1652 } // namespace v8

OLD	NEW

« src/preparser.h ('K') | « src/scanner.h ('k') | src/token.h » ('j') | test/cctest/test-parsing.cc » ('J')