Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(685)

Side by Side Diff: src/scanner.cc

Issue 1429983002: [es6] early error when Identifier is an escaped reserved word (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Cosmetic fixup 1 Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/scanner.h" 7 #include "src/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
(...skipping 1159 matching lines...) Expand 10 before | Expand all | Expand 10 after
1170 KEYWORD("var", Token::VAR) \ 1170 KEYWORD("var", Token::VAR) \
1171 KEYWORD("void", Token::VOID) \ 1171 KEYWORD("void", Token::VOID) \
1172 KEYWORD_GROUP('w') \ 1172 KEYWORD_GROUP('w') \
1173 KEYWORD("while", Token::WHILE) \ 1173 KEYWORD("while", Token::WHILE) \
1174 KEYWORD("with", Token::WITH) \ 1174 KEYWORD("with", Token::WITH) \
1175 KEYWORD_GROUP('y') \ 1175 KEYWORD_GROUP('y') \
1176 KEYWORD("yield", Token::YIELD) 1176 KEYWORD("yield", Token::YIELD)
1177 1177
1178 1178
1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input, 1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
1180 int input_length) { 1180 int input_length, bool escaped) {
1181 DCHECK(input_length >= 1); 1181 DCHECK(input_length >= 1);
1182 const int kMinLength = 2; 1182 const int kMinLength = 2;
1183 const int kMaxLength = 10; 1183 const int kMaxLength = 10;
1184 if (input_length < kMinLength || input_length > kMaxLength) { 1184 if (input_length < kMinLength || input_length > kMaxLength) {
1185 return Token::IDENTIFIER; 1185 return Token::IDENTIFIER;
1186 } 1186 }
1187 switch (input[0]) { 1187 switch (input[0]) {
1188 default: 1188 default:
1189 #define KEYWORD_GROUP_CASE(ch) \ 1189 #define KEYWORD_GROUP_CASE(ch) \
1190 break; \ 1190 break; \
1191 case ch: 1191 case ch:
1192 #define KEYWORD(keyword, token) \ 1192 #define KEYWORD(keyword, token) \
1193 { \ 1193 { \
1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \ 1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \
1195 /* strlen(keyword) plus 1 for the NUL char. */ \ 1195 /* strlen(keyword) plus 1 for the NUL char. */ \
1196 const int keyword_length = sizeof(keyword) - 1; \ 1196 const int keyword_length = sizeof(keyword) - 1; \
1197 STATIC_ASSERT(keyword_length >= kMinLength); \ 1197 STATIC_ASSERT(keyword_length >= kMinLength); \
1198 STATIC_ASSERT(keyword_length <= kMaxLength); \ 1198 STATIC_ASSERT(keyword_length <= kMaxLength); \
1199 if (input_length == keyword_length && \ 1199 if (input_length == keyword_length && input[1] == keyword[1] && \
1200 input[1] == keyword[1] && \ 1200 (keyword_length <= 2 || input[2] == keyword[2]) && \
1201 (keyword_length <= 2 || input[2] == keyword[2]) && \ 1201 (keyword_length <= 3 || input[3] == keyword[3]) && \
1202 (keyword_length <= 3 || input[3] == keyword[3]) && \ 1202 (keyword_length <= 4 || input[4] == keyword[4]) && \
1203 (keyword_length <= 4 || input[4] == keyword[4]) && \ 1203 (keyword_length <= 5 || input[5] == keyword[5]) && \
1204 (keyword_length <= 5 || input[5] == keyword[5]) && \ 1204 (keyword_length <= 6 || input[6] == keyword[6]) && \
1205 (keyword_length <= 6 || input[6] == keyword[6]) && \ 1205 (keyword_length <= 7 || input[7] == keyword[7]) && \
1206 (keyword_length <= 7 || input[7] == keyword[7]) && \ 1206 (keyword_length <= 8 || input[8] == keyword[8]) && \
1207 (keyword_length <= 8 || input[8] == keyword[8]) && \ 1207 (keyword_length <= 9 || input[9] == keyword[9])) { \
1208 (keyword_length <= 9 || input[9] == keyword[9])) { \ 1208 if (escaped && token == Token::FUTURE_STRICT_RESERVED_WORD) { \
1209 return token; \ 1209 return Token::ESCAPED_STRICT_RESERVED_WORD; \
1210 } \ 1210 } \
1211 } 1211 return escaped ? Token::ESCAPED_KEYWORD : token; \
1212 } \
1213 }
1212 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) 1214 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
1213 } 1215 }
1214 return Token::IDENTIFIER; 1216 return Token::IDENTIFIER;
1215 } 1217 }
1216 1218
1217 1219
1218 bool Scanner::IdentifierIsFutureStrictReserved( 1220 bool Scanner::IdentifierIsFutureStrictReserved(
1219 const AstRawString* string) const { 1221 const AstRawString* string) const {
1220 // Keywords are always 1-byte strings. 1222 // Keywords are always 1-byte strings.
1221 if (!string->is_one_byte()) return false; 1223 if (!string->is_one_byte()) return false;
1222 if (string->IsOneByteEqualTo("let") || string->IsOneByteEqualTo("static") || 1224 if (string->IsOneByteEqualTo("let") || string->IsOneByteEqualTo("static") ||
1223 string->IsOneByteEqualTo("yield")) { 1225 string->IsOneByteEqualTo("yield")) {
1224 return true; 1226 return true;
1225 } 1227 }
1226 return Token::FUTURE_STRICT_RESERVED_WORD == 1228 return Token::FUTURE_STRICT_RESERVED_WORD ==
1227 KeywordOrIdentifierToken(string->raw_data(), string->length()); 1229 KeywordOrIdentifierToken(string->raw_data(), string->length(), false);
1228 } 1230 }
1229 1231
1230 1232
1231 Token::Value Scanner::ScanIdentifierOrKeyword() { 1233 Token::Value Scanner::ScanIdentifierOrKeyword() {
1232 DCHECK(unicode_cache_->IsIdentifierStart(c0_)); 1234 DCHECK(unicode_cache_->IsIdentifierStart(c0_));
1233 LiteralScope literal(this); 1235 LiteralScope literal(this);
1236 bool escaped = false;
adamk 2015/11/04 23:42:02 I think this mutable local actually makes this fun
caitp (gmail) 2015/11/04 23:54:16 Re-reading the function, I agree. I thought there
1234 if (IsInRange(c0_, 'a', 'z')) { 1237 if (IsInRange(c0_, 'a', 'z')) {
1235 do { 1238 do {
1236 uc32 first_char = c0_; 1239 uc32 first_char = c0_;
1237 Advance<false, false>(); 1240 Advance<false, false>();
1238 AddLiteralChar(first_char); 1241 AddLiteralChar(first_char);
1239 } while (IsInRange(c0_, 'a', 'z')); 1242 } while (IsInRange(c0_, 'a', 'z'));
1240 1243
1241 if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' || 1244 if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' ||
1242 c0_ == '$') { 1245 c0_ == '$') {
1243 // Identifier starting with lowercase. 1246 // Identifier starting with lowercase.
1244 uc32 first_char = c0_; 1247 uc32 first_char = c0_;
1245 Advance<false, false>(); 1248 Advance<false, false>();
1246 AddLiteralChar(first_char); 1249 AddLiteralChar(first_char);
1247 while (IsAsciiIdentifier(c0_)) { 1250 while (IsAsciiIdentifier(c0_)) {
1248 uc32 first_char = c0_; 1251 uc32 first_char = c0_;
1249 Advance<false, false>(); 1252 Advance<false, false>();
1250 AddLiteralChar(first_char); 1253 AddLiteralChar(first_char);
1251 } 1254 }
1252 if (c0_ <= kMaxAscii && c0_ != '\\') { 1255 if (c0_ <= kMaxAscii && c0_ != '\\') {
1253 literal.Complete(); 1256 literal.Complete();
1254 return Token::IDENTIFIER; 1257 return Token::IDENTIFIER;
1255 } 1258 }
1256 } else if (c0_ <= kMaxAscii && c0_ != '\\') { 1259 } else if (c0_ <= kMaxAscii && c0_ != '\\') {
1257 // Only a-z+: could be a keyword or identifier. 1260 // Only a-z+: could be a keyword or identifier.
1258 literal.Complete(); 1261 literal.Complete();
1259 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); 1262 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1260 return KeywordOrIdentifierToken(chars.start(), chars.length()); 1263 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped);
adamk 2015/11/04 23:42:02 You could simply pass false here
1261 } 1264 }
1262 1265
1263 HandleLeadSurrogate(); 1266 HandleLeadSurrogate();
1264 } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') { 1267 } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') {
1265 do { 1268 do {
1266 uc32 first_char = c0_; 1269 uc32 first_char = c0_;
1267 Advance<false, false>(); 1270 Advance<false, false>();
1268 AddLiteralChar(first_char); 1271 AddLiteralChar(first_char);
1269 } while (IsAsciiIdentifier(c0_)); 1272 } while (IsAsciiIdentifier(c0_));
1270 1273
1271 if (c0_ <= kMaxAscii && c0_ != '\\') { 1274 if (c0_ <= kMaxAscii && c0_ != '\\') {
1272 literal.Complete(); 1275 literal.Complete();
1273 return Token::IDENTIFIER; 1276 return Token::IDENTIFIER;
1274 } 1277 }
1275 1278
1276 HandleLeadSurrogate(); 1279 HandleLeadSurrogate();
1277 } else if (c0_ == '\\') { 1280 } else if (c0_ == '\\') {
1278 // Scan identifier start character. 1281 // Scan identifier start character.
1279 uc32 c = ScanIdentifierUnicodeEscape(); 1282 uc32 c = ScanIdentifierUnicodeEscape();
1283 escaped = true;
1280 // Only allow legal identifier start characters. 1284 // Only allow legal identifier start characters.
1281 if (c < 0 || 1285 if (c < 0 ||
1282 c == '\\' || // No recursive escapes. 1286 c == '\\' || // No recursive escapes.
1283 !unicode_cache_->IsIdentifierStart(c)) { 1287 !unicode_cache_->IsIdentifierStart(c)) {
1284 return Token::ILLEGAL; 1288 return Token::ILLEGAL;
1285 } 1289 }
1286 AddLiteralChar(c); 1290 AddLiteralChar(c);
1287 return ScanIdentifierSuffix(&literal); 1291 return ScanIdentifierSuffix(&literal, escaped);
adamk 2015/11/04 23:42:02 and true here
1288 } else { 1292 } else {
1289 uc32 first_char = c0_; 1293 uc32 first_char = c0_;
1290 Advance(); 1294 Advance();
1291 AddLiteralChar(first_char); 1295 AddLiteralChar(first_char);
1292 } 1296 }
1293 1297
1294 // Scan the rest of the identifier characters. 1298 // Scan the rest of the identifier characters.
1295 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1299 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
1296 if (c0_ != '\\') { 1300 if (c0_ != '\\') {
1297 uc32 next_char = c0_; 1301 uc32 next_char = c0_;
1298 Advance(); 1302 Advance();
1299 AddLiteralChar(next_char); 1303 AddLiteralChar(next_char);
1300 continue; 1304 continue;
1301 } 1305 }
1302 // Fallthrough if no longer able to complete keyword. 1306 // Fallthrough if no longer able to complete keyword.
1303 return ScanIdentifierSuffix(&literal); 1307 return ScanIdentifierSuffix(&literal, escaped);
adamk 2015/11/04 23:42:02 and false here
1304 } 1308 }
1305 1309
1306 literal.Complete(); 1310 literal.Complete();
1307 1311
1308 if (next_.literal_chars->is_one_byte()) { 1312 if (next_.literal_chars->is_one_byte()) {
1309 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); 1313 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1310 return KeywordOrIdentifierToken(chars.start(), chars.length()); 1314 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped);
adamk 2015/11/04 23:42:02 and here too
1311 } 1315 }
1312 return Token::IDENTIFIER; 1316 return Token::IDENTIFIER;
1313 } 1317 }
1314 1318
1315 1319
1316 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { 1320 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,
1321 bool escaped) {
1317 // Scan the rest of the identifier characters. 1322 // Scan the rest of the identifier characters.
1318 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { 1323 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
1319 if (c0_ == '\\') { 1324 if (c0_ == '\\') {
1320 uc32 c = ScanIdentifierUnicodeEscape(); 1325 uc32 c = ScanIdentifierUnicodeEscape();
1326 escaped = true;
1321 // Only allow legal identifier part characters. 1327 // Only allow legal identifier part characters.
1322 if (c < 0 || 1328 if (c < 0 ||
1323 c == '\\' || 1329 c == '\\' ||
1324 !unicode_cache_->IsIdentifierPart(c)) { 1330 !unicode_cache_->IsIdentifierPart(c)) {
1325 return Token::ILLEGAL; 1331 return Token::ILLEGAL;
1326 } 1332 }
1327 AddLiteralChar(c); 1333 AddLiteralChar(c);
1328 } else { 1334 } else {
1329 AddLiteralChar(c0_); 1335 AddLiteralChar(c0_);
1330 Advance(); 1336 Advance();
1331 } 1337 }
1332 } 1338 }
1333 literal->Complete(); 1339 literal->Complete();
1334 1340
1341 if (next_.literal_chars->is_one_byte()) {
1342 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1343 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped);
1344 }
1335 return Token::IDENTIFIER; 1345 return Token::IDENTIFIER;
1336 } 1346 }
1337 1347
1338 1348
1339 bool Scanner::ScanRegExpPattern(bool seen_equal) { 1349 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1340 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 1350 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1341 bool in_character_class = false; 1351 bool in_character_class = false;
1342 1352
1343 // Previous token is either '/' or '/=', in the second case, the 1353 // Previous token is either '/' or '/=', in the second case, the
1344 // pattern starts at =. 1354 // pattern starts at =.
(...skipping 288 matching lines...) Expand 10 before | Expand all | Expand 10 after
1633 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1643 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1634 } 1644 }
1635 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1645 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1636 1646
1637 backing_store_.AddBlock(bytes); 1647 backing_store_.AddBlock(bytes);
1638 return backing_store_.EndSequence().start(); 1648 return backing_store_.EndSequence().start();
1639 } 1649 }
1640 1650
1641 } // namespace internal 1651 } // namespace internal
1642 } // namespace v8 1652 } // namespace v8
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698