Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include "src/scanner.h" | 7 #include "src/scanner.h" |
| 8 | 8 |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| (...skipping 1159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1170 KEYWORD("var", Token::VAR) \ | 1170 KEYWORD("var", Token::VAR) \ |
| 1171 KEYWORD("void", Token::VOID) \ | 1171 KEYWORD("void", Token::VOID) \ |
| 1172 KEYWORD_GROUP('w') \ | 1172 KEYWORD_GROUP('w') \ |
| 1173 KEYWORD("while", Token::WHILE) \ | 1173 KEYWORD("while", Token::WHILE) \ |
| 1174 KEYWORD("with", Token::WITH) \ | 1174 KEYWORD("with", Token::WITH) \ |
| 1175 KEYWORD_GROUP('y') \ | 1175 KEYWORD_GROUP('y') \ |
| 1176 KEYWORD("yield", Token::YIELD) | 1176 KEYWORD("yield", Token::YIELD) |
| 1177 | 1177 |
| 1178 | 1178 |
| 1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input, | 1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input, |
| 1180 int input_length) { | 1180 int input_length, bool escaped) { |
| 1181 DCHECK(input_length >= 1); | 1181 DCHECK(input_length >= 1); |
| 1182 const int kMinLength = 2; | 1182 const int kMinLength = 2; |
| 1183 const int kMaxLength = 10; | 1183 const int kMaxLength = 10; |
| 1184 if (input_length < kMinLength || input_length > kMaxLength) { | 1184 if (input_length < kMinLength || input_length > kMaxLength) { |
| 1185 return Token::IDENTIFIER; | 1185 return Token::IDENTIFIER; |
| 1186 } | 1186 } |
| 1187 switch (input[0]) { | 1187 switch (input[0]) { |
| 1188 default: | 1188 default: |
| 1189 #define KEYWORD_GROUP_CASE(ch) \ | 1189 #define KEYWORD_GROUP_CASE(ch) \ |
| 1190 break; \ | 1190 break; \ |
| 1191 case ch: | 1191 case ch: |
| 1192 #define KEYWORD(keyword, token) \ | 1192 #define KEYWORD(keyword, token) \ |
| 1193 { \ | 1193 { \ |
| 1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \ | 1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \ |
| 1195 /* strlen(keyword) plus 1 for the NUL char. */ \ | 1195 /* strlen(keyword) plus 1 for the NUL char. */ \ |
| 1196 const int keyword_length = sizeof(keyword) - 1; \ | 1196 const int keyword_length = sizeof(keyword) - 1; \ |
| 1197 STATIC_ASSERT(keyword_length >= kMinLength); \ | 1197 STATIC_ASSERT(keyword_length >= kMinLength); \ |
| 1198 STATIC_ASSERT(keyword_length <= kMaxLength); \ | 1198 STATIC_ASSERT(keyword_length <= kMaxLength); \ |
| 1199 if (input_length == keyword_length && \ | 1199 if (input_length == keyword_length && input[1] == keyword[1] && \ |
| 1200 input[1] == keyword[1] && \ | 1200 (keyword_length <= 2 || input[2] == keyword[2]) && \ |
| 1201 (keyword_length <= 2 || input[2] == keyword[2]) && \ | 1201 (keyword_length <= 3 || input[3] == keyword[3]) && \ |
| 1202 (keyword_length <= 3 || input[3] == keyword[3]) && \ | 1202 (keyword_length <= 4 || input[4] == keyword[4]) && \ |
| 1203 (keyword_length <= 4 || input[4] == keyword[4]) && \ | 1203 (keyword_length <= 5 || input[5] == keyword[5]) && \ |
| 1204 (keyword_length <= 5 || input[5] == keyword[5]) && \ | 1204 (keyword_length <= 6 || input[6] == keyword[6]) && \ |
| 1205 (keyword_length <= 6 || input[6] == keyword[6]) && \ | 1205 (keyword_length <= 7 || input[7] == keyword[7]) && \ |
| 1206 (keyword_length <= 7 || input[7] == keyword[7]) && \ | 1206 (keyword_length <= 8 || input[8] == keyword[8]) && \ |
| 1207 (keyword_length <= 8 || input[8] == keyword[8]) && \ | 1207 (keyword_length <= 9 || input[9] == keyword[9])) { \ |
| 1208 (keyword_length <= 9 || input[9] == keyword[9])) { \ | 1208 if (escaped && token == Token::FUTURE_STRICT_RESERVED_WORD) { \ |
| 1209 return token; \ | 1209 return Token::ESCAPED_STRICT_RESERVED_WORD; \ |
| 1210 } \ | 1210 } \ |
| 1211 } | 1211 return escaped ? Token::ESCAPED_KEYWORD : token; \ |
| 1212 } \ | |
| 1213 } | |
| 1212 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) | 1214 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) |
| 1213 } | 1215 } |
| 1214 return Token::IDENTIFIER; | 1216 return Token::IDENTIFIER; |
| 1215 } | 1217 } |
| 1216 | 1218 |
| 1217 | 1219 |
| 1218 bool Scanner::IdentifierIsFutureStrictReserved( | 1220 bool Scanner::IdentifierIsFutureStrictReserved( |
| 1219 const AstRawString* string) const { | 1221 const AstRawString* string) const { |
| 1220 // Keywords are always 1-byte strings. | 1222 // Keywords are always 1-byte strings. |
| 1221 if (!string->is_one_byte()) return false; | 1223 if (!string->is_one_byte()) return false; |
| 1222 if (string->IsOneByteEqualTo("let") || string->IsOneByteEqualTo("static") || | 1224 if (string->IsOneByteEqualTo("let") || string->IsOneByteEqualTo("static") || |
| 1223 string->IsOneByteEqualTo("yield")) { | 1225 string->IsOneByteEqualTo("yield")) { |
| 1224 return true; | 1226 return true; |
| 1225 } | 1227 } |
| 1226 return Token::FUTURE_STRICT_RESERVED_WORD == | 1228 return Token::FUTURE_STRICT_RESERVED_WORD == |
| 1227 KeywordOrIdentifierToken(string->raw_data(), string->length()); | 1229 KeywordOrIdentifierToken(string->raw_data(), string->length(), false); |
| 1228 } | 1230 } |
| 1229 | 1231 |
| 1230 | 1232 |
| 1231 Token::Value Scanner::ScanIdentifierOrKeyword() { | 1233 Token::Value Scanner::ScanIdentifierOrKeyword() { |
| 1232 DCHECK(unicode_cache_->IsIdentifierStart(c0_)); | 1234 DCHECK(unicode_cache_->IsIdentifierStart(c0_)); |
| 1233 LiteralScope literal(this); | 1235 LiteralScope literal(this); |
| 1236 bool escaped = false; | |
|
adamk
2015/11/04 23:42:02
I think this mutable local actually makes this fun… [comment truncated in page extract]
caitp (gmail)
2015/11/04 23:54:16
Re-reading the function, I agree. I thought there… [comment truncated in page extract]
| |
| 1234 if (IsInRange(c0_, 'a', 'z')) { | 1237 if (IsInRange(c0_, 'a', 'z')) { |
| 1235 do { | 1238 do { |
| 1236 uc32 first_char = c0_; | 1239 uc32 first_char = c0_; |
| 1237 Advance<false, false>(); | 1240 Advance<false, false>(); |
| 1238 AddLiteralChar(first_char); | 1241 AddLiteralChar(first_char); |
| 1239 } while (IsInRange(c0_, 'a', 'z')); | 1242 } while (IsInRange(c0_, 'a', 'z')); |
| 1240 | 1243 |
| 1241 if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' || | 1244 if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' || |
| 1242 c0_ == '$') { | 1245 c0_ == '$') { |
| 1243 // Identifier starting with lowercase. | 1246 // Identifier starting with lowercase. |
| 1244 uc32 first_char = c0_; | 1247 uc32 first_char = c0_; |
| 1245 Advance<false, false>(); | 1248 Advance<false, false>(); |
| 1246 AddLiteralChar(first_char); | 1249 AddLiteralChar(first_char); |
| 1247 while (IsAsciiIdentifier(c0_)) { | 1250 while (IsAsciiIdentifier(c0_)) { |
| 1248 uc32 first_char = c0_; | 1251 uc32 first_char = c0_; |
| 1249 Advance<false, false>(); | 1252 Advance<false, false>(); |
| 1250 AddLiteralChar(first_char); | 1253 AddLiteralChar(first_char); |
| 1251 } | 1254 } |
| 1252 if (c0_ <= kMaxAscii && c0_ != '\\') { | 1255 if (c0_ <= kMaxAscii && c0_ != '\\') { |
| 1253 literal.Complete(); | 1256 literal.Complete(); |
| 1254 return Token::IDENTIFIER; | 1257 return Token::IDENTIFIER; |
| 1255 } | 1258 } |
| 1256 } else if (c0_ <= kMaxAscii && c0_ != '\\') { | 1259 } else if (c0_ <= kMaxAscii && c0_ != '\\') { |
| 1257 // Only a-z+: could be a keyword or identifier. | 1260 // Only a-z+: could be a keyword or identifier. |
| 1258 literal.Complete(); | 1261 literal.Complete(); |
| 1259 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1262 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
| 1260 return KeywordOrIdentifierToken(chars.start(), chars.length()); | 1263 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped); |
|
adamk
2015/11/04 23:42:02
You could simply pass false here
| |
| 1261 } | 1264 } |
| 1262 | 1265 |
| 1263 HandleLeadSurrogate(); | 1266 HandleLeadSurrogate(); |
| 1264 } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') { | 1267 } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') { |
| 1265 do { | 1268 do { |
| 1266 uc32 first_char = c0_; | 1269 uc32 first_char = c0_; |
| 1267 Advance<false, false>(); | 1270 Advance<false, false>(); |
| 1268 AddLiteralChar(first_char); | 1271 AddLiteralChar(first_char); |
| 1269 } while (IsAsciiIdentifier(c0_)); | 1272 } while (IsAsciiIdentifier(c0_)); |
| 1270 | 1273 |
| 1271 if (c0_ <= kMaxAscii && c0_ != '\\') { | 1274 if (c0_ <= kMaxAscii && c0_ != '\\') { |
| 1272 literal.Complete(); | 1275 literal.Complete(); |
| 1273 return Token::IDENTIFIER; | 1276 return Token::IDENTIFIER; |
| 1274 } | 1277 } |
| 1275 | 1278 |
| 1276 HandleLeadSurrogate(); | 1279 HandleLeadSurrogate(); |
| 1277 } else if (c0_ == '\\') { | 1280 } else if (c0_ == '\\') { |
| 1278 // Scan identifier start character. | 1281 // Scan identifier start character. |
| 1279 uc32 c = ScanIdentifierUnicodeEscape(); | 1282 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1283 escaped = true; | |
| 1280 // Only allow legal identifier start characters. | 1284 // Only allow legal identifier start characters. |
| 1281 if (c < 0 || | 1285 if (c < 0 || |
| 1282 c == '\\' || // No recursive escapes. | 1286 c == '\\' || // No recursive escapes. |
| 1283 !unicode_cache_->IsIdentifierStart(c)) { | 1287 !unicode_cache_->IsIdentifierStart(c)) { |
| 1284 return Token::ILLEGAL; | 1288 return Token::ILLEGAL; |
| 1285 } | 1289 } |
| 1286 AddLiteralChar(c); | 1290 AddLiteralChar(c); |
| 1287 return ScanIdentifierSuffix(&literal); | 1291 return ScanIdentifierSuffix(&literal, escaped); |
|
adamk
2015/11/04 23:42:02
and true here
| |
| 1288 } else { | 1292 } else { |
| 1289 uc32 first_char = c0_; | 1293 uc32 first_char = c0_; |
| 1290 Advance(); | 1294 Advance(); |
| 1291 AddLiteralChar(first_char); | 1295 AddLiteralChar(first_char); |
| 1292 } | 1296 } |
| 1293 | 1297 |
| 1294 // Scan the rest of the identifier characters. | 1298 // Scan the rest of the identifier characters. |
| 1295 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1299 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
| 1296 if (c0_ != '\\') { | 1300 if (c0_ != '\\') { |
| 1297 uc32 next_char = c0_; | 1301 uc32 next_char = c0_; |
| 1298 Advance(); | 1302 Advance(); |
| 1299 AddLiteralChar(next_char); | 1303 AddLiteralChar(next_char); |
| 1300 continue; | 1304 continue; |
| 1301 } | 1305 } |
| 1302 // Fallthrough if no longer able to complete keyword. | 1306 // Fallthrough if no longer able to complete keyword. |
| 1303 return ScanIdentifierSuffix(&literal); | 1307 return ScanIdentifierSuffix(&literal, escaped); |
|
adamk
2015/11/04 23:42:02
and false here
| |
| 1304 } | 1308 } |
| 1305 | 1309 |
| 1306 literal.Complete(); | 1310 literal.Complete(); |
| 1307 | 1311 |
| 1308 if (next_.literal_chars->is_one_byte()) { | 1312 if (next_.literal_chars->is_one_byte()) { |
| 1309 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1313 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
| 1310 return KeywordOrIdentifierToken(chars.start(), chars.length()); | 1314 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped); |
|
adamk
2015/11/04 23:42:02
and here too
| |
| 1311 } | 1315 } |
| 1312 return Token::IDENTIFIER; | 1316 return Token::IDENTIFIER; |
| 1313 } | 1317 } |
| 1314 | 1318 |
| 1315 | 1319 |
| 1316 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { | 1320 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, |
| 1321 bool escaped) { | |
| 1317 // Scan the rest of the identifier characters. | 1322 // Scan the rest of the identifier characters. |
| 1318 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1323 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
| 1319 if (c0_ == '\\') { | 1324 if (c0_ == '\\') { |
| 1320 uc32 c = ScanIdentifierUnicodeEscape(); | 1325 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1326 escaped = true; | |
| 1321 // Only allow legal identifier part characters. | 1327 // Only allow legal identifier part characters. |
| 1322 if (c < 0 || | 1328 if (c < 0 || |
| 1323 c == '\\' || | 1329 c == '\\' || |
| 1324 !unicode_cache_->IsIdentifierPart(c)) { | 1330 !unicode_cache_->IsIdentifierPart(c)) { |
| 1325 return Token::ILLEGAL; | 1331 return Token::ILLEGAL; |
| 1326 } | 1332 } |
| 1327 AddLiteralChar(c); | 1333 AddLiteralChar(c); |
| 1328 } else { | 1334 } else { |
| 1329 AddLiteralChar(c0_); | 1335 AddLiteralChar(c0_); |
| 1330 Advance(); | 1336 Advance(); |
| 1331 } | 1337 } |
| 1332 } | 1338 } |
| 1333 literal->Complete(); | 1339 literal->Complete(); |
| 1334 | 1340 |
| 1341 if (next_.literal_chars->is_one_byte()) { | |
| 1342 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | |
| 1343 return KeywordOrIdentifierToken(chars.start(), chars.length(), escaped); | |
| 1344 } | |
| 1335 return Token::IDENTIFIER; | 1345 return Token::IDENTIFIER; |
| 1336 } | 1346 } |
| 1337 | 1347 |
| 1338 | 1348 |
| 1339 bool Scanner::ScanRegExpPattern(bool seen_equal) { | 1349 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
| 1340 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 1350 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 1341 bool in_character_class = false; | 1351 bool in_character_class = false; |
| 1342 | 1352 |
| 1343 // Previous token is either '/' or '/=', in the second case, the | 1353 // Previous token is either '/' or '/=', in the second case, the |
| 1344 // pattern starts at =. | 1354 // pattern starts at =. |
| (...skipping 288 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1633 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1643 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
| 1634 } | 1644 } |
| 1635 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1645 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
| 1636 | 1646 |
| 1637 backing_store_.AddBlock(bytes); | 1647 backing_store_.AddBlock(bytes); |
| 1638 return backing_store_.EndSequence().start(); | 1648 return backing_store_.EndSequence().start(); |
| 1639 } | 1649 } |
| 1640 | 1650 |
| 1641 } // namespace internal | 1651 } // namespace internal |
| 1642 } // namespace v8 | 1652 } // namespace v8 |
| OLD | NEW |