OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/scanner.h" | 7 #include "src/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
(...skipping 1159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1170 KEYWORD("var", Token::VAR) \ | 1170 KEYWORD("var", Token::VAR) \ |
1171 KEYWORD("void", Token::VOID) \ | 1171 KEYWORD("void", Token::VOID) \ |
1172 KEYWORD_GROUP('w') \ | 1172 KEYWORD_GROUP('w') \ |
1173 KEYWORD("while", Token::WHILE) \ | 1173 KEYWORD("while", Token::WHILE) \ |
1174 KEYWORD("with", Token::WITH) \ | 1174 KEYWORD("with", Token::WITH) \ |
1175 KEYWORD_GROUP('y') \ | 1175 KEYWORD_GROUP('y') \ |
1176 KEYWORD("yield", Token::YIELD) | 1176 KEYWORD("yield", Token::YIELD) |
1177 | 1177 |
1178 | 1178 |
1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input, | 1179 static Token::Value KeywordOrIdentifierToken(const uint8_t* input, |
1180 int input_length) { | 1180 int input_length, bool escaped) { |
1181 DCHECK(input_length >= 1); | 1181 DCHECK(input_length >= 1); |
1182 const int kMinLength = 2; | 1182 const int kMinLength = 2; |
1183 const int kMaxLength = 10; | 1183 const int kMaxLength = 10; |
1184 if (input_length < kMinLength || input_length > kMaxLength) { | 1184 if (input_length < kMinLength || input_length > kMaxLength) { |
1185 return Token::IDENTIFIER; | 1185 return Token::IDENTIFIER; |
1186 } | 1186 } |
1187 switch (input[0]) { | 1187 switch (input[0]) { |
1188 default: | 1188 default: |
1189 #define KEYWORD_GROUP_CASE(ch) \ | 1189 #define KEYWORD_GROUP_CASE(ch) \ |
1190 break; \ | 1190 break; \ |
1191 case ch: | 1191 case ch: |
1192 #define KEYWORD(keyword, token) \ | 1192 #define KEYWORD(keyword, token) \ |
1193 { \ | 1193 { \ |
1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \ | 1194 /* 'keyword' is a char array, so sizeof(keyword) is */ \ |
1195 /* strlen(keyword) plus 1 for the NUL char. */ \ | 1195 /* strlen(keyword) plus 1 for the NUL char. */ \ |
1196 const int keyword_length = sizeof(keyword) - 1; \ | 1196 const int keyword_length = sizeof(keyword) - 1; \ |
1197 STATIC_ASSERT(keyword_length >= kMinLength); \ | 1197 STATIC_ASSERT(keyword_length >= kMinLength); \ |
1198 STATIC_ASSERT(keyword_length <= kMaxLength); \ | 1198 STATIC_ASSERT(keyword_length <= kMaxLength); \ |
1199 if (input_length == keyword_length && \ | 1199 if (input_length == keyword_length && input[1] == keyword[1] && \ |
1200 input[1] == keyword[1] && \ | 1200 (keyword_length <= 2 || input[2] == keyword[2]) && \ |
1201 (keyword_length <= 2 || input[2] == keyword[2]) && \ | 1201 (keyword_length <= 3 || input[3] == keyword[3]) && \ |
1202 (keyword_length <= 3 || input[3] == keyword[3]) && \ | 1202 (keyword_length <= 4 || input[4] == keyword[4]) && \ |
1203 (keyword_length <= 4 || input[4] == keyword[4]) && \ | 1203 (keyword_length <= 5 || input[5] == keyword[5]) && \ |
1204 (keyword_length <= 5 || input[5] == keyword[5]) && \ | 1204 (keyword_length <= 6 || input[6] == keyword[6]) && \ |
1205 (keyword_length <= 6 || input[6] == keyword[6]) && \ | 1205 (keyword_length <= 7 || input[7] == keyword[7]) && \ |
1206 (keyword_length <= 7 || input[7] == keyword[7]) && \ | 1206 (keyword_length <= 8 || input[8] == keyword[8]) && \ |
1207 (keyword_length <= 8 || input[8] == keyword[8]) && \ | 1207 (keyword_length <= 9 || input[9] == keyword[9])) { \ |
1208 (keyword_length <= 9 || input[9] == keyword[9])) { \ | 1208 if (escaped && token == Token::FUTURE_STRICT_RESERVED_WORD) { \ |
rossberg
2015/11/06 13:31:05
Nit: Can we regroup this to
if (escaped) {
return ... [remainder of comment truncated in this export]
caitp (gmail)
2015/11/06 19:08:06
Done.
| |
1209 return token; \ | 1209 return Token::ESCAPED_STRICT_RESERVED_WORD; \ |
1210 } \ | 1210 } \ |
1211 } | 1211 return escaped ? Token::ESCAPED_KEYWORD : token; \ |
1212 } \ | |
1213 } | |
1212 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) | 1214 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) |
1213 } | 1215 } |
1214 return Token::IDENTIFIER; | 1216 return Token::IDENTIFIER; |
1215 } | 1217 } |
1216 | 1218 |
1217 | 1219 |
1218 bool Scanner::IdentifierIsFutureStrictReserved( | 1220 bool Scanner::IdentifierIsFutureStrictReserved( |
1219 const AstRawString* string) const { | 1221 const AstRawString* string) const { |
1220 // Keywords are always 1-byte strings. | 1222 // Keywords are always 1-byte strings. |
1221 if (!string->is_one_byte()) return false; | 1223 if (!string->is_one_byte()) return false; |
1222 if (string->IsOneByteEqualTo("let") || string->IsOneByteEqualTo("static") || | 1224 if (string->IsOneByteEqualTo("let") || string->IsOneByteEqualTo("static") || |
1223 string->IsOneByteEqualTo("yield")) { | 1225 string->IsOneByteEqualTo("yield")) { |
1224 return true; | 1226 return true; |
1225 } | 1227 } |
1226 return Token::FUTURE_STRICT_RESERVED_WORD == | 1228 return Token::FUTURE_STRICT_RESERVED_WORD == |
1227 KeywordOrIdentifierToken(string->raw_data(), string->length()); | 1229 KeywordOrIdentifierToken(string->raw_data(), string->length(), false); |
1228 } | 1230 } |
1229 | 1231 |
1230 | 1232 |
1231 Token::Value Scanner::ScanIdentifierOrKeyword() { | 1233 Token::Value Scanner::ScanIdentifierOrKeyword() { |
1232 DCHECK(unicode_cache_->IsIdentifierStart(c0_)); | 1234 DCHECK(unicode_cache_->IsIdentifierStart(c0_)); |
1233 LiteralScope literal(this); | 1235 LiteralScope literal(this); |
1234 if (IsInRange(c0_, 'a', 'z')) { | 1236 if (IsInRange(c0_, 'a', 'z')) { |
1235 do { | 1237 do { |
1236 uc32 first_char = c0_; | 1238 uc32 first_char = c0_; |
1237 Advance<false, false>(); | 1239 Advance<false, false>(); |
(...skipping 12 matching lines...) Expand all Loading... | |
1250 AddLiteralChar(first_char); | 1252 AddLiteralChar(first_char); |
1251 } | 1253 } |
1252 if (c0_ <= kMaxAscii && c0_ != '\\') { | 1254 if (c0_ <= kMaxAscii && c0_ != '\\') { |
1253 literal.Complete(); | 1255 literal.Complete(); |
1254 return Token::IDENTIFIER; | 1256 return Token::IDENTIFIER; |
1255 } | 1257 } |
1256 } else if (c0_ <= kMaxAscii && c0_ != '\\') { | 1258 } else if (c0_ <= kMaxAscii && c0_ != '\\') { |
1257 // Only a-z+: could be a keyword or identifier. | 1259 // Only a-z+: could be a keyword or identifier. |
1258 literal.Complete(); | 1260 literal.Complete(); |
1259 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1261 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
1260 return KeywordOrIdentifierToken(chars.start(), chars.length()); | 1262 return KeywordOrIdentifierToken(chars.start(), chars.length(), false); |
1261 } | 1263 } |
1262 | 1264 |
1263 HandleLeadSurrogate(); | 1265 HandleLeadSurrogate(); |
1264 } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') { | 1266 } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') { |
1265 do { | 1267 do { |
1266 uc32 first_char = c0_; | 1268 uc32 first_char = c0_; |
1267 Advance<false, false>(); | 1269 Advance<false, false>(); |
1268 AddLiteralChar(first_char); | 1270 AddLiteralChar(first_char); |
1269 } while (IsAsciiIdentifier(c0_)); | 1271 } while (IsAsciiIdentifier(c0_)); |
1270 | 1272 |
1271 if (c0_ <= kMaxAscii && c0_ != '\\') { | 1273 if (c0_ <= kMaxAscii && c0_ != '\\') { |
1272 literal.Complete(); | 1274 literal.Complete(); |
1273 return Token::IDENTIFIER; | 1275 return Token::IDENTIFIER; |
1274 } | 1276 } |
1275 | 1277 |
1276 HandleLeadSurrogate(); | 1278 HandleLeadSurrogate(); |
1277 } else if (c0_ == '\\') { | 1279 } else if (c0_ == '\\') { |
1278 // Scan identifier start character. | 1280 // Scan identifier start character. |
1279 uc32 c = ScanIdentifierUnicodeEscape(); | 1281 uc32 c = ScanIdentifierUnicodeEscape(); |
1280 // Only allow legal identifier start characters. | 1282 // Only allow legal identifier start characters. |
1281 if (c < 0 || | 1283 if (c < 0 || |
1282 c == '\\' || // No recursive escapes. | 1284 c == '\\' || // No recursive escapes. |
1283 !unicode_cache_->IsIdentifierStart(c)) { | 1285 !unicode_cache_->IsIdentifierStart(c)) { |
1284 return Token::ILLEGAL; | 1286 return Token::ILLEGAL; |
1285 } | 1287 } |
1286 AddLiteralChar(c); | 1288 AddLiteralChar(c); |
1287 return ScanIdentifierSuffix(&literal); | 1289 return ScanIdentifierSuffix(&literal, true); |
1288 } else { | 1290 } else { |
1289 uc32 first_char = c0_; | 1291 uc32 first_char = c0_; |
1290 Advance(); | 1292 Advance(); |
1291 AddLiteralChar(first_char); | 1293 AddLiteralChar(first_char); |
1292 } | 1294 } |
1293 | 1295 |
1294 // Scan the rest of the identifier characters. | 1296 // Scan the rest of the identifier characters. |
1295 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1297 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
1296 if (c0_ != '\\') { | 1298 if (c0_ != '\\') { |
1297 uc32 next_char = c0_; | 1299 uc32 next_char = c0_; |
1298 Advance(); | 1300 Advance(); |
1299 AddLiteralChar(next_char); | 1301 AddLiteralChar(next_char); |
1300 continue; | 1302 continue; |
1301 } | 1303 } |
1302 // Fallthrough if no longer able to complete keyword. | 1304 // Fallthrough if no longer able to complete keyword. |
1303 return ScanIdentifierSuffix(&literal); | 1305 return ScanIdentifierSuffix(&literal, false); |
1304 } | 1306 } |
1305 | 1307 |
1306 literal.Complete(); | 1308 literal.Complete(); |
1307 | 1309 |
1308 if (next_.literal_chars->is_one_byte()) { | 1310 if (next_.literal_chars->is_one_byte()) { |
1309 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1311 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
1310 return KeywordOrIdentifierToken(chars.start(), chars.length()); | 1312 return KeywordOrIdentifierToken(chars.start(), chars.length(), false); |
1311 } | 1313 } |
1312 return Token::IDENTIFIER; | 1314 return Token::IDENTIFIER; |
1313 } | 1315 } |
1314 | 1316 |
1315 | 1317 |
1316 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { | 1318 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal, |
1319 bool escaped) { | |
1317 // Scan the rest of the identifier characters. | 1320 // Scan the rest of the identifier characters. |
1318 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { | 1321 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
1319 if (c0_ == '\\') { | 1322 if (c0_ == '\\') { |
1320 uc32 c = ScanIdentifierUnicodeEscape(); | 1323 uc32 c = ScanIdentifierUnicodeEscape(); |
1324 escaped = true; | |
1321 // Only allow legal identifier part characters. | 1325 // Only allow legal identifier part characters. |
1322 if (c < 0 || | 1326 if (c < 0 || |
1323 c == '\\' || | 1327 c == '\\' || |
1324 !unicode_cache_->IsIdentifierPart(c)) { | 1328 !unicode_cache_->IsIdentifierPart(c)) { |
1325 return Token::ILLEGAL; | 1329 return Token::ILLEGAL; |
1326 } | 1330 } |
1327 AddLiteralChar(c); | 1331 AddLiteralChar(c); |
1328 } else { | 1332 } else { |
1329 AddLiteralChar(c0_); | 1333 AddLiteralChar(c0_); |
1330 Advance(); | 1334 Advance(); |
1331 } | 1335 } |
1332 } | 1336 } |
1333 literal->Complete(); | 1337 literal->Complete(); |
1334 | 1338 |
1339 if (next_.literal_chars->is_one_byte() && escaped) { | |
rossberg
2015/11/06 13:31:05
Nit: put the escaped first, since it is the cheaper ... [remainder of comment truncated in this export]
caitp (gmail)
2015/11/06 19:08:06
Done.
| |
1340 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | |
1341 return KeywordOrIdentifierToken(chars.start(), chars.length(), true); | |
1342 } | |
1335 return Token::IDENTIFIER; | 1343 return Token::IDENTIFIER; |
1336 } | 1344 } |
1337 | 1345 |
1338 | 1346 |
1339 bool Scanner::ScanRegExpPattern(bool seen_equal) { | 1347 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
1340 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 1348 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
1341 bool in_character_class = false; | 1349 bool in_character_class = false; |
1342 | 1350 |
1343 // Previous token is either '/' or '/=', in the second case, the | 1351 // Previous token is either '/' or '/=', in the second case, the |
1344 // pattern starts at =. | 1352 // pattern starts at =. |
(...skipping 288 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1633 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1641 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
1634 } | 1642 } |
1635 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1643 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
1636 | 1644 |
1637 backing_store_.AddBlock(bytes); | 1645 backing_store_.AddBlock(bytes); |
1638 return backing_store_.EndSequence().start(); | 1646 return backing_store_.EndSequence().start(); |
1639 } | 1647 } |
1640 | 1648 |
1641 } // namespace internal | 1649 } // namespace internal |
1642 } // namespace v8 | 1650 } // namespace v8 |
OLD | NEW |