OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
263 multiline() ? RegExpAssertion::END_OF_LINE | 263 multiline() ? RegExpAssertion::END_OF_LINE |
264 : RegExpAssertion::END_OF_INPUT; | 264 : RegExpAssertion::END_OF_INPUT; |
265 builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type)); | 265 builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type)); |
266 continue; | 266 continue; |
267 } | 267 } |
268 case '.': { | 268 case '.': { |
269 Advance(); | 269 Advance(); |
270 // everything except \x0a, \x0d, \u2028 and \u2029 | 270 // everything except \x0a, \x0d, \u2028 and \u2029 |
271 ZoneList<CharacterRange>* ranges = | 271 ZoneList<CharacterRange>* ranges = |
272 new (zone()) ZoneList<CharacterRange>(2, zone()); | 272 new (zone()) ZoneList<CharacterRange>(2, zone()); |
273 CharacterRange::AddClassEscape('.', ranges, zone()); | 273 CharacterRange::AddClassEscape('.', ranges, false, zone()); |
274 RegExpCharacterClass* cc = | 274 RegExpCharacterClass* cc = |
275 new (zone()) RegExpCharacterClass(ranges, false); | 275 new (zone()) RegExpCharacterClass(ranges, false); |
276 builder->AddCharacterClass(cc); | 276 builder->AddCharacterClass(cc); |
277 break; | 277 break; |
278 } | 278 } |
279 case '(': { | 279 case '(': { |
280 SubexpressionType subexpr_type = CAPTURE; | 280 SubexpressionType subexpr_type = CAPTURE; |
281 RegExpLookaround::Type lookaround_type = state->lookaround_type(); | 281 RegExpLookaround::Type lookaround_type = state->lookaround_type(); |
282 bool is_named_capture = false; | 282 bool is_named_capture = false; |
283 Advance(); | 283 Advance(); |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
370 case 'd': | 370 case 'd': |
371 case 'D': | 371 case 'D': |
372 case 's': | 372 case 's': |
373 case 'S': | 373 case 'S': |
374 case 'w': | 374 case 'w': |
375 case 'W': { | 375 case 'W': { |
376 uc32 c = Next(); | 376 uc32 c = Next(); |
377 Advance(2); | 377 Advance(2); |
378 ZoneList<CharacterRange>* ranges = | 378 ZoneList<CharacterRange>* ranges = |
379 new (zone()) ZoneList<CharacterRange>(2, zone()); | 379 new (zone()) ZoneList<CharacterRange>(2, zone()); |
380 CharacterRange::AddClassEscape(c, ranges, zone()); | 380 CharacterRange::AddClassEscape(c, ranges, |
| 381 unicode() && ignore_case(), zone()); |
381 RegExpCharacterClass* cc = | 382 RegExpCharacterClass* cc = |
382 new (zone()) RegExpCharacterClass(ranges, false); | 383 new (zone()) RegExpCharacterClass(ranges, false); |
383 builder->AddCharacterClass(cc); | 384 builder->AddCharacterClass(cc); |
384 break; | 385 break; |
385 } | 386 } |
386 case 'p': | 387 case 'p': |
387 case 'P': { | 388 case 'P': { |
388 uc32 p = Next(); | 389 uc32 p = Next(); |
389 Advance(2); | 390 Advance(2); |
390 if (unicode()) { | 391 if (unicode()) { |
(...skipping 991 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1382 return CharacterRange::Singleton(first); | 1383 return CharacterRange::Singleton(first); |
1383 } | 1384 } |
1384 | 1385 |
1385 static const uc16 kNoCharClass = 0; | 1386 static const uc16 kNoCharClass = 0; |
1386 | 1387 |
1387 // Adds a range or a pre-defined character class to the character ranges. | 1388 // Adds a range or a pre-defined character class to the character ranges. |
1388 // If char_class is not kNoCharClass, it's interpreted as a class | 1389 // If char_class is not kNoCharClass, it's interpreted as a class |
1389 // escape (e.g., 's' means whitespace, from '\s'); otherwise range is added. | 1390 // escape (e.g., 's' means whitespace, from '\s'); otherwise range is added. |
1390 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, | 1391 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
1391 uc16 char_class, CharacterRange range, | 1392 uc16 char_class, CharacterRange range, |
| 1393 bool add_unicode_case_equivalents, |
1392 Zone* zone) { | 1394 Zone* zone) { |
1393 if (char_class != kNoCharClass) { | 1395 if (char_class != kNoCharClass) { |
1394 CharacterRange::AddClassEscape(char_class, ranges, zone); | 1396 CharacterRange::AddClassEscape(char_class, ranges, |
| 1397 add_unicode_case_equivalents, zone); |
1395 } else { | 1398 } else { |
1396 ranges->Add(range, zone); | 1399 ranges->Add(range, zone); |
1397 } | 1400 } |
1398 } | 1401 } |
1399 | 1402 |
1400 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { | 1403 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { |
1401 if (!FLAG_harmony_regexp_property) return false; | 1404 if (!FLAG_harmony_regexp_property) return false; |
1402 if (!unicode()) return false; | 1405 if (!unicode()) return false; |
1403 if (current() != '\\') return false; | 1406 if (current() != '\\') return false; |
1404 uc32 next = Next(); | 1407 uc32 next = Next(); |
(...skipping 19 matching lines...) Expand all Loading... |
1424 | 1427 |
1425 DCHECK_EQ(current(), '['); | 1428 DCHECK_EQ(current(), '['); |
1426 Advance(); | 1429 Advance(); |
1427 bool is_negated = false; | 1430 bool is_negated = false; |
1428 if (current() == '^') { | 1431 if (current() == '^') { |
1429 is_negated = true; | 1432 is_negated = true; |
1430 Advance(); | 1433 Advance(); |
1431 } | 1434 } |
1432 ZoneList<CharacterRange>* ranges = | 1435 ZoneList<CharacterRange>* ranges = |
1433 new (zone()) ZoneList<CharacterRange>(2, zone()); | 1436 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 1437 bool add_unicode_case_equivalents = unicode() && ignore_case(); |
1434 while (has_more() && current() != ']') { | 1438 while (has_more() && current() != ']') { |
1435 bool parsed_property = ParseClassProperty(ranges CHECK_FAILED); | 1439 bool parsed_property = ParseClassProperty(ranges CHECK_FAILED); |
1436 if (parsed_property) continue; | 1440 if (parsed_property) continue; |
1437 uc16 char_class = kNoCharClass; | 1441 uc16 char_class = kNoCharClass; |
1438 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); | 1442 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); |
1439 if (current() == '-') { | 1443 if (current() == '-') { |
1440 Advance(); | 1444 Advance(); |
1441 if (current() == kEndMarker) { | 1445 if (current() == kEndMarker) { |
1442 // If we reach the end we break out of the loop and let the | 1446 // If we reach the end we break out of the loop and let the |
1443 // following code report an error. | 1447 // following code report an error. |
1444 break; | 1448 break; |
1445 } else if (current() == ']') { | 1449 } else if (current() == ']') { |
1446 AddRangeOrEscape(ranges, char_class, first, zone()); | 1450 AddRangeOrEscape(ranges, char_class, first, |
| 1451 add_unicode_case_equivalents, zone()); |
1447 ranges->Add(CharacterRange::Singleton('-'), zone()); | 1452 ranges->Add(CharacterRange::Singleton('-'), zone()); |
1448 break; | 1453 break; |
1449 } | 1454 } |
1450 uc16 char_class_2 = kNoCharClass; | 1455 uc16 char_class_2 = kNoCharClass; |
1451 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); | 1456 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); |
1452 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { | 1457 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { |
1453 // Either end is an escaped character class. Treat the '-' verbatim. | 1458 // Either end is an escaped character class. Treat the '-' verbatim. |
1454 if (unicode()) { | 1459 if (unicode()) { |
1455 // ES2015 21.2.2.15.1 step 1. | 1460 // ES2015 21.2.2.15.1 step 1. |
1456 return ReportError(CStrVector(kRangeInvalid)); | 1461 return ReportError(CStrVector(kRangeInvalid)); |
1457 } | 1462 } |
1458 AddRangeOrEscape(ranges, char_class, first, zone()); | 1463 AddRangeOrEscape(ranges, char_class, first, |
| 1464 add_unicode_case_equivalents, zone()); |
1459 ranges->Add(CharacterRange::Singleton('-'), zone()); | 1465 ranges->Add(CharacterRange::Singleton('-'), zone()); |
1460 AddRangeOrEscape(ranges, char_class_2, next, zone()); | 1466 AddRangeOrEscape(ranges, char_class_2, next, |
| 1467 add_unicode_case_equivalents, zone()); |
1461 continue; | 1468 continue; |
1462 } | 1469 } |
1463 // ES2015 21.2.2.15.1 step 6. | 1470 // ES2015 21.2.2.15.1 step 6. |
1464 if (first.from() > next.to()) { | 1471 if (first.from() > next.to()) { |
1465 return ReportError(CStrVector(kRangeOutOfOrder)); | 1472 return ReportError(CStrVector(kRangeOutOfOrder)); |
1466 } | 1473 } |
1467 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); | 1474 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); |
1468 } else { | 1475 } else { |
1469 AddRangeOrEscape(ranges, char_class, first, zone()); | 1476 AddRangeOrEscape(ranges, char_class, first, add_unicode_case_equivalents, |
| 1477 zone()); |
1470 } | 1478 } |
1471 } | 1479 } |
1472 if (!has_more()) { | 1480 if (!has_more()) { |
1473 return ReportError(CStrVector(kUnterminated)); | 1481 return ReportError(CStrVector(kUnterminated)); |
1474 } | 1482 } |
1475 Advance(); | 1483 Advance(); |
1476 if (ranges->length() == 0) { | 1484 if (ranges->length() == 0) { |
1477 ranges->Add(CharacterRange::Everything(), zone()); | 1485 ranges->Add(CharacterRange::Everything(), zone()); |
1478 is_negated = !is_negated; | 1486 is_negated = !is_negated; |
1479 } | 1487 } |
(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1793 return false; | 1801 return false; |
1794 } | 1802 } |
1795 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1803 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1796 zone()); | 1804 zone()); |
1797 LAST(ADD_TERM); | 1805 LAST(ADD_TERM); |
1798 return true; | 1806 return true; |
1799 } | 1807 } |
1800 | 1808 |
1801 } // namespace internal | 1809 } // namespace internal |
1802 } // namespace v8 | 1810 } // namespace v8 |
OLD | NEW |