Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2725583002: [regexp] fix /\W/ui wrt \u017f and \u212a. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after
263 multiline() ? RegExpAssertion::END_OF_LINE 263 multiline() ? RegExpAssertion::END_OF_LINE
264 : RegExpAssertion::END_OF_INPUT; 264 : RegExpAssertion::END_OF_INPUT;
265 builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type)); 265 builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type));
266 continue; 266 continue;
267 } 267 }
268 case '.': { 268 case '.': {
269 Advance(); 269 Advance();
270 // everything except \x0a, \x0d, \u2028 and \u2029 270 // everything except \x0a, \x0d, \u2028 and \u2029
271 ZoneList<CharacterRange>* ranges = 271 ZoneList<CharacterRange>* ranges =
272 new (zone()) ZoneList<CharacterRange>(2, zone()); 272 new (zone()) ZoneList<CharacterRange>(2, zone());
273 CharacterRange::AddClassEscape('.', ranges, zone()); 273 CharacterRange::AddClassEscape('.', ranges, false, zone());
274 RegExpCharacterClass* cc = 274 RegExpCharacterClass* cc =
275 new (zone()) RegExpCharacterClass(ranges, false); 275 new (zone()) RegExpCharacterClass(ranges, false);
276 builder->AddCharacterClass(cc); 276 builder->AddCharacterClass(cc);
277 break; 277 break;
278 } 278 }
279 case '(': { 279 case '(': {
280 SubexpressionType subexpr_type = CAPTURE; 280 SubexpressionType subexpr_type = CAPTURE;
281 RegExpLookaround::Type lookaround_type = state->lookaround_type(); 281 RegExpLookaround::Type lookaround_type = state->lookaround_type();
282 bool is_named_capture = false; 282 bool is_named_capture = false;
283 Advance(); 283 Advance();
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
370 case 'd': 370 case 'd':
371 case 'D': 371 case 'D':
372 case 's': 372 case 's':
373 case 'S': 373 case 'S':
374 case 'w': 374 case 'w':
375 case 'W': { 375 case 'W': {
376 uc32 c = Next(); 376 uc32 c = Next();
377 Advance(2); 377 Advance(2);
378 ZoneList<CharacterRange>* ranges = 378 ZoneList<CharacterRange>* ranges =
379 new (zone()) ZoneList<CharacterRange>(2, zone()); 379 new (zone()) ZoneList<CharacterRange>(2, zone());
380 CharacterRange::AddClassEscape(c, ranges, zone()); 380 CharacterRange::AddClassEscape(c, ranges,
381 unicode() && ignore_case(), zone());
381 RegExpCharacterClass* cc = 382 RegExpCharacterClass* cc =
382 new (zone()) RegExpCharacterClass(ranges, false); 383 new (zone()) RegExpCharacterClass(ranges, false);
383 builder->AddCharacterClass(cc); 384 builder->AddCharacterClass(cc);
384 break; 385 break;
385 } 386 }
386 case 'p': 387 case 'p':
387 case 'P': { 388 case 'P': {
388 uc32 p = Next(); 389 uc32 p = Next();
389 Advance(2); 390 Advance(2);
390 if (unicode()) { 391 if (unicode()) {
(...skipping 991 matching lines...) Expand 10 before | Expand all | Expand 10 after
1382 return CharacterRange::Singleton(first); 1383 return CharacterRange::Singleton(first);
1383 } 1384 }
1384 1385
1385 static const uc16 kNoCharClass = 0; 1386 static const uc16 kNoCharClass = 0;
1386 1387
1387 // Adds range or pre-defined character class to character ranges. 1388 // Adds range or pre-defined character class to character ranges.
1388 // If char_class is not kNoCharClass, it's interpreted as a class 1389 // If char_class is not kNoCharClass, it's interpreted as a class
1389 // escape (e.g., 's' means whitespace, from '\s'). 1390 // escape (e.g., 's' means whitespace, from '\s').
1390 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, 1391 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
1391 uc16 char_class, CharacterRange range, 1392 uc16 char_class, CharacterRange range,
1393 bool add_unicode_case_equivalents,
1392 Zone* zone) { 1394 Zone* zone) {
1393 if (char_class != kNoCharClass) { 1395 if (char_class != kNoCharClass) {
1394 CharacterRange::AddClassEscape(char_class, ranges, zone); 1396 CharacterRange::AddClassEscape(char_class, ranges,
1397 add_unicode_case_equivalents, zone);
1395 } else { 1398 } else {
1396 ranges->Add(range, zone); 1399 ranges->Add(range, zone);
1397 } 1400 }
1398 } 1401 }
1399 1402
1400 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { 1403 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) {
1401 if (!FLAG_harmony_regexp_property) return false; 1404 if (!FLAG_harmony_regexp_property) return false;
1402 if (!unicode()) return false; 1405 if (!unicode()) return false;
1403 if (current() != '\\') return false; 1406 if (current() != '\\') return false;
1404 uc32 next = Next(); 1407 uc32 next = Next();
(...skipping 19 matching lines...) Expand all
1424 1427
1425 DCHECK_EQ(current(), '['); 1428 DCHECK_EQ(current(), '[');
1426 Advance(); 1429 Advance();
1427 bool is_negated = false; 1430 bool is_negated = false;
1428 if (current() == '^') { 1431 if (current() == '^') {
1429 is_negated = true; 1432 is_negated = true;
1430 Advance(); 1433 Advance();
1431 } 1434 }
1432 ZoneList<CharacterRange>* ranges = 1435 ZoneList<CharacterRange>* ranges =
1433 new (zone()) ZoneList<CharacterRange>(2, zone()); 1436 new (zone()) ZoneList<CharacterRange>(2, zone());
1437 bool add_unicode_case_equivalents = unicode() && ignore_case();
1434 while (has_more() && current() != ']') { 1438 while (has_more() && current() != ']') {
1435 bool parsed_property = ParseClassProperty(ranges CHECK_FAILED); 1439 bool parsed_property = ParseClassProperty(ranges CHECK_FAILED);
1436 if (parsed_property) continue; 1440 if (parsed_property) continue;
1437 uc16 char_class = kNoCharClass; 1441 uc16 char_class = kNoCharClass;
1438 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); 1442 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
1439 if (current() == '-') { 1443 if (current() == '-') {
1440 Advance(); 1444 Advance();
1441 if (current() == kEndMarker) { 1445 if (current() == kEndMarker) {
1442 // If we reach the end we break out of the loop and let the 1446 // If we reach the end we break out of the loop and let the
1443 // following code report an error. 1447 // following code report an error.
1444 break; 1448 break;
1445 } else if (current() == ']') { 1449 } else if (current() == ']') {
1446 AddRangeOrEscape(ranges, char_class, first, zone()); 1450 AddRangeOrEscape(ranges, char_class, first,
1451 add_unicode_case_equivalents, zone());
1447 ranges->Add(CharacterRange::Singleton('-'), zone()); 1452 ranges->Add(CharacterRange::Singleton('-'), zone());
1448 break; 1453 break;
1449 } 1454 }
1450 uc16 char_class_2 = kNoCharClass; 1455 uc16 char_class_2 = kNoCharClass;
1451 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); 1456 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
1452 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { 1457 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
1453 // Either end is an escaped character class. Treat the '-' verbatim. 1458 // Either end is an escaped character class. Treat the '-' verbatim.
1454 if (unicode()) { 1459 if (unicode()) {
1455 // ES2015 21.2.2.15.1 step 1. 1460 // ES2015 21.2.2.15.1 step 1.
1456 return ReportError(CStrVector(kRangeInvalid)); 1461 return ReportError(CStrVector(kRangeInvalid));
1457 } 1462 }
1458 AddRangeOrEscape(ranges, char_class, first, zone()); 1463 AddRangeOrEscape(ranges, char_class, first,
1464 add_unicode_case_equivalents, zone());
1459 ranges->Add(CharacterRange::Singleton('-'), zone()); 1465 ranges->Add(CharacterRange::Singleton('-'), zone());
1460 AddRangeOrEscape(ranges, char_class_2, next, zone()); 1466 AddRangeOrEscape(ranges, char_class_2, next,
1467 add_unicode_case_equivalents, zone());
1461 continue; 1468 continue;
1462 } 1469 }
1463 // ES2015 21.2.2.15.1 step 6. 1470 // ES2015 21.2.2.15.1 step 6.
1464 if (first.from() > next.to()) { 1471 if (first.from() > next.to()) {
1465 return ReportError(CStrVector(kRangeOutOfOrder)); 1472 return ReportError(CStrVector(kRangeOutOfOrder));
1466 } 1473 }
1467 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); 1474 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());
1468 } else { 1475 } else {
1469 AddRangeOrEscape(ranges, char_class, first, zone()); 1476 AddRangeOrEscape(ranges, char_class, first, add_unicode_case_equivalents,
1477 zone());
1470 } 1478 }
1471 } 1479 }
1472 if (!has_more()) { 1480 if (!has_more()) {
1473 return ReportError(CStrVector(kUnterminated)); 1481 return ReportError(CStrVector(kUnterminated));
1474 } 1482 }
1475 Advance(); 1483 Advance();
1476 if (ranges->length() == 0) { 1484 if (ranges->length() == 0) {
1477 ranges->Add(CharacterRange::Everything(), zone()); 1485 ranges->Add(CharacterRange::Everything(), zone());
1478 is_negated = !is_negated; 1486 is_negated = !is_negated;
1479 } 1487 }
(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after
1793 return false; 1801 return false;
1794 } 1802 }
1795 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1803 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1796 zone()); 1804 zone());
1797 LAST(ADD_TERM); 1805 LAST(ADD_TERM);
1798 return true; 1806 return true;
1799 } 1807 }
1800 1808
1801 } // namespace internal 1809 } // namespace internal
1802 } // namespace v8 1810 } // namespace v8
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698