src/regexp/regexp-parser.cc - Issue 2725583002: [regexp] fix /\W/ui wrt \u017f and \u212a.

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2725583002: [regexp] fix /\W/ui wrt \u017f and \u212a. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/regexp-parser.h"	5 #include "src/regexp/regexp-parser.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/factory.h"	8 #include "src/factory.h"

9 #include "src/isolate.h"	9 #include "src/isolate.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

(...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
263 multiline() ? RegExpAssertion::END_OF_LINE	263 multiline() ? RegExpAssertion::END_OF_LINE

264 : RegExpAssertion::END_OF_INPUT;	264 : RegExpAssertion::END_OF_INPUT;

265 builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type));	265 builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type));

266 continue;	266 continue;

267 }	267 }

268 case '.': {	268 case '.': {

269 Advance();	269 Advance();

270 // everything except \x0a, \x0d, \u2028 and \u2029	270 // everything except \x0a, \x0d, \u2028 and \u2029

271 ZoneList<CharacterRange>* ranges =	271 ZoneList<CharacterRange>* ranges =

272 new (zone()) ZoneList<CharacterRange>(2, zone());	272 new (zone()) ZoneList<CharacterRange>(2, zone());

273 CharacterRange::AddClassEscape('.', ranges, zone());	273 CharacterRange::AddClassEscape('.', ranges, false, zone());

274 RegExpCharacterClass* cc =	274 RegExpCharacterClass* cc =

275 new (zone()) RegExpCharacterClass(ranges, false);	275 new (zone()) RegExpCharacterClass(ranges, false);

276 builder->AddCharacterClass(cc);	276 builder->AddCharacterClass(cc);

277 break;	277 break;

278 }	278 }

279 case '(': {	279 case '(': {

280 SubexpressionType subexpr_type = CAPTURE;	280 SubexpressionType subexpr_type = CAPTURE;

281 RegExpLookaround::Type lookaround_type = state->lookaround_type();	281 RegExpLookaround::Type lookaround_type = state->lookaround_type();

282 bool is_named_capture = false;	282 bool is_named_capture = false;

283 Advance();	283 Advance();

(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
370 case 'd':	370 case 'd':

371 case 'D':	371 case 'D':

372 case 's':	372 case 's':

373 case 'S':	373 case 'S':

374 case 'w':	374 case 'w':

375 case 'W': {	375 case 'W': {

376 uc32 c = Next();	376 uc32 c = Next();

377 Advance(2);	377 Advance(2);

378 ZoneList<CharacterRange>* ranges =	378 ZoneList<CharacterRange>* ranges =

379 new (zone()) ZoneList<CharacterRange>(2, zone());	379 new (zone()) ZoneList<CharacterRange>(2, zone());

380 CharacterRange::AddClassEscape(c, ranges, zone());	380 CharacterRange::AddClassEscape(c, ranges,

	381 unicode() && ignore_case(), zone());

381 RegExpCharacterClass* cc =	382 RegExpCharacterClass* cc =

382 new (zone()) RegExpCharacterClass(ranges, false);	383 new (zone()) RegExpCharacterClass(ranges, false);

383 builder->AddCharacterClass(cc);	384 builder->AddCharacterClass(cc);

384 break;	385 break;

385 }	386 }

386 case 'p':	387 case 'p':

387 case 'P': {	388 case 'P': {

388 uc32 p = Next();	389 uc32 p = Next();

389 Advance(2);	390 Advance(2);

390 if (unicode()) {	391 if (unicode()) {

(...skipping 991 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1382 return CharacterRange::Singleton(first);	1383 return CharacterRange::Singleton(first);

1383 }	1384 }

1384	1385

1385 static const uc16 kNoCharClass = 0;	1386 static const uc16 kNoCharClass = 0;

1386	1387

1387 // Adds range or pre-defined character class to character ranges.	1388 // Adds range or pre-defined character class to character ranges.

1388 // If char_class is not kNoCharClass, it's interpreted as a class	1389 // If char_class is not kNoCharClass, it's interpreted as a class

1389 // escape (e.g., 's' means whitespace, from '\s').	1390 // escape (e.g., 's' means whitespace, from '\s').

1390 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,	1391 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,

1391 uc16 char_class, CharacterRange range,	1392 uc16 char_class, CharacterRange range,

	1393 bool add_unicode_case_equivalents,

1392 Zone* zone) {	1394 Zone* zone) {

1393 if (char_class != kNoCharClass) {	1395 if (char_class != kNoCharClass) {

1394 CharacterRange::AddClassEscape(char_class, ranges, zone);	1396 CharacterRange::AddClassEscape(char_class, ranges,

	1397 add_unicode_case_equivalents, zone);

1395 } else {	1398 } else {

1396 ranges->Add(range, zone);	1399 ranges->Add(range, zone);

1397 }	1400 }

1398 }	1401 }

1399	1402

1400 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) {	1403 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) {

1401 if (!FLAG_harmony_regexp_property) return false;	1404 if (!FLAG_harmony_regexp_property) return false;

1402 if (!unicode()) return false;	1405 if (!unicode()) return false;

1403 if (current() != '\\') return false;	1406 if (current() != '\\') return false;

1404 uc32 next = Next();	1407 uc32 next = Next();

(...skipping 19 matching lines...) Expand all Loading...
1424	1427

1425 DCHECK_EQ(current(), '[');	1428 DCHECK_EQ(current(), '[');

1426 Advance();	1429 Advance();

1427 bool is_negated = false;	1430 bool is_negated = false;

1428 if (current() == '^') {	1431 if (current() == '^') {

1429 is_negated = true;	1432 is_negated = true;

1430 Advance();	1433 Advance();

1431 }	1434 }

1432 ZoneList<CharacterRange>* ranges =	1435 ZoneList<CharacterRange>* ranges =

1433 new (zone()) ZoneList<CharacterRange>(2, zone());	1436 new (zone()) ZoneList<CharacterRange>(2, zone());

	1437 bool add_unicode_case_equivalents = unicode() && ignore_case();

1434 while (has_more() && current() != ']') {	1438 while (has_more() && current() != ']') {

1435 bool parsed_property = ParseClassProperty(ranges CHECK_FAILED);	1439 bool parsed_property = ParseClassProperty(ranges CHECK_FAILED);

1436 if (parsed_property) continue;	1440 if (parsed_property) continue;

1437 uc16 char_class = kNoCharClass;	1441 uc16 char_class = kNoCharClass;

1438 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);	1442 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);

1439 if (current() == '-') {	1443 if (current() == '-') {

1440 Advance();	1444 Advance();

1441 if (current() == kEndMarker) {	1445 if (current() == kEndMarker) {

1442 // If we reach the end we break out of the loop and let the	1446 // If we reach the end we break out of the loop and let the

1443 // following code report an error.	1447 // following code report an error.

1444 break;	1448 break;

1445 } else if (current() == ']') {	1449 } else if (current() == ']') {

1446 AddRangeOrEscape(ranges, char_class, first, zone());	1450 AddRangeOrEscape(ranges, char_class, first,

	1451 add_unicode_case_equivalents, zone());

1447 ranges->Add(CharacterRange::Singleton('-'), zone());	1452 ranges->Add(CharacterRange::Singleton('-'), zone());

1448 break;	1453 break;

1449 }	1454 }

1450 uc16 char_class_2 = kNoCharClass;	1455 uc16 char_class_2 = kNoCharClass;

1451 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);	1456 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);

1452 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {	1457 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {

1453 // Either end is an escaped character class. Treat the '-' verbatim.	1458 // Either end is an escaped character class. Treat the '-' verbatim.

1454 if (unicode()) {	1459 if (unicode()) {

1455 // ES2015 21.2.2.15.1 step 1.	1460 // ES2015 21.2.2.15.1 step 1.

1456 return ReportError(CStrVector(kRangeInvalid));	1461 return ReportError(CStrVector(kRangeInvalid));

1457 }	1462 }

1458 AddRangeOrEscape(ranges, char_class, first, zone());	1463 AddRangeOrEscape(ranges, char_class, first,

	1464 add_unicode_case_equivalents, zone());

1459 ranges->Add(CharacterRange::Singleton('-'), zone());	1465 ranges->Add(CharacterRange::Singleton('-'), zone());

1460 AddRangeOrEscape(ranges, char_class_2, next, zone());	1466 AddRangeOrEscape(ranges, char_class_2, next,

	1467 add_unicode_case_equivalents, zone());

1461 continue;	1468 continue;

1462 }	1469 }

1463 // ES2015 21.2.2.15.1 step 6.	1470 // ES2015 21.2.2.15.1 step 6.

1464 if (first.from() > next.to()) {	1471 if (first.from() > next.to()) {

1465 return ReportError(CStrVector(kRangeOutOfOrder));	1472 return ReportError(CStrVector(kRangeOutOfOrder));

1466 }	1473 }

1467 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());	1474 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());

1468 } else {	1475 } else {

1469 AddRangeOrEscape(ranges, char_class, first, zone());	1476 AddRangeOrEscape(ranges, char_class, first, add_unicode_case_equivalents,

	1477 zone());

1470 }	1478 }

1471 }	1479 }

1472 if (!has_more()) {	1480 if (!has_more()) {

1473 return ReportError(CStrVector(kUnterminated));	1481 return ReportError(CStrVector(kUnterminated));

1474 }	1482 }

1475 Advance();	1483 Advance();

1476 if (ranges->length() == 0) {	1484 if (ranges->length() == 0) {

1477 ranges->Add(CharacterRange::Everything(), zone());	1485 ranges->Add(CharacterRange::Everything(), zone());

1478 is_negated = !is_negated;	1486 is_negated = !is_negated;

1479 }	1487 }

(...skipping 313 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1793 return false;	1801 return false;

1794 }	1802 }

1795 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),	1803 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),

1796 zone());	1804 zone());

1797 LAST(ADD_TERM);	1805 LAST(ADD_TERM);

1798 return true;	1806 return true;

1799 }	1807 }

1800	1808

1801 } // namespace internal	1809 } // namespace internal

1802 } // namespace v8	1810 } // namespace v8

OLD	NEW

« src/regexp/regexp-ast.h ('K') | « src/regexp/regexp-ast.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »