src/regexp/regexp-parser.cc - Issue 2813893002: [regexp] Consider surrogate pairs when optimizing disjunctions

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2813893002: [regexp] Consider surrogate pairs when optimizing disjunctions (Closed)

Patch Set: DCHECK(!IsLeadSurrogate) Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/regexp-parser.h"	5 #include "src/regexp/regexp-parser.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/factory.h"	8 #include "src/factory.h"

9 #include "src/isolate.h"	9 #include "src/isolate.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

(...skipping 265 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
276	276

277 if (dotall()) {	277 if (dotall()) {

278 // Everything.	278 // Everything.

279 DCHECK(FLAG_harmony_regexp_dotall);	279 DCHECK(FLAG_harmony_regexp_dotall);

280 CharacterRange::AddClassEscape('*', ranges, false, zone());	280 CharacterRange::AddClassEscape('*', ranges, false, zone());

281 } else {	281 } else {

282 // Everything except \x0a, \x0d, \u2028 and \u2029	282 // Everything except \x0a, \x0d, \u2028 and \u2029

283 CharacterRange::AddClassEscape('.', ranges, false, zone());	283 CharacterRange::AddClassEscape('.', ranges, false, zone());

284 }	284 }

285	285

286 RegExpCharacterClass* cc =	286 RegExpCharacterClass* cc = new (zone()) RegExpCharacterClass(ranges);

287 new (zone()) RegExpCharacterClass(ranges, false);

288 builder->AddCharacterClass(cc);	287 builder->AddCharacterClass(cc);

289 break;	288 break;

290 }	289 }

291 case '(': {	290 case '(': {

292 SubexpressionType subexpr_type = CAPTURE;	291 SubexpressionType subexpr_type = CAPTURE;

293 RegExpLookaround::Type lookaround_type = state->lookaround_type();	292 RegExpLookaround::Type lookaround_type = state->lookaround_type();

294 bool is_named_capture = false;	293 bool is_named_capture = false;

295 Advance();	294 Advance();

296 if (current() == '?') {	295 if (current() == '?') {

297 switch (Next()) {	296 switch (Next()) {

(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
385 case 'S':	384 case 'S':

386 case 'w':	385 case 'w':

387 case 'W': {	386 case 'W': {

388 uc32 c = Next();	387 uc32 c = Next();

389 Advance(2);	388 Advance(2);

390 ZoneList<CharacterRange>* ranges =	389 ZoneList<CharacterRange>* ranges =

391 new (zone()) ZoneList<CharacterRange>(2, zone());	390 new (zone()) ZoneList<CharacterRange>(2, zone());

392 CharacterRange::AddClassEscape(c, ranges,	391 CharacterRange::AddClassEscape(c, ranges,

393 unicode() && ignore_case(), zone());	392 unicode() && ignore_case(), zone());

394 RegExpCharacterClass* cc =	393 RegExpCharacterClass* cc =

395 new (zone()) RegExpCharacterClass(ranges, false);	394 new (zone()) RegExpCharacterClass(ranges);

396 builder->AddCharacterClass(cc);	395 builder->AddCharacterClass(cc);

397 break;	396 break;

398 }	397 }

399 case 'p':	398 case 'p':

400 case 'P': {	399 case 'P': {

401 uc32 p = Next();	400 uc32 p = Next();

402 Advance(2);	401 Advance(2);

403 if (unicode()) {	402 if (unicode()) {

404 if (FLAG_harmony_regexp_property) {	403 if (FLAG_harmony_regexp_property) {

405 ZoneList<CharacterRange>* ranges =	404 ZoneList<CharacterRange>* ranges =

406 new (zone()) ZoneList<CharacterRange>(2, zone());	405 new (zone()) ZoneList<CharacterRange>(2, zone());

407 if (!ParsePropertyClass(ranges, p == 'P')) {	406 if (!ParsePropertyClass(ranges, p == 'P')) {

408 return ReportError(CStrVector("Invalid property name"));	407 return ReportError(CStrVector("Invalid property name"));

409 }	408 }

410 RegExpCharacterClass* cc =	409 RegExpCharacterClass* cc =

411 new (zone()) RegExpCharacterClass(ranges, false);	410 new (zone()) RegExpCharacterClass(ranges);

412 builder->AddCharacterClass(cc);	411 builder->AddCharacterClass(cc);

413 } else {	412 } else {

414 // With /u, no identity escapes except for syntax characters	413 // With /u, no identity escapes except for syntax characters

415 // are allowed. Otherwise, all identity escapes are allowed.	414 // are allowed. Otherwise, all identity escapes are allowed.

416 return ReportError(CStrVector("Invalid escape"));	415 return ReportError(CStrVector("Invalid escape"));

417 }	416 }

418 } else {	417 } else {

419 builder->AddCharacter(p);	418 builder->AddCharacter(p);

420 }	419 }

421 break;	420 break;

(...skipping 1119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1541 }	1540 }

1542 }	1541 }

1543 if (!has_more()) {	1542 if (!has_more()) {

1544 return ReportError(CStrVector(kUnterminated));	1543 return ReportError(CStrVector(kUnterminated));

1545 }	1544 }

1546 Advance();	1545 Advance();

1547 if (ranges->length() == 0) {	1546 if (ranges->length() == 0) {

1548 ranges->Add(CharacterRange::Everything(), zone());	1547 ranges->Add(CharacterRange::Everything(), zone());

1549 is_negated = !is_negated;	1548 is_negated = !is_negated;

1550 }	1549 }

1551 return new (zone()) RegExpCharacterClass(ranges, is_negated);	1550 RegExpCharacterClass::Flags flags;

	1551 if (is_negated) flags = RegExpCharacterClass::NEGATED;

	1552 return new (zone()) RegExpCharacterClass(ranges, flags);

1552 }	1553 }

1553	1554

1554	1555

1555 #undef CHECK_FAILED	1556 #undef CHECK_FAILED

1556	1557

1557	1558

1558 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,	1559 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,

1559 FlatStringReader* input, JSRegExp::Flags flags,	1560 FlatStringReader* input, JSRegExp::Flags flags,

1560 RegExpCompileData* result) {	1561 RegExpCompileData* result) {

1561 DCHECK(result != NULL);	1562 DCHECK(result != NULL);

(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1715 // With /u, character class needs to be desugared, so it	1716 // With /u, character class needs to be desugared, so it

1716 // must be a standalone term instead of being part of a RegExpText.	1717 // must be a standalone term instead of being part of a RegExpText.

1717 AddTerm(cc);	1718 AddTerm(cc);

1718 } else {	1719 } else {

1719 AddAtom(cc);	1720 AddAtom(cc);

1720 }	1721 }

1721 }	1722 }

1722	1723

1723 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {	1724 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {

1724 AddTerm(new (zone()) RegExpCharacterClass(	1725 AddTerm(new (zone()) RegExpCharacterClass(

1725 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));	1726 CharacterRange::List(zone(), CharacterRange::Singleton(c))));

1726 }	1727 }

1727	1728

1728	1729

1729 void RegExpBuilder::AddAtom(RegExpTree* term) {	1730 void RegExpBuilder::AddAtom(RegExpTree* term) {

1730 if (term->IsEmpty()) {	1731 if (term->IsEmpty()) {

1731 AddEmpty();	1732 AddEmpty();

1732 return;	1733 return;

1733 }	1734 }

1734 if (term->IsTextElement()) {	1735 if (term->IsTextElement()) {

1735 FlushCharacters();	1736 FlushCharacters();

(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1870 return false;	1871 return false;

1871 }	1872 }

1872 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),	1873 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),

1873 zone());	1874 zone());

1874 LAST(ADD_TERM);	1875 LAST(ADD_TERM);

1875 return true;	1876 return true;

1876 }	1877 }

1877	1878

1878 } // namespace internal	1879 } // namespace internal

1879 } // namespace v8	1880 } // namespace v8

OLD	NEW

« no previous file with comments | « src/regexp/regexp-ast.h ('k') | test/mjsunit/regress/regress-641091.js » ('j') | no next file with comments »