Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(110)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2813893002: [regexp] Consider surrogate pairs when optimizing disjunctions (Closed)
Patch Set: DCHECK(!IsLeadSurrogate) Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-ast.h ('k') | test/mjsunit/regress/regress-641091.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 265 matching lines...) Expand 10 before | Expand all | Expand 10 after
276 276
277 if (dotall()) { 277 if (dotall()) {
278 // Everything. 278 // Everything.
279 DCHECK(FLAG_harmony_regexp_dotall); 279 DCHECK(FLAG_harmony_regexp_dotall);
280 CharacterRange::AddClassEscape('*', ranges, false, zone()); 280 CharacterRange::AddClassEscape('*', ranges, false, zone());
281 } else { 281 } else {
282 // Everything except \x0a, \x0d, \u2028 and \u2029 282 // Everything except \x0a, \x0d, \u2028 and \u2029
283 CharacterRange::AddClassEscape('.', ranges, false, zone()); 283 CharacterRange::AddClassEscape('.', ranges, false, zone());
284 } 284 }
285 285
286 RegExpCharacterClass* cc = 286 RegExpCharacterClass* cc = new (zone()) RegExpCharacterClass(ranges);
287 new (zone()) RegExpCharacterClass(ranges, false);
288 builder->AddCharacterClass(cc); 287 builder->AddCharacterClass(cc);
289 break; 288 break;
290 } 289 }
291 case '(': { 290 case '(': {
292 SubexpressionType subexpr_type = CAPTURE; 291 SubexpressionType subexpr_type = CAPTURE;
293 RegExpLookaround::Type lookaround_type = state->lookaround_type(); 292 RegExpLookaround::Type lookaround_type = state->lookaround_type();
294 bool is_named_capture = false; 293 bool is_named_capture = false;
295 Advance(); 294 Advance();
296 if (current() == '?') { 295 if (current() == '?') {
297 switch (Next()) { 296 switch (Next()) {
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
385 case 'S': 384 case 'S':
386 case 'w': 385 case 'w':
387 case 'W': { 386 case 'W': {
388 uc32 c = Next(); 387 uc32 c = Next();
389 Advance(2); 388 Advance(2);
390 ZoneList<CharacterRange>* ranges = 389 ZoneList<CharacterRange>* ranges =
391 new (zone()) ZoneList<CharacterRange>(2, zone()); 390 new (zone()) ZoneList<CharacterRange>(2, zone());
392 CharacterRange::AddClassEscape(c, ranges, 391 CharacterRange::AddClassEscape(c, ranges,
393 unicode() && ignore_case(), zone()); 392 unicode() && ignore_case(), zone());
394 RegExpCharacterClass* cc = 393 RegExpCharacterClass* cc =
395 new (zone()) RegExpCharacterClass(ranges, false); 394 new (zone()) RegExpCharacterClass(ranges);
396 builder->AddCharacterClass(cc); 395 builder->AddCharacterClass(cc);
397 break; 396 break;
398 } 397 }
399 case 'p': 398 case 'p':
400 case 'P': { 399 case 'P': {
401 uc32 p = Next(); 400 uc32 p = Next();
402 Advance(2); 401 Advance(2);
403 if (unicode()) { 402 if (unicode()) {
404 if (FLAG_harmony_regexp_property) { 403 if (FLAG_harmony_regexp_property) {
405 ZoneList<CharacterRange>* ranges = 404 ZoneList<CharacterRange>* ranges =
406 new (zone()) ZoneList<CharacterRange>(2, zone()); 405 new (zone()) ZoneList<CharacterRange>(2, zone());
407 if (!ParsePropertyClass(ranges, p == 'P')) { 406 if (!ParsePropertyClass(ranges, p == 'P')) {
408 return ReportError(CStrVector("Invalid property name")); 407 return ReportError(CStrVector("Invalid property name"));
409 } 408 }
410 RegExpCharacterClass* cc = 409 RegExpCharacterClass* cc =
411 new (zone()) RegExpCharacterClass(ranges, false); 410 new (zone()) RegExpCharacterClass(ranges);
412 builder->AddCharacterClass(cc); 411 builder->AddCharacterClass(cc);
413 } else { 412 } else {
414 // With /u, no identity escapes except for syntax characters 413 // With /u, no identity escapes except for syntax characters
415 // are allowed. Otherwise, all identity escapes are allowed. 414 // are allowed. Otherwise, all identity escapes are allowed.
416 return ReportError(CStrVector("Invalid escape")); 415 return ReportError(CStrVector("Invalid escape"));
417 } 416 }
418 } else { 417 } else {
419 builder->AddCharacter(p); 418 builder->AddCharacter(p);
420 } 419 }
421 break; 420 break;
(...skipping 1119 matching lines...) Expand 10 before | Expand all | Expand 10 after
1541 } 1540 }
1542 } 1541 }
1543 if (!has_more()) { 1542 if (!has_more()) {
1544 return ReportError(CStrVector(kUnterminated)); 1543 return ReportError(CStrVector(kUnterminated));
1545 } 1544 }
1546 Advance(); 1545 Advance();
1547 if (ranges->length() == 0) { 1546 if (ranges->length() == 0) {
1548 ranges->Add(CharacterRange::Everything(), zone()); 1547 ranges->Add(CharacterRange::Everything(), zone());
1549 is_negated = !is_negated; 1548 is_negated = !is_negated;
1550 } 1549 }
1551 return new (zone()) RegExpCharacterClass(ranges, is_negated); 1550 RegExpCharacterClass::Flags flags;
1551 if (is_negated) flags = RegExpCharacterClass::NEGATED;
1552 return new (zone()) RegExpCharacterClass(ranges, flags);
1552 } 1553 }
1553 1554
1554 1555
1555 #undef CHECK_FAILED 1556 #undef CHECK_FAILED
1556 1557
1557 1558
1558 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, 1559 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
1559 FlatStringReader* input, JSRegExp::Flags flags, 1560 FlatStringReader* input, JSRegExp::Flags flags,
1560 RegExpCompileData* result) { 1561 RegExpCompileData* result) {
1561 DCHECK(result != NULL); 1562 DCHECK(result != NULL);
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after
1715 // With /u, character class needs to be desugared, so it 1716 // With /u, character class needs to be desugared, so it
1716 // must be a standalone term instead of being part of a RegExpText. 1717 // must be a standalone term instead of being part of a RegExpText.
1717 AddTerm(cc); 1718 AddTerm(cc);
1718 } else { 1719 } else {
1719 AddAtom(cc); 1720 AddAtom(cc);
1720 } 1721 }
1721 } 1722 }
1722 1723
1723 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) { 1724 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {
1724 AddTerm(new (zone()) RegExpCharacterClass( 1725 AddTerm(new (zone()) RegExpCharacterClass(
1725 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false)); 1726 CharacterRange::List(zone(), CharacterRange::Singleton(c))));
1726 } 1727 }
1727 1728
1728 1729
1729 void RegExpBuilder::AddAtom(RegExpTree* term) { 1730 void RegExpBuilder::AddAtom(RegExpTree* term) {
1730 if (term->IsEmpty()) { 1731 if (term->IsEmpty()) {
1731 AddEmpty(); 1732 AddEmpty();
1732 return; 1733 return;
1733 } 1734 }
1734 if (term->IsTextElement()) { 1735 if (term->IsTextElement()) {
1735 FlushCharacters(); 1736 FlushCharacters();
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
1870 return false; 1871 return false;
1871 } 1872 }
1872 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1873 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1873 zone()); 1874 zone());
1874 LAST(ADD_TERM); 1875 LAST(ADD_TERM);
1875 return true; 1876 return true;
1876 } 1877 }
1877 1878
1878 } // namespace internal 1879 } // namespace internal
1879 } // namespace v8 1880 } // namespace v8
OLD | NEW
« no previous file with comments | « src/regexp/regexp-ast.h ('k') | test/mjsunit/regress/regress-641091.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698