src/regexp/regexp-parser.cc - Issue 1645573002: [regexp] restrict pattern syntax for unicode mode.

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1645573002: [regexp] restrict pattern syntax for unicode mode. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@stage

Patch Set: allow forward slash as identity escape Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/regexp-parser.h"	5 #include "src/regexp/regexp-parser.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/factory.h"	8 #include "src/factory.h"

9 #include "src/isolate.h"	9 #include "src/isolate.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

(...skipping 84 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
95	95

96	96

97 void RegExpParser::Advance(int dist) {	97 void RegExpParser::Advance(int dist) {

98 next_pos_ += dist - 1;	98 next_pos_ += dist - 1;

99 Advance();	99 Advance();

100 }	100 }

101	101

102	102

103 bool RegExpParser::simple() { return simple_; }	103 bool RegExpParser::simple() { return simple_; }

104	104

105	105 bool RegExpParser::IsSyntaxCharacterOrSlash(uc32 c) {

106 bool RegExpParser::IsSyntaxCharacter(uc32 c) {	106 switch (c) {

107 return c == '^' \|\| c == '$' \|\| c == '\\' \|\| c == '.' \|\| c == '*' \|\|	107 case '^':

108 c == '+' \|\| c == '?' \|\| c == '(' \|\| c == ')' \|\| c == '[' \|\| c == ']' \|\|	108 case '$':

109 c == '{' \|\| c == '}' \|\| c == '\|';	109 case '\\':

	110 case '.':

	111 case '*':

	112 case '+':

	113 case '?':

	114 case '(':

	115 case ')':

	116 case '[':

	117 case ']':

	118 case '{':

	119 case '}':

	120 case '\|':

	121 case '/':

	122 return true;

	123 default:

	124 break;

	125 }

	126 return false;

110 }	127 }

111	128

112	129

113 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {	130 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {

114 failed_ = true;	131 failed_ = true;

115 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();	132 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();

116 // Zip to the end to make sure the no more input is read.	133 // Zip to the end to make sure the no more input is read.

117 current_ = kEndMarker;	134 current_ = kEndMarker;

118 next_pos_ = in()->length();	135 next_pos_ = in()->length();

119 return NULL;	136 return NULL;

(...skipping 233 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
353 // the capture registers of the referenced capture are either	370 // the capture registers of the referenced capture are either

354 // both set or both cleared.	371 // both set or both cleared.

355 builder->AddEmpty();	372 builder->AddEmpty();

356 } else {	373 } else {

357 RegExpCapture* capture = GetCapture(index);	374 RegExpCapture* capture = GetCapture(index);

358 RegExpTree* atom = new (zone()) RegExpBackReference(capture);	375 RegExpTree* atom = new (zone()) RegExpBackReference(capture);

359 builder->AddAtom(atom);	376 builder->AddAtom(atom);

360 }	377 }

361 break;	378 break;

362 }	379 }

	380 // With /u, no identity escapes except for syntax characters

	381 // are allowed. Otherwise, all identity escapes are allowed.

	382 if (unicode()) {

	383 return ReportError(CStrVector("Invalid escape"));

	384 }

363 uc32 first_digit = Next();	385 uc32 first_digit = Next();

364 if (first_digit == '8' \|\| first_digit == '9') {	386 if (first_digit == '8' \|\| first_digit == '9') {

365 // If the 'u' flag is present, only syntax characters can be	387 builder->AddCharacter(first_digit);

366 // escaped,	388 Advance(2);

367 // no other identity escapes are allowed. If the 'u' flag is not

368 // present, all identity escapes are allowed.

369 if (!unicode()) {

370 builder->AddCharacter(first_digit);

371 Advance(2);

372 } else {

373 return ReportError(CStrVector("Invalid escape"));

374 }

375 break;	389 break;

376 }	390 }

377 }	391 }

378 // FALLTHROUGH	392 // FALLTHROUGH

379 case '0': {	393 case '0': {

380 Advance();	394 Advance();

	395 if (unicode() && Next() >= '0' && Next() <= '9') {

	396 // With /u, decimal escape with leading 0 are not parsed as octal.

	397 return ReportError(CStrVector("Invalid decimal escape"));

	398 }

381 uc32 octal = ParseOctalLiteral();	399 uc32 octal = ParseOctalLiteral();

382 builder->AddCharacter(octal);	400 builder->AddCharacter(octal);

383 break;	401 break;

384 }	402 }

385 // ControlEscape :: one of	403 // ControlEscape :: one of

386 // f n r t v	404 // f n r t v

387 case 'f':	405 case 'f':

388 Advance(2);	406 Advance(2);

389 builder->AddCharacter('\f');	407 builder->AddCharacter('\f');

390 break;	408 break;

(...skipping 17 matching lines...) Expand all Loading...
408 Advance();	426 Advance();

409 uc32 controlLetter = Next();	427 uc32 controlLetter = Next();

410 // Special case if it is an ASCII letter.	428 // Special case if it is an ASCII letter.

411 // Convert lower case letters to uppercase.	429 // Convert lower case letters to uppercase.

412 uc32 letter = controlLetter & ~('a' ^ 'A');	430 uc32 letter = controlLetter & ~('a' ^ 'A');

413 if (letter < 'A' \|\| 'Z' < letter) {	431 if (letter < 'A' \|\| 'Z' < letter) {

414 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.	432 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.

415 // This is outside the specification. We match JSC in	433 // This is outside the specification. We match JSC in

416 // reading the backslash as a literal character instead	434 // reading the backslash as a literal character instead

417 // of as starting an escape.	435 // of as starting an escape.

	436 if (unicode()) {

	437 // With /u, invalid escapes are not treated as identity escapes.

	438 return ReportError(CStrVector("Invalid unicode escape"));

	439 }

418 builder->AddCharacter('\\');	440 builder->AddCharacter('\\');

419 } else {	441 } else {

420 Advance(2);	442 Advance(2);

421 builder->AddCharacter(controlLetter & 0x1f);	443 builder->AddCharacter(controlLetter & 0x1f);

422 }	444 }

423 break;	445 break;

424 }	446 }

425 case 'x': {	447 case 'x': {

426 Advance(2);	448 Advance(2);

427 uc32 value;	449 uc32 value;

428 if (ParseHexEscape(2, &value)) {	450 if (ParseHexEscape(2, &value)) {

429 builder->AddCharacter(value);	451 builder->AddCharacter(value);

430 } else if (!unicode()) {	452 } else if (!unicode()) {

431 builder->AddCharacter('x');	453 builder->AddCharacter('x');

432 } else {	454 } else {

433 // If the 'u' flag is present, invalid escapes are not treated as	455 // With /u, invalid escapes are not treated as identity escapes.

434 // identity escapes.

435 return ReportError(CStrVector("Invalid escape"));	456 return ReportError(CStrVector("Invalid escape"));

436 }	457 }

437 break;	458 break;

438 }	459 }

439 case 'u': {	460 case 'u': {

440 Advance(2);	461 Advance(2);

441 uc32 value;	462 uc32 value;

442 if (ParseUnicodeEscape(&value)) {	463 if (ParseUnicodeEscape(&value)) {

443 builder->AddUnicodeCharacter(value);	464 builder->AddUnicodeCharacter(value);

444 } else if (!unicode()) {	465 } else if (!unicode()) {

445 builder->AddCharacter('u');	466 builder->AddCharacter('u');

446 } else {	467 } else {

447 // If the 'u' flag is present, invalid escapes are not treated as	468 // With /u, invalid escapes are not treated as identity escapes.

448 // identity escapes.

449 return ReportError(CStrVector("Invalid unicode escape"));	469 return ReportError(CStrVector("Invalid unicode escape"));

450 }	470 }

451 break;	471 break;

452 }	472 }

453 default:	473 default:

454 Advance();	474 Advance();

455 // If the 'u' flag is present, only syntax characters can be	475 // With /u, no identity escapes except for syntax characters

456 // escaped, no	476 // are allowed. Otherwise, all identity escapes are allowed.

457 // other identity escapes are allowed. If the 'u' flag is not	477 if (!unicode() \|\| IsSyntaxCharacterOrSlash(current())) {

458 // present,

459 // all identity escapes are allowed.

460 if (!unicode() \|\| IsSyntaxCharacter(current())) {

461 builder->AddCharacter(current());	478 builder->AddCharacter(current());

462 Advance();	479 Advance();

463 } else {	480 } else {

464 return ReportError(CStrVector("Invalid escape"));	481 return ReportError(CStrVector("Invalid escape"));

465 }	482 }

466 break;	483 break;

467 }	484 }

468 break;	485 break;

469 case '{': {	486 case '{': {

470 int dummy;	487 int dummy;

471 if (ParseIntervalQuantifier(&dummy, &dummy)) {	488 if (ParseIntervalQuantifier(&dummy, &dummy)) {

472 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);	489 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);

473 }	490 }

474 // fallthrough	491 // fallthrough

475 }	492 }

	493 case '}':

	494 case ']':

	495 if (unicode()) {

	496 ReportError(CStrVector("Lone quantifier brackets") CHECK_FAILED);
	vogelheim 2016/01/28 13:38:21: I don't get the point of the ReportError(... CHECK_FAILED) construct. As I read it, ReportError set failed_ = true; and returns NULL, and CHECK_FAILED will test for failed_ and return NULL, too. Wouldn't return ReportError(...) do the same thing, except somewhat less unorthodox?
	Yang 2016/01/28 14:01:07 (in reply to vogelheim): Done.
	497 }

	498 // fallthrough

476 default:	499 default:

477 builder->AddUnicodeCharacter(current());	500 builder->AddUnicodeCharacter(current());

478 Advance();	501 Advance();

479 break;	502 break;

480 } // end switch(current())	503 } // end switch(current())

481	504

482 int min;	505 int min;

483 int max;	506 int max;

484 switch (current()) {	507 switch (current()) {

485 // QuantifierPrefix ::	508 // QuantifierPrefix ::

(...skipping 12 matching lines...) Expand all Loading...
498 Advance();	521 Advance();

499 break;	522 break;

500 case '?':	523 case '?':

501 min = 0;	524 min = 0;

502 max = 1;	525 max = 1;

503 Advance();	526 Advance();

504 break;	527 break;

505 case '{':	528 case '{':

506 if (ParseIntervalQuantifier(&min, &max)) {	529 if (ParseIntervalQuantifier(&min, &max)) {

507 if (max < min) {	530 if (max < min) {

508 ReportError(CStrVector("numbers out of order in {} quantifier.")	531 ReportError(CStrVector("numbers out of order in {} quantifier")

509 CHECK_FAILED);	532 CHECK_FAILED);

510 }	533 }

511 break;	534 break;

512 } else {	535 } else {

	536 if (unicode()) {

	537 // With /u, incomplete quantifiers are not allowed.

	538 ReportError(CStrVector("Incomplete quantifier") CHECK_FAILED);

	539 }

513 continue;	540 continue;

514 }	541 }

515 default:	542 default:

516 continue;	543 continue;

517 }	544 }

518 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;	545 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;

519 if (current() == '?') {	546 if (current() == '?') {

520 quantifier_type = RegExpQuantifier::NON_GREEDY;	547 quantifier_type = RegExpQuantifier::NON_GREEDY;

521 Advance();	548 Advance();

522 } else if (FLAG_regexp_possessive_quantifier && current() == '+') {	549 } else if (FLAG_regexp_possessive_quantifier && current() == '+') {

523 // FLAG_regexp_possessive_quantifier is a debug-only flag.	550 // FLAG_regexp_possessive_quantifier is a debug-only flag.

524 quantifier_type = RegExpQuantifier::POSSESSIVE;	551 quantifier_type = RegExpQuantifier::POSSESSIVE;

525 Advance();	552 Advance();

526 }	553 }

527 builder->AddQuantifierToAtom(min, max, quantifier_type);	554 if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {

	555 ReportError(CStrVector("Invalid quantifier") CHECK_FAILED);

	556 }

528 }	557 }

529 }	558 }

530	559

531	560

532 #ifdef DEBUG	561 #ifdef DEBUG

533 // Currently only used in an DCHECK.	562 // Currently only used in an DCHECK.

534 static bool IsSpecialClassEscape(uc32 c) {	563 static bool IsSpecialClassEscape(uc32 c) {

535 switch (c) {	564 switch (c) {

536 case 'd':	565 case 'd':

537 case 'D':	566 case 'D':

(...skipping 277 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
815 return '\r';	844 return '\r';

816 case 't':	845 case 't':

817 Advance();	846 Advance();

818 return '\t';	847 return '\t';

819 case 'v':	848 case 'v':

820 Advance();	849 Advance();

821 return '\v';	850 return '\v';

822 case 'c': {	851 case 'c': {

823 uc32 controlLetter = Next();	852 uc32 controlLetter = Next();

824 uc32 letter = controlLetter & ~('A' ^ 'a');	853 uc32 letter = controlLetter & ~('A' ^ 'a');

825 // For compatibility with JSC, inside a character class	854 // For compatibility with JSC, inside a character class. We also accept

826 // we also accept digits and underscore as control characters.	855 // digits and underscore as control characters, unless with /u.

827 if ((controlLetter >= '0' && controlLetter <= '9') \|\|	856 if (letter >= 'A' && letter <= 'Z') {

828 controlLetter == '_' \|\| (letter >= 'A' && letter <= 'Z')) {

829 Advance(2);	857 Advance(2);

830 // Control letters mapped to ASCII control characters in the range	858 // Control letters mapped to ASCII control characters in the range

831 // 0x00-0x1f.	859 // 0x00-0x1f.

832 return controlLetter & 0x1f;	860 return controlLetter & 0x1f;

833 }	861 }

	862 if (unicode()) {

	863 // With /u, invalid escapes are not treated as identity escapes.

	864 ReportError(CStrVector("Invalid class escape"));

	865 return 0;

	866 }

	867 if ((controlLetter >= '0' && controlLetter <= '9') \|\|

	868 controlLetter == '_') {

	869 Advance(2);

	870 return controlLetter & 0x1f;

	871 }

834 // We match JSC in reading the backslash as a literal	872 // We match JSC in reading the backslash as a literal

835 // character instead of as starting an escape.	873 // character instead of as starting an escape.

836 return '\\';	874 return '\\';

837 }	875 }

838 case '0':	876 case '0':

839 case '1':	877 case '1':

840 case '2':	878 case '2':

841 case '3':	879 case '3':

842 case '4':	880 case '4':

843 case '5':	881 case '5':

844 case '6':	882 case '6':

845 case '7':	883 case '7':

846 // For compatibility, we interpret a decimal escape that isn't	884 // For compatibility, we interpret a decimal escape that isn't

847 // a back reference (and therefore either \0 or not valid according	885 // a back reference (and therefore either \0 or not valid according

848 // to the specification) as a 1..3 digit octal character code.	886 // to the specification) as a 1..3 digit octal character code.

	887 if (unicode()) {

	888 // With /u, decimal escape is not interpreted as octal character code.

	889 ReportError(CStrVector("Invalid class escape"));

	890 return 0;

	891 }

849 return ParseOctalLiteral();	892 return ParseOctalLiteral();

850 case 'x': {	893 case 'x': {

851 Advance();	894 Advance();

852 uc32 value;	895 uc32 value;

853 if (ParseHexEscape(2, &value)) {	896 if (ParseHexEscape(2, &value)) return value;

854 return value;	897 if (unicode()) {

	898 // With /u, invalid escapes are not treated as identity escapes.

	899 ReportError(CStrVector("Invalid escape"));

	900 return 0;

855 }	901 }

856 if (!unicode()) {	902 // If \x is not followed by a two-digit hexadecimal, treat it

857 // If \x is not followed by a two-digit hexadecimal, treat it	903 // as an identity escape.

858 // as an identity escape.	904 return 'x';

859 return 'x';

860 }

861 // If the 'u' flag is present, invalid escapes are not treated as

862 // identity escapes.

863 ReportError(CStrVector("Invalid escape"));

864 return 0;

865 }	905 }

866 case 'u': {	906 case 'u': {

867 Advance();	907 Advance();

868 uc32 value;	908 uc32 value;

869 if (ParseUnicodeEscape(&value)) {	909 if (ParseUnicodeEscape(&value)) return value;

870 return value;	910 if (unicode()) {

	911 // With /u, invalid escapes are not treated as identity escapes.

	912 ReportError(CStrVector("Invalid unicode escape"));

	913 return 0;

871 }	914 }

872 if (!unicode()) {	915 // If \u is not followed by a two-digit hexadecimal, treat it

873 return 'u';	916 // as an identity escape.

874 }	917 return 'u';

875 // If the 'u' flag is present, invalid escapes are not treated as

876 // identity escapes.

877 ReportError(CStrVector("Invalid unicode escape"));

878 return 0;

879 }	918 }

880 default: {	919 default: {

881 uc32 result = current();	920 uc32 result = current();

882 // If the 'u' flag is present, only syntax characters can be escaped, no	921 // With /u, no identity escapes except for syntax characters are

883 // other identity escapes are allowed. If the 'u' flag is not present, all	922 // allowed. Otherwise, all identity escapes are allowed.

884 // identity escapes are allowed.	923 if (!unicode() \|\| IsSyntaxCharacterOrSlash(result)) {

885 if (!unicode() \|\| IsSyntaxCharacter(result)) {

886 Advance();	924 Advance();

887 return result;	925 return result;

888 }	926 }

889 ReportError(CStrVector("Invalid escape"));	927 ReportError(CStrVector("Invalid escape"));

890 return 0;	928 return 0;

891 }	929 }

892 }	930 }

893 return 0;	931 return 0;

894 }	932 }

895	933

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
949 if (char_class != kNoCharClass) {	987 if (char_class != kNoCharClass) {

950 CharacterRange::AddClassEscape(char_class, ranges, zone);	988 CharacterRange::AddClassEscape(char_class, ranges, zone);

951 } else {	989 } else {

952 ranges->Add(range, zone);	990 ranges->Add(range, zone);

953 }	991 }

954 }	992 }

955	993

956	994

957 RegExpTree* RegExpParser::ParseCharacterClass() {	995 RegExpTree* RegExpParser::ParseCharacterClass() {

958 static const char* kUnterminated = "Unterminated character class";	996 static const char* kUnterminated = "Unterminated character class";

	997 static const char* kRangeInvalid = "Invalid character class";

959 static const char* kRangeOutOfOrder = "Range out of order in character class";	998 static const char* kRangeOutOfOrder = "Range out of order in character class";

960	999

961 DCHECK_EQ(current(), '[');	1000 DCHECK_EQ(current(), '[');

962 Advance();	1001 Advance();

963 bool is_negated = false;	1002 bool is_negated = false;

964 if (current() == '^') {	1003 if (current() == '^') {

965 is_negated = true;	1004 is_negated = true;

966 Advance();	1005 Advance();

967 }	1006 }

968 ZoneList<CharacterRange>* ranges =	1007 ZoneList<CharacterRange>* ranges =

969 new (zone()) ZoneList<CharacterRange>(2, zone());	1008 new (zone()) ZoneList<CharacterRange>(2, zone());

970 while (has_more() && current() != ']') {	1009 while (has_more() && current() != ']') {

971 uc16 char_class = kNoCharClass;	1010 uc16 char_class = kNoCharClass;

972 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);	1011 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);

973 if (current() == '-') {	1012 if (current() == '-') {

974 Advance();	1013 Advance();

975 if (current() == kEndMarker) {	1014 if (current() == kEndMarker) {

976 // If we reach the end we break out of the loop and let the	1015 // If we reach the end we break out of the loop and let the

977 // following code report an error.	1016 // following code report an error.

978 break;	1017 break;

979 } else if (current() == ']') {	1018 } else if (current() == ']') {

980 AddRangeOrEscape(ranges, char_class, first, zone());	1019 AddRangeOrEscape(ranges, char_class, first, zone());

981 ranges->Add(CharacterRange::Singleton('-'), zone());	1020 ranges->Add(CharacterRange::Singleton('-'), zone());

982 break;	1021 break;

983 }	1022 }

984 uc16 char_class_2 = kNoCharClass;	1023 uc16 char_class_2 = kNoCharClass;

985 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);	1024 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);

986 if (char_class != kNoCharClass \|\| char_class_2 != kNoCharClass) {	1025 if (char_class != kNoCharClass \|\| char_class_2 != kNoCharClass) {

987 // Either end is an escaped character class. Treat the '-' verbatim.	1026 // Either end is an escaped character class. Treat the '-' verbatim.

	1027 if (unicode()) {

	1028 // ES2015 21.2.2.15.1 step 1.

	1029 return ReportError(CStrVector(kRangeInvalid) CHECK_FAILED);
	vogelheim 2016/01/28 13:38:21: CHECK_FAILED adds a return, after the unconditional return in this line.
	Yang 2016/01/28 14:01:07 (in reply to vogelheim): You are completely right. I simply copied the code from other places where we call ReportError. Fixed all of those places now.
	1030 }

988 AddRangeOrEscape(ranges, char_class, first, zone());	1031 AddRangeOrEscape(ranges, char_class, first, zone());

989 ranges->Add(CharacterRange::Singleton('-'), zone());	1032 ranges->Add(CharacterRange::Singleton('-'), zone());

990 AddRangeOrEscape(ranges, char_class_2, next, zone());	1033 AddRangeOrEscape(ranges, char_class_2, next, zone());

991 continue;	1034 continue;

992 }	1035 }

	1036 // ES2015 21.2.2.15.1 step 6.

993 if (first.from() > next.to()) {	1037 if (first.from() > next.to()) {

994 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);	1038 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);

995 }	1039 }

996 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());	1040 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());

997 } else {	1041 } else {

998 AddRangeOrEscape(ranges, char_class, first, zone());	1042 AddRangeOrEscape(ranges, char_class, first, zone());

999 }	1043 }

1000 }	1044 }

1001 if (!has_more()) {	1045 if (!has_more()) {

1002 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);	1046 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);

(...skipping 152 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1155 AddCharacter(static_cast<uc16>(c));	1199 AddCharacter(static_cast<uc16>(c));

1156 }	1200 }

1157 }	1201 }

1158	1202

1159	1203

1160 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }	1204 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }

1161	1205

1162	1206

1163 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {	1207 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {

1164 if (NeedsDesugaringForUnicode(cc)) {	1208 if (NeedsDesugaringForUnicode(cc)) {

1165 // In unicode mode, character class needs to be desugared, so it	1209 // With /u, character class needs to be desugared, so it

1166 // must be a standalone term instead of being part of a RegExpText.	1210 // must be a standalone term instead of being part of a RegExpText.

1167 AddTerm(cc);	1211 AddTerm(cc);

1168 } else {	1212 } else {

1169 AddAtom(cc);	1213 AddAtom(cc);

1170 }	1214 }

1171 }	1215 }

1172	1216

1173 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {	1217 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {

1174 AddTerm(new (zone()) RegExpCharacterClass(	1218 AddTerm(new (zone()) RegExpCharacterClass(

1175 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));	1219 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));

(...skipping 92 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1268	1312

1269	1313

1270 RegExpTree* RegExpBuilder::ToRegExp() {	1314 RegExpTree* RegExpBuilder::ToRegExp() {

1271 FlushTerms();	1315 FlushTerms();

1272 int num_alternatives = alternatives_.length();	1316 int num_alternatives = alternatives_.length();

1273 if (num_alternatives == 0) return new (zone()) RegExpEmpty();	1317 if (num_alternatives == 0) return new (zone()) RegExpEmpty();

1274 if (num_alternatives == 1) return alternatives_.last();	1318 if (num_alternatives == 1) return alternatives_.last();

1275 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));	1319 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));

1276 }	1320 }

1277	1321

1278	1322 bool RegExpBuilder::AddQuantifierToAtom(

1279 void RegExpBuilder::AddQuantifierToAtom(

1280 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {	1323 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {

1281 FlushPendingSurrogate();	1324 FlushPendingSurrogate();

1282 if (pending_empty_) {	1325 if (pending_empty_) {

1283 pending_empty_ = false;	1326 pending_empty_ = false;

1284 return;	1327 return true;

1285 }	1328 }

1286 RegExpTree* atom;	1329 RegExpTree* atom;

1287 if (characters_ != NULL) {	1330 if (characters_ != NULL) {

1288 DCHECK(last_added_ == ADD_CHAR);	1331 DCHECK(last_added_ == ADD_CHAR);

1289 // Last atom was character.	1332 // Last atom was character.

1290 Vector<const uc16> char_vector = characters_->ToConstVector();	1333 Vector<const uc16> char_vector = characters_->ToConstVector();

1291 int num_chars = char_vector.length();	1334 int num_chars = char_vector.length();

1292 if (num_chars > 1) {	1335 if (num_chars > 1) {

1293 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);	1336 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);

1294 text_.Add(new (zone()) RegExpAtom(prefix), zone());	1337 text_.Add(new (zone()) RegExpAtom(prefix), zone());

1295 char_vector = char_vector.SubVector(num_chars - 1, num_chars);	1338 char_vector = char_vector.SubVector(num_chars - 1, num_chars);

1296 }	1339 }

1297 characters_ = NULL;	1340 characters_ = NULL;

1298 atom = new (zone()) RegExpAtom(char_vector);	1341 atom = new (zone()) RegExpAtom(char_vector);

1299 FlushText();	1342 FlushText();

1300 } else if (text_.length() > 0) {	1343 } else if (text_.length() > 0) {

1301 DCHECK(last_added_ == ADD_ATOM);	1344 DCHECK(last_added_ == ADD_ATOM);

1302 atom = text_.RemoveLast();	1345 atom = text_.RemoveLast();

1303 FlushText();	1346 FlushText();

1304 } else if (terms_.length() > 0) {	1347 } else if (terms_.length() > 0) {

1305 DCHECK(last_added_ == ADD_ATOM);	1348 DCHECK(last_added_ == ADD_ATOM);

1306 atom = terms_.RemoveLast();	1349 atom = terms_.RemoveLast();

	1350 // With /u, lookarounds are not quantifiable.

	1351 if (unicode() && atom->IsLookaround()) return false;

1307 if (atom->max_match() == 0) {	1352 if (atom->max_match() == 0) {

1308 // Guaranteed to only match an empty string.	1353 // Guaranteed to only match an empty string.

1309 LAST(ADD_TERM);	1354 LAST(ADD_TERM);

1310 if (min == 0) {	1355 if (min == 0) {

1311 return;	1356 return true;

1312 }	1357 }

1313 terms_.Add(atom, zone());	1358 terms_.Add(atom, zone());

1314 return;	1359 return true;

1315 }	1360 }

1316 } else {	1361 } else {

1317 // Only call immediately after adding an atom or character!	1362 // Only call immediately after adding an atom or character!

1318 UNREACHABLE();	1363 UNREACHABLE();

1319 return;	1364 return false;

1320 }	1365 }

1321 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),	1366 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),

1322 zone());	1367 zone());

1323 LAST(ADD_TERM);	1368 LAST(ADD_TERM);

	1369 return true;

1324 }	1370 }

1325	1371

1326 } // namespace internal	1372 } // namespace internal

1327 } // namespace v8	1373 } // namespace v8

OLD	NEW

« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/unicode-regexp-restricted-syntax.js » ('j') | no next file with comments »