src/regexp/regexp-parser.cc - Issue 1648673002: Revert of [regexp] restrict pattern syntax for unicode mode.

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1648673002: Revert of [regexp] restrict pattern syntax for unicode mode. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@stage

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/regexp-parser.h"	5 #include "src/regexp/regexp-parser.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/factory.h"	8 #include "src/factory.h"

9 #include "src/isolate.h"	9 #include "src/isolate.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

(...skipping 84 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
95	95

96	96

97 void RegExpParser::Advance(int dist) {	97 void RegExpParser::Advance(int dist) {

98 next_pos_ += dist - 1;	98 next_pos_ += dist - 1;

99 Advance();	99 Advance();

100 }	100 }

101	101

102	102

103 bool RegExpParser::simple() { return simple_; }	103 bool RegExpParser::simple() { return simple_; }

104	104

105 bool RegExpParser::IsSyntaxCharacterOrSlash(uc32 c) {	105

106 switch (c) {	106 bool RegExpParser::IsSyntaxCharacter(uc32 c) {

107 case '^':	107 return c == '^' \|\| c == '$' \|\| c == '\\' \|\| c == '.' \|\| c == '*' \|\|

108 case '$':	108 c == '+' \|\| c == '?' \|\| c == '(' \|\| c == ')' \|\| c == '[' \|\| c == ']' \|\|

109 case '\\':	109 c == '{' \|\| c == '}' \|\| c == '\|';

110 case '.':

111 case '*':

112 case '+':

113 case '?':

114 case '(':

115 case ')':

116 case '[':

117 case ']':

118 case '{':

119 case '}':

120 case '\|':

121 case '/':

122 return true;

123 default:

124 break;

125 }

126 return false;

127 }	110 }

128	111

129	112

130 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {	113 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {

131 failed_ = true;	114 failed_ = true;

132 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();	115 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();

133 // Zip to the end to make sure that no more input is read.	116 // Zip to the end to make sure that no more input is read.

134 current_ = kEndMarker;	117 current_ = kEndMarker;

135 next_pos_ = in()->length();	118 next_pos_ = in()->length();

136 return NULL;	119 return NULL;

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
171 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,	154 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,

172 flags_, zone());	155 flags_, zone());

173 RegExpParserState* state = &initial_state;	156 RegExpParserState* state = &initial_state;

174 // Cache the builder in a local variable for quick access.	157 // Cache the builder in a local variable for quick access.

175 RegExpBuilder* builder = initial_state.builder();	158 RegExpBuilder* builder = initial_state.builder();

176 while (true) {	159 while (true) {

177 switch (current()) {	160 switch (current()) {

178 case kEndMarker:	161 case kEndMarker:

179 if (state->IsSubexpression()) {	162 if (state->IsSubexpression()) {

180 // Inside a parenthesized group when hitting end of input.	163 // Inside a parenthesized group when hitting end of input.

181 return ReportError(CStrVector("Unterminated group"));	164 ReportError(CStrVector("Unterminated group") CHECK_FAILED);

182 }	165 }

183 DCHECK_EQ(INITIAL, state->group_type());	166 DCHECK_EQ(INITIAL, state->group_type());

184 // Parsing completed successfully.	167 // Parsing completed successfully.

185 return builder->ToRegExp();	168 return builder->ToRegExp();

186 case ')': {	169 case ')': {

187 if (!state->IsSubexpression()) {	170 if (!state->IsSubexpression()) {

188 return ReportError(CStrVector("Unmatched ')'"));	171 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);

189 }	172 }

190 DCHECK_NE(INITIAL, state->group_type());	173 DCHECK_NE(INITIAL, state->group_type());

191	174

192 Advance();	175 Advance();

193 // End disjunction parsing and convert builder content to new single	176 // End disjunction parsing and convert builder content to new single

194 // regexp atom.	177 // regexp atom.

195 RegExpTree* body = builder->ToRegExp();	178 RegExpTree* body = builder->ToRegExp();

196	179

197 int end_capture_index = captures_started();	180 int end_capture_index = captures_started();

198	181

(...skipping 87 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
286 if (Next() == '=') {	269 if (Next() == '=') {

287 subexpr_type = POSITIVE_LOOKAROUND;	270 subexpr_type = POSITIVE_LOOKAROUND;

288 break;	271 break;

289 } else if (Next() == '!') {	272 } else if (Next() == '!') {

290 subexpr_type = NEGATIVE_LOOKAROUND;	273 subexpr_type = NEGATIVE_LOOKAROUND;

291 break;	274 break;

292 }	275 }

293 }	276 }

294 // Fall through.	277 // Fall through.

295 default:	278 default:

296 return ReportError(CStrVector("Invalid group"));	279 ReportError(CStrVector("Invalid group") CHECK_FAILED);

	280 break;

297 }	281 }

298 Advance(2);	282 Advance(2);

299 } else {	283 } else {

300 if (captures_started_ >= kMaxCaptures) {	284 if (captures_started_ >= kMaxCaptures) {

301 return ReportError(CStrVector("Too many captures"));	285 ReportError(CStrVector("Too many captures") CHECK_FAILED);

302 }	286 }

303 captures_started_++;	287 captures_started_++;

304 }	288 }

305 // Store current state and begin new disjunction parsing.	289 // Store current state and begin new disjunction parsing.

306 state =	290 state =

307 new (zone()) RegExpParserState(state, subexpr_type, lookaround_type,	291 new (zone()) RegExpParserState(state, subexpr_type, lookaround_type,

308 captures_started_, flags_, zone());	292 captures_started_, flags_, zone());

309 builder = state->builder();	293 builder = state->builder();

310 continue;	294 continue;

311 }	295 }

(...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
369 // the capture registers of the referenced capture are either	353 // the capture registers of the referenced capture are either

370 // both set or both cleared.	354 // both set or both cleared.

371 builder->AddEmpty();	355 builder->AddEmpty();

372 } else {	356 } else {

373 RegExpCapture* capture = GetCapture(index);	357 RegExpCapture* capture = GetCapture(index);

374 RegExpTree* atom = new (zone()) RegExpBackReference(capture);	358 RegExpTree* atom = new (zone()) RegExpBackReference(capture);

375 builder->AddAtom(atom);	359 builder->AddAtom(atom);

376 }	360 }

377 break;	361 break;

378 }	362 }

379 // With /u, no identity escapes except for syntax characters

380 // are allowed. Otherwise, all identity escapes are allowed.

381 if (unicode()) {

382 return ReportError(CStrVector("Invalid escape"));

383 }

384 uc32 first_digit = Next();	363 uc32 first_digit = Next();

385 if (first_digit == '8' \|\| first_digit == '9') {	364 if (first_digit == '8' \|\| first_digit == '9') {

386 builder->AddCharacter(first_digit);	365 // If the 'u' flag is present, only syntax characters can be

387 Advance(2);	366 // escaped,

	367 // no other identity escapes are allowed. If the 'u' flag is not

	368 // present, all identity escapes are allowed.

	369 if (!unicode()) {

	370 builder->AddCharacter(first_digit);

	371 Advance(2);

	372 } else {

	373 return ReportError(CStrVector("Invalid escape"));

	374 }

388 break;	375 break;

389 }	376 }

390 }	377 }

391 // FALLTHROUGH	378 // FALLTHROUGH

392 case '0': {	379 case '0': {

393 Advance();	380 Advance();

394 if (unicode() && Next() >= '0' && Next() <= '9') {

395 // With /u, decimal escape with leading 0 are not parsed as octal.

396 return ReportError(CStrVector("Invalid decimal escape"));

397 }

398 uc32 octal = ParseOctalLiteral();	381 uc32 octal = ParseOctalLiteral();

399 builder->AddCharacter(octal);	382 builder->AddCharacter(octal);

400 break;	383 break;

401 }	384 }

402 // ControlEscape :: one of	385 // ControlEscape :: one of

403 // f n r t v	386 // f n r t v

404 case 'f':	387 case 'f':

405 Advance(2);	388 Advance(2);

406 builder->AddCharacter('\f');	389 builder->AddCharacter('\f');

407 break;	390 break;

(...skipping 17 matching lines...) Expand all Loading...
425 Advance();	408 Advance();

426 uc32 controlLetter = Next();	409 uc32 controlLetter = Next();

427 // Special case if it is an ASCII letter.	410 // Special case if it is an ASCII letter.

428 // Convert lower case letters to uppercase.	411 // Convert lower case letters to uppercase.

429 uc32 letter = controlLetter & ~('a' ^ 'A');	412 uc32 letter = controlLetter & ~('a' ^ 'A');

430 if (letter < 'A' \|\| 'Z' < letter) {	413 if (letter < 'A' \|\| 'Z' < letter) {

431 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.	414 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.

432 // This is outside the specification. We match JSC in	415 // This is outside the specification. We match JSC in

433 // reading the backslash as a literal character instead	416 // reading the backslash as a literal character instead

434 // of as starting an escape.	417 // of as starting an escape.

435 if (unicode()) {

436 // With /u, invalid escapes are not treated as identity escapes.

437 return ReportError(CStrVector("Invalid unicode escape"));

438 }

439 builder->AddCharacter('\\');	418 builder->AddCharacter('\\');

440 } else {	419 } else {

441 Advance(2);	420 Advance(2);

442 builder->AddCharacter(controlLetter & 0x1f);	421 builder->AddCharacter(controlLetter & 0x1f);

443 }	422 }

444 break;	423 break;

445 }	424 }

446 case 'x': {	425 case 'x': {

447 Advance(2);	426 Advance(2);

448 uc32 value;	427 uc32 value;

449 if (ParseHexEscape(2, &value)) {	428 if (ParseHexEscape(2, &value)) {

450 builder->AddCharacter(value);	429 builder->AddCharacter(value);

451 } else if (!unicode()) {	430 } else if (!unicode()) {

452 builder->AddCharacter('x');	431 builder->AddCharacter('x');

453 } else {	432 } else {

454 // With /u, invalid escapes are not treated as identity escapes.	433 // If the 'u' flag is present, invalid escapes are not treated as

	434 // identity escapes.

455 return ReportError(CStrVector("Invalid escape"));	435 return ReportError(CStrVector("Invalid escape"));

456 }	436 }

457 break;	437 break;

458 }	438 }

459 case 'u': {	439 case 'u': {

460 Advance(2);	440 Advance(2);

461 uc32 value;	441 uc32 value;

462 if (ParseUnicodeEscape(&value)) {	442 if (ParseUnicodeEscape(&value)) {

463 builder->AddUnicodeCharacter(value);	443 builder->AddUnicodeCharacter(value);

464 } else if (!unicode()) {	444 } else if (!unicode()) {

465 builder->AddCharacter('u');	445 builder->AddCharacter('u');

466 } else {	446 } else {

467 // With /u, invalid escapes are not treated as identity escapes.	447 // If the 'u' flag is present, invalid escapes are not treated as

	448 // identity escapes.

468 return ReportError(CStrVector("Invalid unicode escape"));	449 return ReportError(CStrVector("Invalid unicode escape"));

469 }	450 }

470 break;	451 break;

471 }	452 }

472 default:	453 default:

473 Advance();	454 Advance();

474 // With /u, no identity escapes except for syntax characters	455 // If the 'u' flag is present, only syntax characters can be

475 // are allowed. Otherwise, all identity escapes are allowed.	456 // escaped, no

476 if (!unicode() \|\| IsSyntaxCharacterOrSlash(current())) {	457 // other identity escapes are allowed. If the 'u' flag is not

	458 // present,

	459 // all identity escapes are allowed.

	460 if (!unicode() \|\| IsSyntaxCharacter(current())) {

477 builder->AddCharacter(current());	461 builder->AddCharacter(current());

478 Advance();	462 Advance();

479 } else {	463 } else {

480 return ReportError(CStrVector("Invalid escape"));	464 return ReportError(CStrVector("Invalid escape"));

481 }	465 }

482 break;	466 break;

483 }	467 }

484 break;	468 break;

485 case '{': {	469 case '{': {

486 int dummy;	470 int dummy;

487 if (ParseIntervalQuantifier(&dummy, &dummy)) {	471 if (ParseIntervalQuantifier(&dummy, &dummy)) {

488 return ReportError(CStrVector("Nothing to repeat"));	472 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);

489 }	473 }

490 // fallthrough	474 // fallthrough

491 }	475 }

492 case '}':

493 case ']':

494 if (unicode()) {

495 return ReportError(CStrVector("Lone quantifier brackets"));

496 }

497 // fallthrough

498 default:	476 default:

499 builder->AddUnicodeCharacter(current());	477 builder->AddUnicodeCharacter(current());

500 Advance();	478 Advance();

501 break;	479 break;

502 } // end switch(current())	480 } // end switch(current())

503	481

504 int min;	482 int min;

505 int max;	483 int max;

506 switch (current()) {	484 switch (current()) {

507 // QuantifierPrefix ::	485 // QuantifierPrefix ::

(...skipping 12 matching lines...) Expand all Loading...
520 Advance();	498 Advance();

521 break;	499 break;

522 case '?':	500 case '?':

523 min = 0;	501 min = 0;

524 max = 1;	502 max = 1;

525 Advance();	503 Advance();

526 break;	504 break;

527 case '{':	505 case '{':

528 if (ParseIntervalQuantifier(&min, &max)) {	506 if (ParseIntervalQuantifier(&min, &max)) {

529 if (max < min) {	507 if (max < min) {

530 return ReportError(	508 ReportError(CStrVector("numbers out of order in {} quantifier.")

531 CStrVector("numbers out of order in {} quantifier"));	509 CHECK_FAILED);

532 }	510 }

533 break;	511 break;

534 } else if (unicode()) {	512 } else {

535 // With /u, incomplete quantifiers are not allowed.	513 continue;

536 return ReportError(CStrVector("Incomplete quantifier"));

537 }	514 }

538 continue;

539 default:	515 default:

540 continue;	516 continue;

541 }	517 }

542 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;	518 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;

543 if (current() == '?') {	519 if (current() == '?') {

544 quantifier_type = RegExpQuantifier::NON_GREEDY;	520 quantifier_type = RegExpQuantifier::NON_GREEDY;

545 Advance();	521 Advance();

546 } else if (FLAG_regexp_possessive_quantifier && current() == '+') {	522 } else if (FLAG_regexp_possessive_quantifier && current() == '+') {

547 // FLAG_regexp_possessive_quantifier is a debug-only flag.	523 // FLAG_regexp_possessive_quantifier is a debug-only flag.

548 quantifier_type = RegExpQuantifier::POSSESSIVE;	524 quantifier_type = RegExpQuantifier::POSSESSIVE;

549 Advance();	525 Advance();

550 }	526 }

551 if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {	527 builder->AddQuantifierToAtom(min, max, quantifier_type);

552 return ReportError(CStrVector("Invalid quantifier"));

553 }

554 }	528 }

555 }	529 }

556	530

557	531

558 #ifdef DEBUG	532 #ifdef DEBUG

559 // Currently only used in a DCHECK.	533 // Currently only used in a DCHECK.

560 static bool IsSpecialClassEscape(uc32 c) {	534 static bool IsSpecialClassEscape(uc32 c) {

561 switch (c) {	535 switch (c) {

562 case 'd':	536 case 'd':

563 case 'D':	537 case 'D':

(...skipping 277 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
841 return '\r';	815 return '\r';

842 case 't':	816 case 't':

843 Advance();	817 Advance();

844 return '\t';	818 return '\t';

845 case 'v':	819 case 'v':

846 Advance();	820 Advance();

847 return '\v';	821 return '\v';

848 case 'c': {	822 case 'c': {

849 uc32 controlLetter = Next();	823 uc32 controlLetter = Next();

850 uc32 letter = controlLetter & ~('A' ^ 'a');	824 uc32 letter = controlLetter & ~('A' ^ 'a');

851 // For compatibility with JSC, inside a character class. We also accept	825 // For compatibility with JSC, inside a character class

852 // digits and underscore as control characters, unless with /u.	826 // we also accept digits and underscore as control characters.

853 if (letter >= 'A' && letter <= 'Z') {	827 if ((controlLetter >= '0' && controlLetter <= '9') \|\|

	828 controlLetter == '_' \|\| (letter >= 'A' && letter <= 'Z')) {

854 Advance(2);	829 Advance(2);

855 // Control letters mapped to ASCII control characters in the range	830 // Control letters mapped to ASCII control characters in the range

856 // 0x00-0x1f.	831 // 0x00-0x1f.

857 return controlLetter & 0x1f;	832 return controlLetter & 0x1f;

858 }	833 }

859 if (unicode()) {

860 // With /u, invalid escapes are not treated as identity escapes.

861 ReportError(CStrVector("Invalid class escape"));

862 return 0;

863 }

864 if ((controlLetter >= '0' && controlLetter <= '9') \|\|

865 controlLetter == '_') {

866 Advance(2);

867 return controlLetter & 0x1f;

868 }

869 // We match JSC in reading the backslash as a literal	834 // We match JSC in reading the backslash as a literal

870 // character instead of as starting an escape.	835 // character instead of as starting an escape.

871 return '\\';	836 return '\\';

872 }	837 }

873 case '0':	838 case '0':

874 case '1':	839 case '1':

875 case '2':	840 case '2':

876 case '3':	841 case '3':

877 case '4':	842 case '4':

878 case '5':	843 case '5':

879 case '6':	844 case '6':

880 case '7':	845 case '7':

881 // For compatibility, we interpret a decimal escape that isn't	846 // For compatibility, we interpret a decimal escape that isn't

882 // a back reference (and therefore either \0 or not valid according	847 // a back reference (and therefore either \0 or not valid according

883 // to the specification) as a 1..3 digit octal character code.	848 // to the specification) as a 1..3 digit octal character code.

884 if (unicode()) {

885 // With /u, decimal escape is not interpreted as octal character code.

886 ReportError(CStrVector("Invalid class escape"));

887 return 0;

888 }

889 return ParseOctalLiteral();	849 return ParseOctalLiteral();

890 case 'x': {	850 case 'x': {

891 Advance();	851 Advance();

892 uc32 value;	852 uc32 value;

893 if (ParseHexEscape(2, &value)) return value;	853 if (ParseHexEscape(2, &value)) {

894 if (unicode()) {	854 return value;

895 // With /u, invalid escapes are not treated as identity escapes.

896 ReportError(CStrVector("Invalid escape"));

897 return 0;

898 }	855 }

899 // If \x is not followed by a two-digit hexadecimal, treat it	856 if (!unicode()) {

900 // as an identity escape.	857 // If \x is not followed by a two-digit hexadecimal, treat it

901 return 'x';	858 // as an identity escape.

	859 return 'x';

	860 }

	861 // If the 'u' flag is present, invalid escapes are not treated as

	862 // identity escapes.

	863 ReportError(CStrVector("Invalid escape"));

	864 return 0;

902 }	865 }

903 case 'u': {	866 case 'u': {

904 Advance();	867 Advance();

905 uc32 value;	868 uc32 value;

906 if (ParseUnicodeEscape(&value)) return value;	869 if (ParseUnicodeEscape(&value)) {

907 if (unicode()) {	870 return value;

908 // With /u, invalid escapes are not treated as identity escapes.

909 ReportError(CStrVector("Invalid unicode escape"));

910 return 0;

911 }	871 }

912 // If \u is not followed by a four-digit hexadecimal, treat it	872 if (!unicode()) {

913 // as an identity escape.	873 return 'u';

914 return 'u';	874 }

	875 // If the 'u' flag is present, invalid escapes are not treated as

	876 // identity escapes.

	877 ReportError(CStrVector("Invalid unicode escape"));

	878 return 0;

915 }	879 }

916 default: {	880 default: {

917 uc32 result = current();	881 uc32 result = current();

918 // With /u, no identity escapes except for syntax characters are	882 // If the 'u' flag is present, only syntax characters can be escaped, no

919 // allowed. Otherwise, all identity escapes are allowed.	883 // other identity escapes are allowed. If the 'u' flag is not present, all

920 if (!unicode() \|\| IsSyntaxCharacterOrSlash(result)) {	884 // identity escapes are allowed.

	885 if (!unicode() \|\| IsSyntaxCharacter(result)) {

921 Advance();	886 Advance();

922 return result;	887 return result;

923 }	888 }

924 ReportError(CStrVector("Invalid escape"));	889 ReportError(CStrVector("Invalid escape"));

925 return 0;	890 return 0;

926 }	891 }

927 }	892 }

928 return 0;	893 return 0;

929 }	894 }

930	895

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
984 if (char_class != kNoCharClass) {	949 if (char_class != kNoCharClass) {

985 CharacterRange::AddClassEscape(char_class, ranges, zone);	950 CharacterRange::AddClassEscape(char_class, ranges, zone);

986 } else {	951 } else {

987 ranges->Add(range, zone);	952 ranges->Add(range, zone);

988 }	953 }

989 }	954 }

990	955

991	956

992 RegExpTree* RegExpParser::ParseCharacterClass() {	957 RegExpTree* RegExpParser::ParseCharacterClass() {

993 static const char* kUnterminated = "Unterminated character class";	958 static const char* kUnterminated = "Unterminated character class";

994 static const char* kRangeInvalid = "Invalid character class";

995 static const char* kRangeOutOfOrder = "Range out of order in character class";	959 static const char* kRangeOutOfOrder = "Range out of order in character class";

996	960

997 DCHECK_EQ(current(), '[');	961 DCHECK_EQ(current(), '[');

998 Advance();	962 Advance();

999 bool is_negated = false;	963 bool is_negated = false;

1000 if (current() == '^') {	964 if (current() == '^') {

1001 is_negated = true;	965 is_negated = true;

1002 Advance();	966 Advance();

1003 }	967 }

1004 ZoneList<CharacterRange>* ranges =	968 ZoneList<CharacterRange>* ranges =

1005 new (zone()) ZoneList<CharacterRange>(2, zone());	969 new (zone()) ZoneList<CharacterRange>(2, zone());

1006 while (has_more() && current() != ']') {	970 while (has_more() && current() != ']') {

1007 uc16 char_class = kNoCharClass;	971 uc16 char_class = kNoCharClass;

1008 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);	972 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);

1009 if (current() == '-') {	973 if (current() == '-') {

1010 Advance();	974 Advance();

1011 if (current() == kEndMarker) {	975 if (current() == kEndMarker) {

1012 // If we reach the end we break out of the loop and let the	976 // If we reach the end we break out of the loop and let the

1013 // following code report an error.	977 // following code report an error.

1014 break;	978 break;

1015 } else if (current() == ']') {	979 } else if (current() == ']') {

1016 AddRangeOrEscape(ranges, char_class, first, zone());	980 AddRangeOrEscape(ranges, char_class, first, zone());

1017 ranges->Add(CharacterRange::Singleton('-'), zone());	981 ranges->Add(CharacterRange::Singleton('-'), zone());

1018 break;	982 break;

1019 }	983 }

1020 uc16 char_class_2 = kNoCharClass;	984 uc16 char_class_2 = kNoCharClass;

1021 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);	985 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);

1022 if (char_class != kNoCharClass \|\| char_class_2 != kNoCharClass) {	986 if (char_class != kNoCharClass \|\| char_class_2 != kNoCharClass) {

1023 // Either end is an escaped character class. Treat the '-' verbatim.	987 // Either end is an escaped character class. Treat the '-' verbatim.

1024 if (unicode()) {

1025 // ES2015 21.2.2.15.1 step 1.

1026 return ReportError(CStrVector(kRangeInvalid));

1027 }

1028 AddRangeOrEscape(ranges, char_class, first, zone());	988 AddRangeOrEscape(ranges, char_class, first, zone());

1029 ranges->Add(CharacterRange::Singleton('-'), zone());	989 ranges->Add(CharacterRange::Singleton('-'), zone());

1030 AddRangeOrEscape(ranges, char_class_2, next, zone());	990 AddRangeOrEscape(ranges, char_class_2, next, zone());

1031 continue;	991 continue;

1032 }	992 }

1033 // ES2015 21.2.2.15.1 step 6.

1034 if (first.from() > next.to()) {	993 if (first.from() > next.to()) {

1035 return ReportError(CStrVector(kRangeOutOfOrder));	994 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);

1036 }	995 }

1037 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());	996 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());

1038 } else {	997 } else {

1039 AddRangeOrEscape(ranges, char_class, first, zone());	998 AddRangeOrEscape(ranges, char_class, first, zone());

1040 }	999 }

1041 }	1000 }

1042 if (!has_more()) {	1001 if (!has_more()) {

1043 return ReportError(CStrVector(kUnterminated));	1002 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);

1044 }	1003 }

1045 Advance();	1004 Advance();

1046 if (ranges->length() == 0) {	1005 if (ranges->length() == 0) {

1047 ranges->Add(CharacterRange::Everything(), zone());	1006 ranges->Add(CharacterRange::Everything(), zone());

1048 is_negated = !is_negated;	1007 is_negated = !is_negated;

1049 }	1008 }

1050 return new (zone()) RegExpCharacterClass(ranges, is_negated);	1009 return new (zone()) RegExpCharacterClass(ranges, is_negated);

1051 }	1010 }

1052	1011

1053	1012

(...skipping 142 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1196 AddCharacter(static_cast<uc16>(c));	1155 AddCharacter(static_cast<uc16>(c));

1197 }	1156 }

1198 }	1157 }

1199	1158

1200	1159

1201 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }	1160 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }

1202	1161

1203	1162

1204 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {	1163 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {

1205 if (NeedsDesugaringForUnicode(cc)) {	1164 if (NeedsDesugaringForUnicode(cc)) {

1206 // With /u, character class needs to be desugared, so it	1165 // In unicode mode, character class needs to be desugared, so it

1207 // must be a standalone term instead of being part of a RegExpText.	1166 // must be a standalone term instead of being part of a RegExpText.

1208 AddTerm(cc);	1167 AddTerm(cc);

1209 } else {	1168 } else {

1210 AddAtom(cc);	1169 AddAtom(cc);

1211 }	1170 }

1212 }	1171 }

1213	1172

1214 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {	1173 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {

1215 AddTerm(new (zone()) RegExpCharacterClass(	1174 AddTerm(new (zone()) RegExpCharacterClass(

1216 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));	1175 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));

(...skipping 92 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1309	1268

1310	1269

1311 RegExpTree* RegExpBuilder::ToRegExp() {	1270 RegExpTree* RegExpBuilder::ToRegExp() {

1312 FlushTerms();	1271 FlushTerms();

1313 int num_alternatives = alternatives_.length();	1272 int num_alternatives = alternatives_.length();

1314 if (num_alternatives == 0) return new (zone()) RegExpEmpty();	1273 if (num_alternatives == 0) return new (zone()) RegExpEmpty();

1315 if (num_alternatives == 1) return alternatives_.last();	1274 if (num_alternatives == 1) return alternatives_.last();

1316 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));	1275 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));

1317 }	1276 }

1318	1277

1319 bool RegExpBuilder::AddQuantifierToAtom(	1278

	1279 void RegExpBuilder::AddQuantifierToAtom(

1320 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {	1280 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {

1321 FlushPendingSurrogate();	1281 FlushPendingSurrogate();

1322 if (pending_empty_) {	1282 if (pending_empty_) {

1323 pending_empty_ = false;	1283 pending_empty_ = false;

1324 return true;	1284 return;

1325 }	1285 }

1326 RegExpTree* atom;	1286 RegExpTree* atom;

1327 if (characters_ != NULL) {	1287 if (characters_ != NULL) {

1328 DCHECK(last_added_ == ADD_CHAR);	1288 DCHECK(last_added_ == ADD_CHAR);

1329 // Last atom was character.	1289 // Last atom was character.

1330 Vector<const uc16> char_vector = characters_->ToConstVector();	1290 Vector<const uc16> char_vector = characters_->ToConstVector();

1331 int num_chars = char_vector.length();	1291 int num_chars = char_vector.length();

1332 if (num_chars > 1) {	1292 if (num_chars > 1) {

1333 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);	1293 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);

1334 text_.Add(new (zone()) RegExpAtom(prefix), zone());	1294 text_.Add(new (zone()) RegExpAtom(prefix), zone());

1335 char_vector = char_vector.SubVector(num_chars - 1, num_chars);	1295 char_vector = char_vector.SubVector(num_chars - 1, num_chars);

1336 }	1296 }

1337 characters_ = NULL;	1297 characters_ = NULL;

1338 atom = new (zone()) RegExpAtom(char_vector);	1298 atom = new (zone()) RegExpAtom(char_vector);

1339 FlushText();	1299 FlushText();

1340 } else if (text_.length() > 0) {	1300 } else if (text_.length() > 0) {

1341 DCHECK(last_added_ == ADD_ATOM);	1301 DCHECK(last_added_ == ADD_ATOM);

1342 atom = text_.RemoveLast();	1302 atom = text_.RemoveLast();

1343 FlushText();	1303 FlushText();

1344 } else if (terms_.length() > 0) {	1304 } else if (terms_.length() > 0) {

1345 DCHECK(last_added_ == ADD_ATOM);	1305 DCHECK(last_added_ == ADD_ATOM);

1346 atom = terms_.RemoveLast();	1306 atom = terms_.RemoveLast();

1347 // With /u, lookarounds are not quantifiable.

1348 if (unicode() && atom->IsLookaround()) return false;

1349 if (atom->max_match() == 0) {	1307 if (atom->max_match() == 0) {

1350 // Guaranteed to only match an empty string.	1308 // Guaranteed to only match an empty string.

1351 LAST(ADD_TERM);	1309 LAST(ADD_TERM);

1352 if (min == 0) {	1310 if (min == 0) {

1353 return true;	1311 return;

1354 }	1312 }

1355 terms_.Add(atom, zone());	1313 terms_.Add(atom, zone());

1356 return true;	1314 return;

1357 }	1315 }

1358 } else {	1316 } else {

1359 // Only call immediately after adding an atom or character!	1317 // Only call immediately after adding an atom or character!

1360 UNREACHABLE();	1318 UNREACHABLE();

1361 return false;	1319 return;

1362 }	1320 }

1363 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),	1321 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),

1364 zone());	1322 zone());

1365 LAST(ADD_TERM);	1323 LAST(ADD_TERM);

1366 return true;

1367 }	1324 }

1368	1325

1369 } // namespace internal	1326 } // namespace internal

1370 } // namespace v8	1327 } // namespace v8

OLD	NEW

« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/unicode-regexp-restricted-syntax.js » ('j') | no next file with comments »