src/regexp/regexp-parser.cc - Issue 1645573002: [regexp] restrict pattern syntax for unicode mode.

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1645573002: [regexp] restrict pattern syntax for unicode mode. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@stage

Patch Set: addressed comments Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/regexp-parser.h"	5 #include "src/regexp/regexp-parser.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/factory.h"	8 #include "src/factory.h"

9 #include "src/isolate.h"	9 #include "src/isolate.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

(...skipping 84 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
95	95

96	96

97 void RegExpParser::Advance(int dist) {	97 void RegExpParser::Advance(int dist) {

98 next_pos_ += dist - 1;	98 next_pos_ += dist - 1;

99 Advance();	99 Advance();

100 }	100 }

101	101

102	102

103 bool RegExpParser::simple() { return simple_; }	103 bool RegExpParser::simple() { return simple_; }

104	104

105	105 bool RegExpParser::IsSyntaxCharacterOrSlash(uc32 c) {

106 bool RegExpParser::IsSyntaxCharacter(uc32 c) {	106 switch (c) {

107 return c == '^' \|\| c == '$' \|\| c == '\\' \|\| c == '.' \|\| c == '*' \|\|	107 case '^':

108 c == '+' \|\| c == '?' \|\| c == '(' \|\| c == ')' \|\| c == '[' \|\| c == ']' \|\|	108 case '$':

109 c == '{' \|\| c == '}' \|\| c == '\|';	109 case '\\':

	110 case '.':

	111 case '*':

	112 case '+':

	113 case '?':

	114 case '(':

	115 case ')':

	116 case '[':

	117 case ']':

	118 case '{':

	119 case '}':

	120 case '\|':

	121 case '/':

	122 return true;

	123 default:

	124 break;

	125 }

	126 return false;

110 }	127 }

111	128

112	129

113 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {	130 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {

114 failed_ = true;	131 failed_ = true;

115 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();	132 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();

116 // Zip to the end to make sure that no more input is read.	133 // Zip to the end to make sure that no more input is read.

117 current_ = kEndMarker;	134 current_ = kEndMarker;

118 next_pos_ = in()->length();	135 next_pos_ = in()->length();

119 return NULL;	136 return NULL;

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
154 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,	171 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,

155 flags_, zone());	172 flags_, zone());

156 RegExpParserState* state = &initial_state;	173 RegExpParserState* state = &initial_state;

157 // Cache the builder in a local variable for quick access.	174 // Cache the builder in a local variable for quick access.

158 RegExpBuilder* builder = initial_state.builder();	175 RegExpBuilder* builder = initial_state.builder();

159 while (true) {	176 while (true) {

160 switch (current()) {	177 switch (current()) {

161 case kEndMarker:	178 case kEndMarker:

162 if (state->IsSubexpression()) {	179 if (state->IsSubexpression()) {

163 // Inside a parenthesized group when hitting end of input.	180 // Inside a parenthesized group when hitting end of input.

164 ReportError(CStrVector("Unterminated group") CHECK_FAILED);	181 return ReportError(CStrVector("Unterminated group"));

165 }	182 }

166 DCHECK_EQ(INITIAL, state->group_type());	183 DCHECK_EQ(INITIAL, state->group_type());

167 // Parsing completed successfully.	184 // Parsing completed successfully.

168 return builder->ToRegExp();	185 return builder->ToRegExp();

169 case ')': {	186 case ')': {

170 if (!state->IsSubexpression()) {	187 if (!state->IsSubexpression()) {

171 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);	188 return ReportError(CStrVector("Unmatched ')'"));

172 }	189 }

173 DCHECK_NE(INITIAL, state->group_type());	190 DCHECK_NE(INITIAL, state->group_type());

174	191

175 Advance();	192 Advance();

176 // End disjunction parsing and convert builder content to new single	193 // End disjunction parsing and convert builder content to new single

177 // regexp atom.	194 // regexp atom.

178 RegExpTree* body = builder->ToRegExp();	195 RegExpTree* body = builder->ToRegExp();

179	196

180 int end_capture_index = captures_started();	197 int end_capture_index = captures_started();

181	198

(...skipping 87 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
269 if (Next() == '=') {	286 if (Next() == '=') {

270 subexpr_type = POSITIVE_LOOKAROUND;	287 subexpr_type = POSITIVE_LOOKAROUND;

271 break;	288 break;

272 } else if (Next() == '!') {	289 } else if (Next() == '!') {

273 subexpr_type = NEGATIVE_LOOKAROUND;	290 subexpr_type = NEGATIVE_LOOKAROUND;

274 break;	291 break;

275 }	292 }

276 }	293 }

277 // Fall through.	294 // Fall through.

278 default:	295 default:

279 ReportError(CStrVector("Invalid group") CHECK_FAILED);	296 return ReportError(CStrVector("Invalid group"));

280 break;

281 }	297 }

282 Advance(2);	298 Advance(2);

283 } else {	299 } else {

284 if (captures_started_ >= kMaxCaptures) {	300 if (captures_started_ >= kMaxCaptures) {

285 ReportError(CStrVector("Too many captures") CHECK_FAILED);	301 return ReportError(CStrVector("Too many captures"));

286 }	302 }

287 captures_started_++;	303 captures_started_++;

288 }	304 }

289 // Store current state and begin new disjunction parsing.	305 // Store current state and begin new disjunction parsing.

290 state =	306 state =

291 new (zone()) RegExpParserState(state, subexpr_type, lookaround_type,	307 new (zone()) RegExpParserState(state, subexpr_type, lookaround_type,

292 captures_started_, flags_, zone());	308 captures_started_, flags_, zone());

293 builder = state->builder();	309 builder = state->builder();

294 continue;	310 continue;

295 }	311 }

(...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
353 // the capture registers of the referenced capture are either	369 // the capture registers of the referenced capture are either

354 // both set or both cleared.	370 // both set or both cleared.

355 builder->AddEmpty();	371 builder->AddEmpty();

356 } else {	372 } else {

357 RegExpCapture* capture = GetCapture(index);	373 RegExpCapture* capture = GetCapture(index);

358 RegExpTree* atom = new (zone()) RegExpBackReference(capture);	374 RegExpTree* atom = new (zone()) RegExpBackReference(capture);

359 builder->AddAtom(atom);	375 builder->AddAtom(atom);

360 }	376 }

361 break;	377 break;

362 }	378 }

	379 // With /u, no identity escapes except for syntax characters

	380 // are allowed. Otherwise, all identity escapes are allowed.

	381 if (unicode()) {

	382 return ReportError(CStrVector("Invalid escape"));

	383 }

363 uc32 first_digit = Next();	384 uc32 first_digit = Next();

364 if (first_digit == '8' \|\| first_digit == '9') {	385 if (first_digit == '8' \|\| first_digit == '9') {

365 // If the 'u' flag is present, only syntax characters can be	386 builder->AddCharacter(first_digit);

366 // escaped,	387 Advance(2);

367 // no other identity escapes are allowed. If the 'u' flag is not

368 // present, all identity escapes are allowed.

369 if (!unicode()) {

370 builder->AddCharacter(first_digit);

371 Advance(2);

372 } else {

373 return ReportError(CStrVector("Invalid escape"));

374 }

375 break;	388 break;

376 }	389 }

377 }	390 }

378 // FALLTHROUGH	391 // FALLTHROUGH

379 case '0': {	392 case '0': {

380 Advance();	393 Advance();

	394 if (unicode() && Next() >= '0' && Next() <= '9') {

	395 // With /u, decimal escapes with a leading 0 are not parsed as octal.

	396 return ReportError(CStrVector("Invalid decimal escape"));

	397 }

381 uc32 octal = ParseOctalLiteral();	398 uc32 octal = ParseOctalLiteral();

382 builder->AddCharacter(octal);	399 builder->AddCharacter(octal);

383 break;	400 break;

384 }	401 }

385 // ControlEscape :: one of	402 // ControlEscape :: one of

386 // f n r t v	403 // f n r t v

387 case 'f':	404 case 'f':

388 Advance(2);	405 Advance(2);

389 builder->AddCharacter('\f');	406 builder->AddCharacter('\f');

390 break;	407 break;

(...skipping 17 matching lines...) Expand all Loading...
408 Advance();	425 Advance();

409 uc32 controlLetter = Next();	426 uc32 controlLetter = Next();

410 // Special case if it is an ASCII letter.	427 // Special case if it is an ASCII letter.

411 // Convert lower case letters to uppercase.	428 // Convert lower case letters to uppercase.

412 uc32 letter = controlLetter & ~('a' ^ 'A');	429 uc32 letter = controlLetter & ~('a' ^ 'A');

413 if (letter < 'A' \|\| 'Z' < letter) {	430 if (letter < 'A' \|\| 'Z' < letter) {

414 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.	431 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.

415 // This is outside the specification. We match JSC in	432 // This is outside the specification. We match JSC in

416 // reading the backslash as a literal character instead	433 // reading the backslash as a literal character instead

417 // of as starting an escape.	434 // of as starting an escape.

	435 if (unicode()) {

	436 // With /u, invalid escapes are not treated as identity escapes.

	437 return ReportError(CStrVector("Invalid unicode escape"));

	438 }

418 builder->AddCharacter('\\');	439 builder->AddCharacter('\\');

419 } else {	440 } else {

420 Advance(2);	441 Advance(2);

421 builder->AddCharacter(controlLetter & 0x1f);	442 builder->AddCharacter(controlLetter & 0x1f);

422 }	443 }

423 break;	444 break;

424 }	445 }

425 case 'x': {	446 case 'x': {

426 Advance(2);	447 Advance(2);

427 uc32 value;	448 uc32 value;

428 if (ParseHexEscape(2, &value)) {	449 if (ParseHexEscape(2, &value)) {

429 builder->AddCharacter(value);	450 builder->AddCharacter(value);

430 } else if (!unicode()) {	451 } else if (!unicode()) {

431 builder->AddCharacter('x');	452 builder->AddCharacter('x');

432 } else {	453 } else {

433 // If the 'u' flag is present, invalid escapes are not treated as	454 // With /u, invalid escapes are not treated as identity escapes.

434 // identity escapes.

435 return ReportError(CStrVector("Invalid escape"));	455 return ReportError(CStrVector("Invalid escape"));

436 }	456 }

437 break;	457 break;

438 }	458 }

439 case 'u': {	459 case 'u': {

440 Advance(2);	460 Advance(2);

441 uc32 value;	461 uc32 value;

442 if (ParseUnicodeEscape(&value)) {	462 if (ParseUnicodeEscape(&value)) {

443 builder->AddUnicodeCharacter(value);	463 builder->AddUnicodeCharacter(value);

444 } else if (!unicode()) {	464 } else if (!unicode()) {

445 builder->AddCharacter('u');	465 builder->AddCharacter('u');

446 } else {	466 } else {

447 // If the 'u' flag is present, invalid escapes are not treated as	467 // With /u, invalid escapes are not treated as identity escapes.

448 // identity escapes.

449 return ReportError(CStrVector("Invalid unicode escape"));	468 return ReportError(CStrVector("Invalid unicode escape"));

450 }	469 }

451 break;	470 break;

452 }	471 }

453 default:	472 default:

454 Advance();	473 Advance();

455 // If the 'u' flag is present, only syntax characters can be	474 // With /u, no identity escapes except for syntax characters

456 // escaped, no	475 // are allowed. Otherwise, all identity escapes are allowed.

457 // other identity escapes are allowed. If the 'u' flag is not	476 if (!unicode() \|\| IsSyntaxCharacterOrSlash(current())) {

458 // present,

459 // all identity escapes are allowed.

460 if (!unicode() \|\| IsSyntaxCharacter(current())) {

461 builder->AddCharacter(current());	477 builder->AddCharacter(current());

462 Advance();	478 Advance();

463 } else {	479 } else {

464 return ReportError(CStrVector("Invalid escape"));	480 return ReportError(CStrVector("Invalid escape"));

465 }	481 }

466 break;	482 break;

467 }	483 }

468 break;	484 break;

469 case '{': {	485 case '{': {

470 int dummy;	486 int dummy;

471 if (ParseIntervalQuantifier(&dummy, &dummy)) {	487 if (ParseIntervalQuantifier(&dummy, &dummy)) {

472 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);	488 return ReportError(CStrVector("Nothing to repeat"));

473 }	489 }

474 // fallthrough	490 // fallthrough

475 }	491 }

	492 case '}':

	493 case ']':

	494 if (unicode()) {

	495 return ReportError(CStrVector("Lone quantifier brackets"));

	496 }

	497 // fallthrough

476 default:	498 default:

477 builder->AddUnicodeCharacter(current());	499 builder->AddUnicodeCharacter(current());

478 Advance();	500 Advance();

479 break;	501 break;

480 } // end switch(current())	502 } // end switch(current())

481	503

482 int min;	504 int min;

483 int max;	505 int max;

484 switch (current()) {	506 switch (current()) {

485 // QuantifierPrefix ::	507 // QuantifierPrefix ::

(...skipping 12 matching lines...) Expand all Loading...
498 Advance();	520 Advance();

499 break;	521 break;

500 case '?':	522 case '?':

501 min = 0;	523 min = 0;

502 max = 1;	524 max = 1;

503 Advance();	525 Advance();

504 break;	526 break;

505 case '{':	527 case '{':

506 if (ParseIntervalQuantifier(&min, &max)) {	528 if (ParseIntervalQuantifier(&min, &max)) {

507 if (max < min) {	529 if (max < min) {

508 ReportError(CStrVector("numbers out of order in {} quantifier.")	530 return ReportError(

509 CHECK_FAILED);	531 CStrVector("numbers out of order in {} quantifier"));

510 }	532 }

511 break;	533 break;

512 } else {	534 } else if (unicode()) {

513 continue;	535 // With /u, incomplete quantifiers are not allowed.

	536 return ReportError(CStrVector("Incomplete quantifier"));

514 }	537 }

	538 continue;

515 default:	539 default:

516 continue;	540 continue;

517 }	541 }

518 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;	542 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;

519 if (current() == '?') {	543 if (current() == '?') {

520 quantifier_type = RegExpQuantifier::NON_GREEDY;	544 quantifier_type = RegExpQuantifier::NON_GREEDY;

521 Advance();	545 Advance();

522 } else if (FLAG_regexp_possessive_quantifier && current() == '+') {	546 } else if (FLAG_regexp_possessive_quantifier && current() == '+') {

523 // FLAG_regexp_possessive_quantifier is a debug-only flag.	547 // FLAG_regexp_possessive_quantifier is a debug-only flag.

524 quantifier_type = RegExpQuantifier::POSSESSIVE;	548 quantifier_type = RegExpQuantifier::POSSESSIVE;

525 Advance();	549 Advance();

526 }	550 }

527 builder->AddQuantifierToAtom(min, max, quantifier_type);	551 if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {

	552 return ReportError(CStrVector("Invalid quantifier"));

	553 }

528 }	554 }

529 }	555 }

530	556

531	557

532 #ifdef DEBUG	558 #ifdef DEBUG

533 // Currently only used in a DCHECK.	559 // Currently only used in a DCHECK.

534 static bool IsSpecialClassEscape(uc32 c) {	560 static bool IsSpecialClassEscape(uc32 c) {

535 switch (c) {	561 switch (c) {

536 case 'd':	562 case 'd':

537 case 'D':	563 case 'D':

(...skipping 277 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
815 return '\r';	841 return '\r';

816 case 't':	842 case 't':

817 Advance();	843 Advance();

818 return '\t';	844 return '\t';

819 case 'v':	845 case 'v':

820 Advance();	846 Advance();

821 return '\v';	847 return '\v';

822 case 'c': {	848 case 'c': {

823 uc32 controlLetter = Next();	849 uc32 controlLetter = Next();

824 uc32 letter = controlLetter & ~('A' ^ 'a');	850 uc32 letter = controlLetter & ~('A' ^ 'a');

825 // For compatibility with JSC, inside a character class	851 // For compatibility with JSC, inside a character class we also accept

826 // we also accept digits and underscore as control characters.	852 // digits and underscore as control characters, unless /u is set.

827 if ((controlLetter >= '0' && controlLetter <= '9') \|\|	853 if (letter >= 'A' && letter <= 'Z') {

828 controlLetter == '_' \|\| (letter >= 'A' && letter <= 'Z')) {

829 Advance(2);	854 Advance(2);

830 // Control letters mapped to ASCII control characters in the range	855 // Control letters mapped to ASCII control characters in the range

831 // 0x00-0x1f.	856 // 0x00-0x1f.

832 return controlLetter & 0x1f;	857 return controlLetter & 0x1f;

833 }	858 }

	859 if (unicode()) {

	860 // With /u, invalid escapes are not treated as identity escapes.

	861 ReportError(CStrVector("Invalid class escape"));

	862 return 0;

	863 }

	864 if ((controlLetter >= '0' && controlLetter <= '9') \|\|

	865 controlLetter == '_') {

	866 Advance(2);

	867 return controlLetter & 0x1f;

	868 }

834 // We match JSC in reading the backslash as a literal	869 // We match JSC in reading the backslash as a literal

835 // character instead of as starting an escape.	870 // character instead of as starting an escape.

836 return '\\';	871 return '\\';

837 }	872 }

838 case '0':	873 case '0':

839 case '1':	874 case '1':

840 case '2':	875 case '2':

841 case '3':	876 case '3':

842 case '4':	877 case '4':

843 case '5':	878 case '5':

844 case '6':	879 case '6':

845 case '7':	880 case '7':

846 // For compatibility, we interpret a decimal escape that isn't	881 // For compatibility, we interpret a decimal escape that isn't

847 // a back reference (and therefore either \0 or not valid according	882 // a back reference (and therefore either \0 or not valid according

848 // to the specification) as a 1..3 digit octal character code.	883 // to the specification) as a 1..3 digit octal character code.

	884 if (unicode()) {

	885 // With /u, decimal escape is not interpreted as octal character code.

	886 ReportError(CStrVector("Invalid class escape"));

	887 return 0;

	888 }

849 return ParseOctalLiteral();	889 return ParseOctalLiteral();

850 case 'x': {	890 case 'x': {

851 Advance();	891 Advance();

852 uc32 value;	892 uc32 value;

853 if (ParseHexEscape(2, &value)) {	893 if (ParseHexEscape(2, &value)) return value;

854 return value;	894 if (unicode()) {

	895 // With /u, invalid escapes are not treated as identity escapes.

	896 ReportError(CStrVector("Invalid escape"));

	897 return 0;

855 }	898 }

856 if (!unicode()) {	899 // If \x is not followed by a two-digit hexadecimal, treat it

857 // If \x is not followed by a two-digit hexadecimal, treat it	900 // as an identity escape.

858 // as an identity escape.	901 return 'x';

859 return 'x';

860 }

861 // If the 'u' flag is present, invalid escapes are not treated as

862 // identity escapes.

863 ReportError(CStrVector("Invalid escape"));

864 return 0;

865 }	902 }

866 case 'u': {	903 case 'u': {

867 Advance();	904 Advance();

868 uc32 value;	905 uc32 value;

869 if (ParseUnicodeEscape(&value)) {	906 if (ParseUnicodeEscape(&value)) return value;

870 return value;	907 if (unicode()) {

	908 // With /u, invalid escapes are not treated as identity escapes.

	909 ReportError(CStrVector("Invalid unicode escape"));

	910 return 0;

871 }	911 }

872 if (!unicode()) {	912 // If \u is not followed by a valid unicode escape sequence, treat it

873 return 'u';	913 // as an identity escape.

874 }	914 return 'u';

875 // If the 'u' flag is present, invalid escapes are not treated as

876 // identity escapes.

877 ReportError(CStrVector("Invalid unicode escape"));

878 return 0;

879 }	915 }

880 default: {	916 default: {

881 uc32 result = current();	917 uc32 result = current();

882 // If the 'u' flag is present, only syntax characters can be escaped, no	918 // With /u, no identity escapes except for syntax characters are

883 // other identity escapes are allowed. If the 'u' flag is not present, all	919 // allowed. Otherwise, all identity escapes are allowed.

884 // identity escapes are allowed.	920 if (!unicode() \|\| IsSyntaxCharacterOrSlash(result)) {

885 if (!unicode() \|\| IsSyntaxCharacter(result)) {

886 Advance();	921 Advance();

887 return result;	922 return result;

888 }	923 }

889 ReportError(CStrVector("Invalid escape"));	924 ReportError(CStrVector("Invalid escape"));

890 return 0;	925 return 0;

891 }	926 }

892 }	927 }

893 return 0;	928 return 0;

894 }	929 }

895	930

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
949 if (char_class != kNoCharClass) {	984 if (char_class != kNoCharClass) {

950 CharacterRange::AddClassEscape(char_class, ranges, zone);	985 CharacterRange::AddClassEscape(char_class, ranges, zone);

951 } else {	986 } else {

952 ranges->Add(range, zone);	987 ranges->Add(range, zone);

953 }	988 }

954 }	989 }

955	990

956	991

957 RegExpTree* RegExpParser::ParseCharacterClass() {	992 RegExpTree* RegExpParser::ParseCharacterClass() {

958 static const char* kUnterminated = "Unterminated character class";	993 static const char* kUnterminated = "Unterminated character class";

	994 static const char* kRangeInvalid = "Invalid character class";

959 static const char* kRangeOutOfOrder = "Range out of order in character class";	995 static const char* kRangeOutOfOrder = "Range out of order in character class";

960	996

961 DCHECK_EQ(current(), '[');	997 DCHECK_EQ(current(), '[');

962 Advance();	998 Advance();

963 bool is_negated = false;	999 bool is_negated = false;

964 if (current() == '^') {	1000 if (current() == '^') {

965 is_negated = true;	1001 is_negated = true;

966 Advance();	1002 Advance();

967 }	1003 }

968 ZoneList<CharacterRange>* ranges =	1004 ZoneList<CharacterRange>* ranges =

969 new (zone()) ZoneList<CharacterRange>(2, zone());	1005 new (zone()) ZoneList<CharacterRange>(2, zone());

970 while (has_more() && current() != ']') {	1006 while (has_more() && current() != ']') {

971 uc16 char_class = kNoCharClass;	1007 uc16 char_class = kNoCharClass;

972 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);	1008 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);

973 if (current() == '-') {	1009 if (current() == '-') {

974 Advance();	1010 Advance();

975 if (current() == kEndMarker) {	1011 if (current() == kEndMarker) {

976 // If we reach the end we break out of the loop and let the	1012 // If we reach the end we break out of the loop and let the

977 // following code report an error.	1013 // following code report an error.

978 break;	1014 break;

979 } else if (current() == ']') {	1015 } else if (current() == ']') {

980 AddRangeOrEscape(ranges, char_class, first, zone());	1016 AddRangeOrEscape(ranges, char_class, first, zone());

981 ranges->Add(CharacterRange::Singleton('-'), zone());	1017 ranges->Add(CharacterRange::Singleton('-'), zone());

982 break;	1018 break;

983 }	1019 }

984 uc16 char_class_2 = kNoCharClass;	1020 uc16 char_class_2 = kNoCharClass;

985 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);	1021 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);

986 if (char_class != kNoCharClass \|\| char_class_2 != kNoCharClass) {	1022 if (char_class != kNoCharClass \|\| char_class_2 != kNoCharClass) {

987 // Either end is an escaped character class. Treat the '-' verbatim.	1023 // Either end is an escaped character class. Treat the '-' verbatim.

	1024 if (unicode()) {

	1025 // ES2015 21.2.2.15.1 step 1.

	1026 return ReportError(CStrVector(kRangeInvalid));

	1027 }

988 AddRangeOrEscape(ranges, char_class, first, zone());	1028 AddRangeOrEscape(ranges, char_class, first, zone());

989 ranges->Add(CharacterRange::Singleton('-'), zone());	1029 ranges->Add(CharacterRange::Singleton('-'), zone());

990 AddRangeOrEscape(ranges, char_class_2, next, zone());	1030 AddRangeOrEscape(ranges, char_class_2, next, zone());

991 continue;	1031 continue;

992 }	1032 }

	1033 // ES2015 21.2.2.15.1 step 6.

993 if (first.from() > next.to()) {	1034 if (first.from() > next.to()) {

994 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);	1035 return ReportError(CStrVector(kRangeOutOfOrder));

995 }	1036 }

996 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());	1037 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());

997 } else {	1038 } else {

998 AddRangeOrEscape(ranges, char_class, first, zone());	1039 AddRangeOrEscape(ranges, char_class, first, zone());

999 }	1040 }

1000 }	1041 }

1001 if (!has_more()) {	1042 if (!has_more()) {

1002 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);	1043 return ReportError(CStrVector(kUnterminated));

1003 }	1044 }

1004 Advance();	1045 Advance();

1005 if (ranges->length() == 0) {	1046 if (ranges->length() == 0) {

1006 ranges->Add(CharacterRange::Everything(), zone());	1047 ranges->Add(CharacterRange::Everything(), zone());

1007 is_negated = !is_negated;	1048 is_negated = !is_negated;

1008 }	1049 }

1009 return new (zone()) RegExpCharacterClass(ranges, is_negated);	1050 return new (zone()) RegExpCharacterClass(ranges, is_negated);

1010 }	1051 }

1011	1052

1012	1053

(...skipping 142 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1155 AddCharacter(static_cast<uc16>(c));	1196 AddCharacter(static_cast<uc16>(c));

1156 }	1197 }

1157 }	1198 }

1158	1199

1159	1200

1160 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }	1201 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }

1161	1202

1162	1203

1163 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {	1204 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {

1164 if (NeedsDesugaringForUnicode(cc)) {	1205 if (NeedsDesugaringForUnicode(cc)) {

1165 // In unicode mode, character class needs to be desugared, so it	1206 // With /u, character class needs to be desugared, so it

1166 // must be a standalone term instead of being part of a RegExpText.	1207 // must be a standalone term instead of being part of a RegExpText.

1167 AddTerm(cc);	1208 AddTerm(cc);

1168 } else {	1209 } else {

1169 AddAtom(cc);	1210 AddAtom(cc);

1170 }	1211 }

1171 }	1212 }

1172	1213

1173 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {	1214 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {

1174 AddTerm(new (zone()) RegExpCharacterClass(	1215 AddTerm(new (zone()) RegExpCharacterClass(

1175 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));	1216 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));

(...skipping 92 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1268	1309

1269	1310

1270 RegExpTree* RegExpBuilder::ToRegExp() {	1311 RegExpTree* RegExpBuilder::ToRegExp() {

1271 FlushTerms();	1312 FlushTerms();

1272 int num_alternatives = alternatives_.length();	1313 int num_alternatives = alternatives_.length();

1273 if (num_alternatives == 0) return new (zone()) RegExpEmpty();	1314 if (num_alternatives == 0) return new (zone()) RegExpEmpty();

1274 if (num_alternatives == 1) return alternatives_.last();	1315 if (num_alternatives == 1) return alternatives_.last();

1275 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));	1316 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));

1276 }	1317 }

1277	1318

1278	1319 bool RegExpBuilder::AddQuantifierToAtom(

1279 void RegExpBuilder::AddQuantifierToAtom(

1280 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {	1320 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {

1281 FlushPendingSurrogate();	1321 FlushPendingSurrogate();

1282 if (pending_empty_) {	1322 if (pending_empty_) {

1283 pending_empty_ = false;	1323 pending_empty_ = false;

1284 return;	1324 return true;

1285 }	1325 }

1286 RegExpTree* atom;	1326 RegExpTree* atom;

1287 if (characters_ != NULL) {	1327 if (characters_ != NULL) {

1288 DCHECK(last_added_ == ADD_CHAR);	1328 DCHECK(last_added_ == ADD_CHAR);

1289 // Last atom was character.	1329 // Last atom was character.

1290 Vector<const uc16> char_vector = characters_->ToConstVector();	1330 Vector<const uc16> char_vector = characters_->ToConstVector();

1291 int num_chars = char_vector.length();	1331 int num_chars = char_vector.length();

1292 if (num_chars > 1) {	1332 if (num_chars > 1) {

1293 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);	1333 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);

1294 text_.Add(new (zone()) RegExpAtom(prefix), zone());	1334 text_.Add(new (zone()) RegExpAtom(prefix), zone());

1295 char_vector = char_vector.SubVector(num_chars - 1, num_chars);	1335 char_vector = char_vector.SubVector(num_chars - 1, num_chars);

1296 }	1336 }

1297 characters_ = NULL;	1337 characters_ = NULL;

1298 atom = new (zone()) RegExpAtom(char_vector);	1338 atom = new (zone()) RegExpAtom(char_vector);

1299 FlushText();	1339 FlushText();

1300 } else if (text_.length() > 0) {	1340 } else if (text_.length() > 0) {

1301 DCHECK(last_added_ == ADD_ATOM);	1341 DCHECK(last_added_ == ADD_ATOM);

1302 atom = text_.RemoveLast();	1342 atom = text_.RemoveLast();

1303 FlushText();	1343 FlushText();

1304 } else if (terms_.length() > 0) {	1344 } else if (terms_.length() > 0) {

1305 DCHECK(last_added_ == ADD_ATOM);	1345 DCHECK(last_added_ == ADD_ATOM);

1306 atom = terms_.RemoveLast();	1346 atom = terms_.RemoveLast();

	1347 // With /u, lookarounds are not quantifiable.

	1348 if (unicode() && atom->IsLookaround()) return false;

1307 if (atom->max_match() == 0) {	1349 if (atom->max_match() == 0) {

1308 // Guaranteed to only match an empty string.	1350 // Guaranteed to only match an empty string.

1309 LAST(ADD_TERM);	1351 LAST(ADD_TERM);

1310 if (min == 0) {	1352 if (min == 0) {

1311 return;	1353 return true;

1312 }	1354 }

1313 terms_.Add(atom, zone());	1355 terms_.Add(atom, zone());

1314 return;	1356 return true;

1315 }	1357 }

1316 } else {	1358 } else {

1317 // Only call immediately after adding an atom or character!	1359 // Only call immediately after adding an atom or character!

1318 UNREACHABLE();	1360 UNREACHABLE();

1319 return;	1361 return false;

1320 }	1362 }

1321 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),	1363 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),

1322 zone());	1364 zone());

1323 LAST(ADD_TERM);	1365 LAST(ADD_TERM);

	1366 return true;

1324 }	1367 }

1325	1368

1326 } // namespace internal	1369 } // namespace internal

1327 } // namespace v8	1370 } // namespace v8

OLD	NEW

« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/unicode-regexp-restricted-syntax.js » ('j') | no next file with comments »