src/regexp/regexp-parser.cc - Issue 2050343002: [regexp] Experimental support for regexp named captures

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2050343002: [regexp] Experimental support for regexp named captures (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Rebase Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/regexp-parser.h"	5 #include "src/regexp/regexp-parser.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/factory.h"	8 #include "src/factory.h"

9 #include "src/isolate.h"	9 #include "src/isolate.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

11 #include "src/ostreams.h"	11 #include "src/ostreams.h"

12 #include "src/regexp/jsregexp.h"	12 #include "src/regexp/jsregexp.h"

13 #include "src/utils.h"	13 #include "src/utils.h"

14	14

15 #ifdef V8_I18N_SUPPORT	15 #ifdef V8_I18N_SUPPORT

16 #include "unicode/uset.h"	16 #include "unicode/uset.h"

17 #endif // V8_I18N_SUPPORT	17 #endif // V8_I18N_SUPPORT

18	18

19 namespace v8 {	19 namespace v8 {

20 namespace internal {	20 namespace internal {

21	21

22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,	22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,

23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone)	23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone)

24 : isolate_(isolate),	24 : isolate_(isolate),

25 zone_(zone),	25 zone_(zone),

26 error_(error),	26 error_(error),

27 captures_(NULL),	27 captures_(NULL),

	28 named_captures_(NULL),

	29 named_back_references_(NULL),

28 in_(in),	30 in_(in),

29 current_(kEndMarker),	31 current_(kEndMarker),

30 ignore_case_(flags & JSRegExp::kIgnoreCase),	32 ignore_case_(flags & JSRegExp::kIgnoreCase),

31 multiline_(flags & JSRegExp::kMultiline),	33 multiline_(flags & JSRegExp::kMultiline),

32 unicode_(flags & JSRegExp::kUnicode),	34 unicode_(flags & JSRegExp::kUnicode),

33 next_pos_(0),	35 next_pos_(0),

34 captures_started_(0),	36 captures_started_(0),

35 capture_count_(0),	37 capture_count_(0),

36 has_more_(true),	38 has_more_(true),

37 simple_(false),	39 simple_(false),

(...skipping 104 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
142	144

143 #define CHECK_FAILED /**/); \	145 #define CHECK_FAILED /**/); \

144 if (failed_) return NULL; \	146 if (failed_) return NULL; \

145 ((void)0	147 ((void)0

146	148

147	149

148 // Pattern ::	150 // Pattern ::

149 // Disjunction	151 // Disjunction

150 RegExpTree* RegExpParser::ParsePattern() {	152 RegExpTree* RegExpParser::ParsePattern() {

151 RegExpTree* result = ParseDisjunction(CHECK_FAILED);	153 RegExpTree* result = ParseDisjunction(CHECK_FAILED);

	154 PatchNamedBackReferences(CHECK_FAILED);

152 DCHECK(!has_more());	155 DCHECK(!has_more());

153 // If the result of parsing is a literal string atom, and it has the	156 // If the result of parsing is a literal string atom, and it has the

154 // same length as the input, then the atom is identical to the input.	157 // same length as the input, then the atom is identical to the input.

155 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {	158 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {

156 simple_ = true;	159 simple_ = true;

157 }	160 }

158 return result;	161 return result;

159 }	162 }

160	163

161	164

162 // Disjunction ::	165 // Disjunction ::

163 // Alternative	166 // Alternative

164 // Alternative \| Disjunction	167 // Alternative \| Disjunction

165 // Alternative ::	168 // Alternative ::

166 // [empty]	169 // [empty]

167 // Term Alternative	170 // Term Alternative

168 // Term ::	171 // Term ::

169 // Assertion	172 // Assertion

170 // Atom	173 // Atom

171 // Atom Quantifier	174 // Atom Quantifier

172 RegExpTree* RegExpParser::ParseDisjunction() {	175 RegExpTree* RegExpParser::ParseDisjunction() {

173 // Used to store current state while parsing subexpressions.	176 // Used to store current state while parsing subexpressions.

174 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,	177 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,

175 ignore_case(), unicode(), zone());	178 nullptr, ignore_case(), unicode(), zone());

176 RegExpParserState* state = &initial_state;	179 RegExpParserState* state = &initial_state;

177 // Cache the builder in a local variable for quick access.	180 // Cache the builder in a local variable for quick access.

178 RegExpBuilder* builder = initial_state.builder();	181 RegExpBuilder* builder = initial_state.builder();

179 while (true) {	182 while (true) {

180 switch (current()) {	183 switch (current()) {

181 case kEndMarker:	184 case kEndMarker:

182 if (state->IsSubexpression()) {	185 if (state->IsSubexpression()) {

183 // Inside a parenthesized group when hitting end of input.	186 // Inside a parenthesized group when hitting end of input.

184 return ReportError(CStrVector("Unterminated group"));	187 return ReportError(CStrVector("Unterminated group"));

185 }	188 }

(...skipping 11 matching lines...) Expand all Loading...
197 // regexp atom.	200 // regexp atom.

198 RegExpTree* body = builder->ToRegExp();	201 RegExpTree* body = builder->ToRegExp();

199	202

200 int end_capture_index = captures_started();	203 int end_capture_index = captures_started();

201	204

202 int capture_index = state->capture_index();	205 int capture_index = state->capture_index();

203 SubexpressionType group_type = state->group_type();	206 SubexpressionType group_type = state->group_type();

204	207

205 // Build result of subexpression.	208 // Build result of subexpression.

206 if (group_type == CAPTURE) {	209 if (group_type == CAPTURE) {

	210 if (state->IsNamedCapture()) {

	211 CreateNamedCaptureAtIndex(state->capture_name(),

	212 capture_index CHECK_FAILED);

	213 }

207 RegExpCapture* capture = GetCapture(capture_index);	214 RegExpCapture* capture = GetCapture(capture_index);

208 capture->set_body(body);	215 capture->set_body(body);

209 body = capture;	216 body = capture;

210 } else if (group_type != GROUPING) {	217 } else if (group_type != GROUPING) {

211 DCHECK(group_type == POSITIVE_LOOKAROUND \|\|	218 DCHECK(group_type == POSITIVE_LOOKAROUND \|\|

212 group_type == NEGATIVE_LOOKAROUND);	219 group_type == NEGATIVE_LOOKAROUND);

213 bool is_positive = (group_type == POSITIVE_LOOKAROUND);	220 bool is_positive = (group_type == POSITIVE_LOOKAROUND);

214 body = new (zone()) RegExpLookaround(	221 body = new (zone()) RegExpLookaround(

215 body, is_positive, end_capture_index - capture_index,	222 body, is_positive, end_capture_index - capture_index,

216 capture_index, state->lookaround_type());	223 capture_index, state->lookaround_type());

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
261 new (zone()) ZoneList<CharacterRange>(2, zone());	268 new (zone()) ZoneList<CharacterRange>(2, zone());

262 CharacterRange::AddClassEscape('.', ranges, zone());	269 CharacterRange::AddClassEscape('.', ranges, zone());

263 RegExpCharacterClass* cc =	270 RegExpCharacterClass* cc =

264 new (zone()) RegExpCharacterClass(ranges, false);	271 new (zone()) RegExpCharacterClass(ranges, false);

265 builder->AddCharacterClass(cc);	272 builder->AddCharacterClass(cc);

266 break;	273 break;

267 }	274 }

268 case '(': {	275 case '(': {

269 SubexpressionType subexpr_type = CAPTURE;	276 SubexpressionType subexpr_type = CAPTURE;

270 RegExpLookaround::Type lookaround_type = state->lookaround_type();	277 RegExpLookaround::Type lookaround_type = state->lookaround_type();

	278 bool is_named_capture = false;

271 Advance();	279 Advance();

272 if (current() == '?') {	280 if (current() == '?') {

273 switch (Next()) {	281 switch (Next()) {

274 case ':':	282 case ':':

275 subexpr_type = GROUPING;	283 subexpr_type = GROUPING;

	284 Advance(2);

276 break;	285 break;

277 case '=':	286 case '=':

278 lookaround_type = RegExpLookaround::LOOKAHEAD;	287 lookaround_type = RegExpLookaround::LOOKAHEAD;

279 subexpr_type = POSITIVE_LOOKAROUND;	288 subexpr_type = POSITIVE_LOOKAROUND;

	289 Advance(2);

280 break;	290 break;

281 case '!':	291 case '!':

282 lookaround_type = RegExpLookaround::LOOKAHEAD;	292 lookaround_type = RegExpLookaround::LOOKAHEAD;

283 subexpr_type = NEGATIVE_LOOKAROUND;	293 subexpr_type = NEGATIVE_LOOKAROUND;

	294 Advance(2);

284 break;	295 break;

285 case '<':	296 case '<':

	297 Advance();

286 if (FLAG_harmony_regexp_lookbehind) {	298 if (FLAG_harmony_regexp_lookbehind) {

287 Advance();

288 lookaround_type = RegExpLookaround::LOOKBEHIND;

289 if (Next() == '=') {	299 if (Next() == '=') {

290 subexpr_type = POSITIVE_LOOKAROUND;	300 subexpr_type = POSITIVE_LOOKAROUND;

	301 lookaround_type = RegExpLookaround::LOOKBEHIND;

	302 Advance(2);

291 break;	303 break;

292 } else if (Next() == '!') {	304 } else if (Next() == '!') {

293 subexpr_type = NEGATIVE_LOOKAROUND;	305 subexpr_type = NEGATIVE_LOOKAROUND;

	306 lookaround_type = RegExpLookaround::LOOKBEHIND;

	307 Advance(2);

294 break;	308 break;

295 }	309 }

296 }	310 }

	311 if (FLAG_harmony_regexp_named_captures && unicode()) {

	312 is_named_capture = true;

	313 Advance();

	314 break;

	315 }

297 // Fall through.	316 // Fall through.

298 default:	317 default:

299 return ReportError(CStrVector("Invalid group"));	318 return ReportError(CStrVector("Invalid group"));

300 }	319 }

301 Advance(2);	320 }

302 } else {	321

	322 const ZoneVector<uc16>* capture_name = nullptr;

	323 if (subexpr_type == CAPTURE) {

303 if (captures_started_ >= kMaxCaptures) {	324 if (captures_started_ >= kMaxCaptures) {

304 return ReportError(CStrVector("Too many captures"));	325 return ReportError(CStrVector("Too many captures"));

305 }	326 }

306 captures_started_++;	327 captures_started_++;

	328

	329 if (is_named_capture) {

	330 capture_name = ParseCaptureGroupName(CHECK_FAILED);

	331 }

307 }	332 }

308 // Store current state and begin new disjunction parsing.	333 // Store current state and begin new disjunction parsing.

309 state = new (zone()) RegExpParserState(	334 state = new (zone()) RegExpParserState(

310 state, subexpr_type, lookaround_type, captures_started_,	335 state, subexpr_type, lookaround_type, captures_started_,

311 ignore_case(), unicode(), zone());	336 capture_name, ignore_case(), unicode(), zone());

312 builder = state->builder();	337 builder = state->builder();

313 continue;	338 continue;

314 }	339 }

315 case '[': {	340 case '[': {

316 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);	341 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);

317 builder->AddCharacterClass(cc->AsCharacterClass());	342 builder->AddCharacterClass(cc->AsCharacterClass());

318 break;	343 break;

319 }	344 }

320 // Atom ::	345 // Atom ::

321 // \ AtomEscape	346 // \ AtomEscape

(...skipping 87 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
409 if (unicode()) {	434 if (unicode()) {

410 return ReportError(CStrVector("Invalid escape"));	435 return ReportError(CStrVector("Invalid escape"));

411 }	436 }

412 uc32 first_digit = Next();	437 uc32 first_digit = Next();

413 if (first_digit == '8' \|\| first_digit == '9') {	438 if (first_digit == '8' \|\| first_digit == '9') {

414 builder->AddCharacter(first_digit);	439 builder->AddCharacter(first_digit);

415 Advance(2);	440 Advance(2);

416 break;	441 break;

417 }	442 }

418 }	443 }

419 // FALLTHROUGH	444 // Fall through.

420 case '0': {	445 case '0': {

421 Advance();	446 Advance();

422 if (unicode() && Next() >= '0' && Next() <= '9') {	447 if (unicode() && Next() >= '0' && Next() <= '9') {

423 // With /u, decimal escape with leading 0 are not parsed as octal.	448 // With /u, decimal escape with leading 0 are not parsed as octal.

424 return ReportError(CStrVector("Invalid decimal escape"));	449 return ReportError(CStrVector("Invalid decimal escape"));

425 }	450 }

426 uc32 octal = ParseOctalLiteral();	451 uc32 octal = ParseOctalLiteral();

427 builder->AddCharacter(octal);	452 builder->AddCharacter(octal);

428 break;	453 break;

429 }	454 }

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
490 if (ParseUnicodeEscape(&value)) {	515 if (ParseUnicodeEscape(&value)) {

491 builder->AddEscapedUnicodeCharacter(value);	516 builder->AddEscapedUnicodeCharacter(value);

492 } else if (!unicode()) {	517 } else if (!unicode()) {

493 builder->AddCharacter('u');	518 builder->AddCharacter('u');

494 } else {	519 } else {

495 // With /u, invalid escapes are not treated as identity escapes.	520 // With /u, invalid escapes are not treated as identity escapes.

496 return ReportError(CStrVector("Invalid unicode escape"));	521 return ReportError(CStrVector("Invalid unicode escape"));

497 }	522 }

498 break;	523 break;

499 }	524 }

	525 case 'k':

	526 if (FLAG_harmony_regexp_named_captures && unicode()) {

	527 Advance(2);

	528 ParseNamedBackReference(builder, state CHECK_FAILED);

	529 break;

	530 }

	531 // Fall through.

500 default:	532 default:

501 Advance();	533 Advance();

502 // With /u, no identity escapes except for syntax characters	534 // With /u, no identity escapes except for syntax characters

503 // are allowed. Otherwise, all identity escapes are allowed.	535 // are allowed. Otherwise, all identity escapes are allowed.

504 if (!unicode() \|\| IsSyntaxCharacterOrSlash(current())) {	536 if (!unicode() \|\| IsSyntaxCharacterOrSlash(current())) {

505 builder->AddCharacter(current());	537 builder->AddCharacter(current());

506 Advance();	538 Advance();

507 } else {	539 } else {

508 return ReportError(CStrVector("Invalid escape"));	540 return ReportError(CStrVector("Invalid escape"));

509 }	541 }

510 break;	542 break;

511 }	543 }

512 break;	544 break;

513 case '{': {	545 case '{': {

514 int dummy;	546 int dummy;

515 bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);	547 bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);

516 if (parsed) return ReportError(CStrVector("Nothing to repeat"));	548 if (parsed) return ReportError(CStrVector("Nothing to repeat"));

517 // fallthrough	549 // Fall through.

518 }	550 }

519 case '}':	551 case '}':

520 case ']':	552 case ']':

521 if (unicode()) {	553 if (unicode()) {

522 return ReportError(CStrVector("Lone quantifier brackets"));	554 return ReportError(CStrVector("Lone quantifier brackets"));

523 }	555 }

524 // fallthrough	556 // Fall through.

525 default:	557 default:

526 builder->AddUnicodeCharacter(current());	558 builder->AddUnicodeCharacter(current());

527 Advance();	559 Advance();

528 break;	560 break;

529 } // end switch(current())	561 } // end switch(current())

530	562

531 int min;	563 int min;

532 int max;	564 int max;

533 switch (current()) {	565 switch (current()) {

534 // QuantifierPrefix ::	566 // QuantifierPrefix ::

(...skipping 133 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
668 }	700 }

669 if (value > capture_count_) {	701 if (value > capture_count_) {

670 Reset(start);	702 Reset(start);

671 return false;	703 return false;

672 }	704 }

673 }	705 }

674 *index_out = value;	706 *index_out = value;

675 return true;	707 return true;

676 }	708 }

677	709

	710 static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) {

	711 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {

	712 v->push_back(code_unit);

	713 } else {

	714 v->push_back(unibrow::Utf16::LeadSurrogate(code_unit));

	715 v->push_back(unibrow::Utf16::TrailSurrogate(code_unit));

	716 }

	717 }

	718

	719 const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {

	720 DCHECK(FLAG_harmony_regexp_named_captures);

	721 DCHECK(unicode());

	722

	723 ZoneVector<uc16>* name =

	724 new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone());

	725

	726 bool at_start = true;

	727 while (true) {

	728 uc32 c = current();

	729 Advance();

	730

	731 // Convert unicode escapes.

	732 if (c == '\\' && current() == 'u') {

	733 Advance();

	734 if (!ParseUnicodeEscape(&c)) {

	735 ReportError(CStrVector("Invalid Unicode escape sequence"));

	736 return nullptr;

	737 }

	738 }

	739

	740 if (at_start) {

	741 if (!IdentifierStart::Is(c)) {

	742 ReportError(CStrVector("Invalid capture group name"));

	743 return nullptr;

	744 }

	745 push_code_unit(name, c);

	746 at_start = false;

	747 } else {

	748 if (c == '>') {

	749 break;

	750 } else if (IdentifierPart::Is(c)) {

	751 push_code_unit(name, c);

	752 } else {

	753 ReportError(CStrVector("Invalid capture group name"));

	754 return nullptr;

	755 }

	756 }

	757 }

	758

	759 return name;

	760 }

	761

	762 bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,

	763 int index) {

	764 DCHECK(FLAG_harmony_regexp_named_captures);

	765 DCHECK(unicode());

	766 DCHECK(0 < index && index <= captures_started_);

	767 DCHECK_NOT_NULL(name);

	768

	769 if (named_captures_ == nullptr) {

	770 named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone());

	771 } else {

	772 // Check for duplicates and bail if we find any.

	773 for (const auto& named_capture : *named_captures_) {

	774 if (named_capture->name() == name) {

	775 ReportError(CStrVector("Duplicate capture group name"));

	776 return false;

	777 }

	778 }

	779 }

	780

	781 RegExpCapture* capture = GetCapture(index);

	782 DCHECK(capture->name() == nullptr);

	783

	784 capture->set_name(name);

	785 named_captures_->Add(capture, zone());

	786

	787 return true;

	788 }

	789

	790 bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,

	791 RegExpParserState* state) {

	792 // The parser is assumed to be on the '<' in \k<name>.

	793 if (current() != '<') {

	794 ReportError(CStrVector("Invalid named reference"));

	795 return false;

	796 }

	797

	798 Advance();

	799 const ZoneVector<uc16>* name = ParseCaptureGroupName();

	800 if (name == nullptr) {

	801 return false;

	802 }

	803

	804 if (state->IsInsideCaptureGroup(name)) {

	805 builder->AddEmpty();

	806 } else {

	807 RegExpBackReference* atom = new (zone()) RegExpBackReference();

	808 atom->set_name(name);

	809

	810 builder->AddAtom(atom);

	811

	812 if (named_back_references_ == nullptr) {

	813 named_back_references_ =

	814 new (zone()) ZoneList<RegExpBackReference*>(1, zone());

	815 }

	816 named_back_references_->Add(atom, zone());

	817 }

	818

	819 return true;

	820 }

	821

	822 void RegExpParser::PatchNamedBackReferences() {

	823 if (named_back_references_ == nullptr) return;

	824

	825 if (named_captures_ == nullptr) {

	826 ReportError(CStrVector("Invalid named capture referenced"));

	827 return;

	828 }

	829

	830 // Look up and patch the actual capture for each named back reference.

	831 // TODO(jgruber): O(n^2), optimize if necessary.

	832

	833 for (int i = 0; i < named_back_references_->length(); i++) {

	834 RegExpBackReference* ref = named_back_references_->at(i);

	835

	836 int index = -1;

	837 for (const auto& capture : *named_captures_) {

	838 if (capture->name() == ref->name()) {

	839 index = capture->index();

	840 break;

	841 }

	842 }

	843

	844 if (index == -1) {

	845 ReportError(CStrVector("Invalid named capture referenced"));

	846 return;

	847 }

	848

	849 ref->set_capture(GetCapture(index));

	850 }

	851 }

678	852

679 RegExpCapture* RegExpParser::GetCapture(int index) {	853 RegExpCapture* RegExpParser::GetCapture(int index) {

680 // The index for the capture groups are one-based. Its index in the list is	854 // The index for the capture groups are one-based. Its index in the list is

681 // zero-based.	855 // zero-based.

682 int know_captures =	856 int know_captures =

683 is_scanned_for_captures_ ? capture_count_ : captures_started_;	857 is_scanned_for_captures_ ? capture_count_ : captures_started_;

684 DCHECK(index <= know_captures);	858 DCHECK(index <= know_captures);

685 if (captures_ == NULL) {	859 if (captures_ == NULL) {

686 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());	860 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());

687 }	861 }

688 while (captures_->length() < know_captures) {	862 while (captures_->length() < know_captures) {

689 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());	863 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());

690 }	864 }

691 return captures_->at(index - 1);	865 return captures_->at(index - 1);

692 }	866 }

693	867

	868 Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {

	869 if (named_captures_ == nullptr \|\| named_captures_->is_empty())

	870 return Handle<FixedArray>();

	871

	872 Factory* factory = isolate()->factory();

	873

	874 int len = named_captures_->length() * 2;

	875 Handle<FixedArray> array = factory->NewFixedArray(len);

	876

	877 for (int i = 0; i < named_captures_->length(); i++) {

	878 RegExpCapture* capture = named_captures_->at(i);

	879 MaybeHandle<String> name = factory->NewStringFromTwoByte(capture->name());

	880 array->set(i * 2, *name.ToHandleChecked());

	881 array->set(i * 2 + 1, Smi::FromInt(capture->index()));

	882 }

	883

	884 return array;

	885 }

694	886

695 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {	887 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {

696 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {	888 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {

697 if (s->group_type() != CAPTURE) continue;	889 if (s->group_type() != CAPTURE) continue;

698 // Return true if we found the matching capture index.	890 // Return true if we found the matching capture index.

699 if (index == s->capture_index()) return true;	891 if (index == s->capture_index()) return true;

700 // Abort if index is larger than what has been parsed up till this state.	892 // Abort if index is larger than what has been parsed up till this state.

701 if (index > s->capture_index()) return false;	893 if (index > s->capture_index()) return false;

702 }	894 }

703 return false;	895 return false;

704 }	896 }

705	897

	898 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(

	899 const ZoneVector<uc16>* name) {

	900 DCHECK_NOT_NULL(name);

	901 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {

	902 if (s->capture_name() == nullptr) continue;

	903 if (s->capture_name() == name) return true;

	904 }

	905 return false;

	906 }

706	907

707 // QuantifierPrefix ::	908 // QuantifierPrefix ::

708 // { DecimalDigits }	909 // { DecimalDigits }

709 // { DecimalDigits , }	910 // { DecimalDigits , }

710 // { DecimalDigits , DecimalDigits }	911 // { DecimalDigits , DecimalDigits }

711 //	912 //

712 // Returns true if parsing succeeds, and set the min_out and max_out	913 // Returns true if parsing succeeds, and set the min_out and max_out

713 // values. Values are truncated to RegExpTree::kInfinity if they overflow.	914 // values. Values are truncated to RegExpTree::kInfinity if they overflow.

714 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {	915 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {

715 DCHECK_EQ(current(), '{');	916 DCHECK_EQ(current(), '{');

(...skipping 412 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1128 default:	1329 default:

1129 first = ParseClassCharacterEscape(CHECK_FAILED);	1330 first = ParseClassCharacterEscape(CHECK_FAILED);

1130 }	1331 }

1131 } else {	1332 } else {

1132 Advance();	1333 Advance();

1133 }	1334 }

1134	1335

1135 return CharacterRange::Singleton(first);	1336 return CharacterRange::Singleton(first);

1136 }	1337 }

1137	1338

1138

1139 static const uc16 kNoCharClass = 0;	1339 static const uc16 kNoCharClass = 0;

1140	1340

1141 // Adds range or pre-defined character class to character ranges.	1341 // Adds range or pre-defined character class to character ranges.

1142 // If char_class is not kNoCharClass, it's interpreted as a class	1342 // If char_class is not kNoCharClass, it's interpreted as a class

1143 // escape (i.e., 's' means whitespace, from '\s').	1343 // escape (i.e., 's' means whitespace, from '\s').

1144 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,	1344 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,

1145 uc16 char_class, CharacterRange range,	1345 uc16 char_class, CharacterRange range,

1146 Zone* zone) {	1346 Zone* zone) {

1147 if (char_class != kNoCharClass) {	1347 if (char_class != kNoCharClass) {

1148 CharacterRange::AddClassEscape(char_class, ranges, zone);	1348 CharacterRange::AddClassEscape(char_class, ranges, zone);

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1261 DCHECK(result->error.is_null());	1461 DCHECK(result->error.is_null());

1262 if (FLAG_trace_regexp_parser) {	1462 if (FLAG_trace_regexp_parser) {

1263 OFStream os(stdout);	1463 OFStream os(stdout);

1264 tree->Print(os, zone);	1464 tree->Print(os, zone);

1265 os << "\n";	1465 os << "\n";

1266 }	1466 }

1267 result->tree = tree;	1467 result->tree = tree;

1268 int capture_count = parser.captures_started();	1468 int capture_count = parser.captures_started();

1269 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;	1469 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;

1270 result->contains_anchor = parser.contains_anchor();	1470 result->contains_anchor = parser.contains_anchor();

	1471 result->capture_name_map = parser.CreateCaptureNameMap();

1271 result->capture_count = capture_count;	1472 result->capture_count = capture_count;

1272 }	1473 }

1273 return !parser.failed();	1474 return !parser.failed();

1274 }	1475 }

1275	1476

1276 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode)	1477 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode)

1277 : zone_(zone),	1478 : zone_(zone),

1278 pending_empty_(false),	1479 pending_empty_(false),

1279 ignore_case_(ignore_case),	1480 ignore_case_(ignore_case),

1280 unicode_(unicode),	1481 unicode_(unicode),

(...skipping 276 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1557 return false;	1758 return false;

1558 }	1759 }

1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),	1760 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),

1560 zone());	1761 zone());

1561 LAST(ADD_TERM);	1762 LAST(ADD_TERM);

1562 return true;	1763 return true;

1563 }	1764 }

1564	1765

1565 } // namespace internal	1766 } // namespace internal

1566 } // namespace v8	1767 } // namespace v8

OLD	NEW

« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »